In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib
import os

# Step 1: Load dataset
data_path = r"D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\data.xlsx"

if not os.path.exists(data_path):
    raise FileNotFoundError(f"Excel file not found at: {data_path}")

#  Read Excel file instead of CSV
data = pd.read_excel(data_path)

# Step 2: Display a sample
print("Sample data:")
display(data.head())

# Step 3: Split data into features and labels
X = data.drop(columns=["disease"])
y = data["disease"]

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Step 6: Evaluate
accuracy = model.score(X_test, y_test)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Step 7: Save model
model_dir = r"D:\Machine-Learning-Projects\Ai_Medbot\ml_training\models\medbot_model.pkll"
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, "medbot_model.pkl")
joblib.dump(model, model_path)

print(f" Model trained and saved successfully at: {model_path}")


Sample data:


Unnamed: 0,fever,cough,fatigue,headache,sore_throat,chest_pain,shortness_of_breath,disease
0,1,1,1,0,0,0,0,Common Cold
1,1,0,1,1,1,0,0,Flu
2,0,0,0,0,0,1,1,Pneumonia
3,1,1,1,1,0,1,1,COVID-19
4,0,0,0,1,0,0,0,Migraine


Model Accuracy: 0.00%
 Model trained and saved successfully at: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\models\medbot_model.pkll\medbot_model.pkl


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib
import os

# Step 1: Define paths
data_path = r"D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\Heart_disease_statlog.xlsx"
model_dir = r"D:\Machine-Learning-Projects\Ai_Medbot\ml_training\models"
model_path = os.path.join(model_dir, "medbot_model.pkl")

# Step 2: Validate dataset path
if not os.path.exists(data_path):
    raise FileNotFoundError(f"Excel file not found at: {data_path}")

# Step 3: Load dataset
data = pd.read_excel(data_path)
print(" Sample data:")
print(data.head())

# Step 4: Extract features and labels
if "target" not in data.columns:
    raise ValueError("Expected a 'target' column in the dataset.")

X = data.drop(columns=["target"])
y = data["target"]

# Step 5: Preprocess features
X = X.select_dtypes(include=["number"]).fillna(0)

# Step 6: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 7: Train model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Step 8: Evaluate model

 Sample data:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   70    1   3       130   322    0        2      109      0      2.4      1   
1   67    0   2       115   564    0        2      160      0      1.6      1   
2   57    1   1       124   261    0        0      141      0      0.3      0   
3   64    1   3       128   263    0        0      105      1      0.2      1   
4   74    0   1       120   269    0        2      121      1      0.2      0   

   ca  thal  target  
0   3     1       1  
1   0     3       0  
2   0     3       1  
3   1     3       0  
4   1     1       0  


0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [3]:
import importlib.util
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib
import os

# Load config file
config_path = r"D:\Machine-Learning-Projects\Ai_Medbot\medbot\config.py"
spec = importlib.util.spec_from_file_location("config", config_path)
config = importlib.util.module_from_spec(spec)
spec.loader.exec_module(config)

DATA_PATH = config.DATA_PATH
MODEL_PATH = config.MODEL_PATH

print("Using DATA_PATH:", DATA_PATH)
print("Using MODEL_PATH:", MODEL_PATH)

# Load dataset
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"❌ Excel file not found at: {DATA_PATH}")

data = pd.read_excel(DATA_PATH)
print("Sample data:")
print(data.head())

# Prepare features & labels
label_column = None
for possible_label in ["target", "disease", "label"]:
    if possible_label in data.columns:
        label_column = possible_label
        break

if label_column is None:
    raise ValueError("No label column named 'target', 'disease', or 'label' found in dataset.")

X = data.drop(columns=[label_column]).select_dtypes(include=["number"]).fillna(0)
y = data[label_column]

# Train/test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

accuracy = model.score(X_test, y_test)
print(f"✅ Model Accuracy: {accuracy * 100:.2f}%")

# Save model
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(model, MODEL_PATH)
print(f"💾 Model saved at: {MODEL_PATH}")


Loading dataset from: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\data.xlsx
✅ Excel file loaded successfully.
Using 'disease' as the target column.
✅ Model trained. Test Accuracy: 0.00%
✅ Model saved at: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\model\model.pkl
Using DATA_PATH: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\data.xlsx
Using MODEL_PATH: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\model\model.pkl
Sample data:
   fever  cough  fatigue  headache  sore_throat  chest_pain  \
0      1      1        1         0            0           0   
1      1      0        1         1            1           0   
2      0      0        0         0            0           1   
3      1      1        1         1            0           1   
4      0      0        0         1            0           0   

   shortness_of_breath      disease  
0                    0  Common Cold  
1                    0          Flu  
2                    1    Pneumonia  
3    

In [4]:
# ============================================================
# Ai MedBot - Train Decision Tree Model
# ============================================================

# ---------------------------
# 1️ Import Libraries
# ---------------------------
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib

# ---------------------------
# 2️ Define Paths
# ---------------------------
# Update these paths according to your project structure
DATA_PATH = r"D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\data.xlsx"
MODEL_PATH = r"D:\Machine-Learning-Projects\Ai_Medbot\ml_training\model\model.pkl"

# ---------------------------
# 3️ Load Dataset
# ---------------------------
print(f"Loading dataset from: {DATA_PATH}")
try:
    data = pd.read_excel(DATA_PATH)
    print(" Excel file loaded successfully.")
except FileNotFoundError:
    print(" Excel file not found. Please check the DATA_PATH.")
    raise

# ---------------------------
# 4️ Identify Label Column
# ---------------------------
label_col = "disease" if "disease" in data.columns else data.columns[-1]
print(f"Using '{label_col}' as the target column.")

# ---------------------------
# 5️ Split Features and Target
# ---------------------------
X = data.drop(columns=[label_col])
y = data[label_col]

# ---------------------------
# 6️ Train-Test Split
# ---------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f" Data split: {X_train.shape[0]} training samples, {X_test.shape[0]} test samples.")

# ---------------------------
# 7️ Train Model
# ---------------------------
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
print(" Model trained successfully.")

# ---------------------------
# 8️ Evaluate Model
# ---------------------------
accuracy = model.score(X_test, y_test)
print(f" Test Accuracy: {accuracy:.2%}")

# ---------------------------
# 9️ Save Model
# ---------------------------
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(model, MODEL_PATH)
print(f" Model saved at: {MODEL_PATH}")


Loading dataset from: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\data.xlsx
 Excel file loaded successfully.
Using 'disease' as the target column.
 Data split: 7 training samples, 2 test samples.
 Model trained successfully.
 Test Accuracy: 0.00%
 Model saved at: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\model\model.pkl


In [5]:
# ============================================================
# Ai MedBot - Random Forest Model Training for Heart Disease
# ============================================================

# ---------------------------
# 1️ Import Libraries
# ---------------------------
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib

# ---------------------------
# 2️ Define Paths
# ---------------------------
DATA_PATH = r'D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\heart.xlsx'
MODEL_PATH = r'D:\Machine-Learning-Projects\Ai_Medbot\ml_training\models\heart_rf_model.pkl'

# ---------------------------
# 3️ Load Dataset
# ---------------------------
print(f"Loading dataset from: {DATA_PATH}")
try:
    df = pd.read_excel(DATA_PATH)
    print(" Excel file loaded successfully.")
except FileNotFoundError:
    print(" Excel file not found. Please check the DATA_PATH.")
    raise

# ---------------------------
# 4️ Encode Categorical Columns
# ---------------------------
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

le_dict = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    le_dict[col] = le  # Save encoder for future use

print(" Categorical columns encoded:", categorical_cols)

# ---------------------------
# 5️ Split Features and Target
# ---------------------------
X = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']

# ---------------------------
# 6️ Train-Test Split
# ---------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f" Data split: {X_train.shape[0]} training samples, {X_test.shape[0]} test samples.")

# ---------------------------
# 7️ Train Random Forest Model
# ---------------------------
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
print(" Model trained successfully.")

# ---------------------------
# 8️ Evaluate Model
# ---------------------------
accuracy = model.score(X_test, y_test)
print(f" Test Accuracy: {accuracy:.2%}")

# ---------------------------
# 9️ Save Model
# ---------------------------
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(model, MODEL_PATH)
print(f" Model saved at: {MODEL_PATH}")

# ---------------------------
# 10 Optional: Predict New Patient
# ---------------------------
# Example usage:
# new_patient = pd.DataFrame([{
#     'Age': 63, 'Sex': le_dict['Sex'].transform(['M'])[0],
#     'ChestPainType': le_dict['ChestPainType'].transform(['ATA'])[0],
#     'RestingBP': 145, 'Cholesterol': 233, 'FastingBS': 1,
#     'RestingECG': le_dict['RestingECG'].transform(['Normal'])[0],
#     'MaxHR': 150, 'ExerciseAngina': le_dict['ExerciseAngina'].transform(['N'])[0],
#     'Oldpeak': 2.3, 'ST_Slope': le_dict['ST_Slope'].transform(['Up'])[0]
# }])
# prediction = model.predict(new_patient)
# print("Predicted HeartDisease:", prediction[0])


Loading dataset from: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\data\heart.xlsx
 Excel file loaded successfully.
 Categorical columns encoded: ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
 Data split: 734 training samples, 184 test samples.
 Model trained successfully.
 Test Accuracy: 88.04%
 Model saved at: D:\Machine-Learning-Projects\Ai_Medbot\ml_training\models\heart_rf_model.pkl
