In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import warnings
warnings.filterwarnings('ignore')

# Resolve the project root (repo root) without assuming one machine's layout.
# Resolution order:
#   1. the SPHA_PROJECT_ROOT environment variable, when set and non-empty;
#   2. the nearest ancestor of the current working directory (inclusive) that
#      contains config/config.yaml -- covers launching Jupyter from the repo
#      root or from any sub-folder such as a notebooks directory;
#   3. the original hard-coded location, kept only as a last-resort fallback
#      so the notebook keeps working unchanged on the machine it was written on.
_DEFAULT_PROJECT_ROOT = os.path.join(r"C:\Users\aksha\OneDrive\Desktop\Smart Patient Health Assistant", "Smart-Patient-Health-Assistant")


def _find_project_root(start, marker=os.path.join("config", "config.yaml")):
    """Return the closest ancestor of ``start`` (inclusive) that contains ``marker``.

    Parameters
    ----------
    start : str
        Directory from which the upward search begins.
    marker : str
        Path, relative to a candidate root, of a file that identifies the
        repo root. Defaults to the config file this notebook loads later.

    Returns
    -------
    str or None
        Absolute path of the matching directory, or ``None`` when the
        filesystem root is reached without a match.
    """
    current = os.path.abspath(start)
    while True:
        if os.path.isfile(os.path.join(current, marker)):
            return current
        parent = os.path.dirname(current)
        if parent == current:  # dirname() is a fixed point at the filesystem root
            return None
        current = parent


project_root = os.path.abspath(
    os.environ.get("SPHA_PROJECT_ROOT")
    or _find_project_root(os.getcwd())
    or _DEFAULT_PROJECT_ROOT
)

# Fail with an actionable message instead of a bare chdir error.
if not os.path.isdir(project_root):
    raise FileNotFoundError(
        f"Project root not found: {project_root!r}. "
        "Set the SPHA_PROJECT_ROOT environment variable to the repository root, "
        "or start Jupyter from inside the repository."
    )

# Relative paths used by the project are resolved against the repo root.
os.chdir(project_root)

print(f"📁 Working directory: {os.getcwd()}")

# Add to Python path so the `src` package can be imported below
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import custom modules
from src.data_processing.load_data import DataLoader

# Set display options
# pandas: show every column of wide frames, but cap printed rows at 100.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
# Plot styling: seaborn white grid and a 12x6 inch default figure size.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Status marker repaired: the literal was mojibake (UTF-8 "✓" mis-decoded).
print("✓ All libraries imported successfully")

# Initialize data loader from the project's YAML configuration file
config_path = os.path.join(project_root, 'config', 'config.yaml')
loader = DataLoader(config_path=config_path)

# Load all datasets.
# The loop below treats the result as a mapping of dataset name -> object with
# a `.shape`, or None for a dataset that could not be loaded
# (presumably pandas DataFrames -- confirm against DataLoader).
datasets = loader.load_all_datasets()

# Check what was loaded: one status line per dataset.
# The ✓ / ✗ markers were mojibake in the original literals and are repaired here.
print("\n" + "="*60)
print("DATASETS LOADED")
print("="*60)
for name, df in datasets.items():
    if df is not None:
        print(f"  ✓ {name.capitalize()}: {df.shape}")
    else:
        print(f"  ✗ {name.capitalize()}: Not loaded")

📁 Working directory: C:\Users\aksha\OneDrive\Desktop\Smart Patient Health Assistant\Smart-Patient-Health-Assistant
✓ All libraries imported successfully
Loading all datasets...
--------------------------------------------------
✓ Diabetes data loaded: 768 rows, 9 columns
✓ Heart disease data loaded: 1025 rows, 14 columns
✓ Kidney disease data loaded: 400 rows, 26 columns
--------------------------------------------------
✓ Successfully loaded 3/3 datasets

DATASETS LOADED
  ✓ Diabetes: (768, 9)
  ✓ Heart: (1025, 14)
  ✓ Kidney: (400, 26)


In [2]:
# Preview datasets and train simple baseline models
from src.models import ModelTrainer

# Show the first rows of every dataset that was actually loaded.
for dataset_name, frame in datasets.items():
    if frame is None:
        # Nothing to preview for a dataset that failed to load.
        continue
    print(f"\n{dataset_name.upper()} head:")
    display(frame.head())

# Train baseline models when possible.
# Training is best-effort: a failure for one dataset must not stop the others.
# Successes are collected in `train_results` (name -> metrics); failures are
# collected in `train_errors` (name -> "ExceptionType: message") so they stay
# inspectable after the cell has run instead of existing only as printed text.
#
# NOTE(review): the recorded run reports roc_auc ~= 1.0 / accuracy 1.0 for
#   "heart". A perfect score on a baseline is suspicious -- check for duplicate
#   rows leaking across the train/test split before trusting it.
# NOTE(review): the recorded run fails for "kidney" because the true labels are
#   strings ('ckd' / 'notckd') while predictions are integers; the target
#   presumably needs consistent encoding inside ModelTrainer -- confirm there.
trainer = ModelTrainer(config_path=config_path)
train_results = {}
train_errors = {}
for name, df in datasets.items():
    if df is None:
        # Dataset was not loaded; there is nothing to train on.
        continue
    try:
        result = trainer.train_and_save(name, df)
        train_results[name] = result.metrics
        print(f"\n✓ Trained {name} model → {result.model_path}")
        print(f"  Metrics: {result.metrics}")
    except Exception as e:
        # Keep a string summary rather than the exception object so the
        # traceback (and the data it references) is not kept alive.
        train_errors[name] = f"{type(e).__name__}: {e}"
        print(f"✗ Skipped training for {name}: {e}")

# Cell output: metrics of the models that trained successfully.
train_results



DIABETES head:


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1



HEART head:


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0



KIDNEY head:


Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,121.0,36.0,1.2,,,15.4,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,,18.0,0.8,,,11.3,38,6000,,no,no,no,good,no,no,ckd
2,2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,423.0,53.0,1.8,,,9.6,31,7500,,no,yes,no,poor,no,yes,ckd
3,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,117.0,56.0,3.8,111.0,2.5,11.2,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
4,4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,106.0,26.0,1.4,,,11.6,35,7300,4.6,no,no,no,good,no,no,ckd



✓ Trained diabetes model → models/saved_models\diabetes_model.joblib
  Metrics: {'roc_auc': 0.8490131578947367, 'accuracy': 0.7586206896551724}

✓ Trained heart model → models/saved_models\heart_model.joblib
  Metrics: {'roc_auc': 0.9999999999999999, 'accuracy': 1.0}
✗ Skipped training for kidney: Labels in y_true and y_pred should be of the same type. Got y_true=['ckd' 'notckd'] and y_pred=[0]. Make sure that the predictions provided by the classifier coincides with the true labels.


{'diabetes': {'roc_auc': 0.8490131578947367, 'accuracy': 0.7586206896551724},
 'heart': {'roc_auc': 0.9999999999999999, 'accuracy': 1.0}}