In [29]:
import pandas as pd
import mlflow
import mlflow.sklearn as mlsk
from pathlib import Path
from mlflow.models.signature import infer_signature

In [2]:
# Absolute locations of the processed training and validation CSV files,
# resolved from paths relative to the notebook's working directory.
TRAIN_DATA_PATH, VAL_DATA_PATH = (
    Path(relative_csv).resolve()
    for relative_csv in ("../data/processed/train.csv", "../data/processed/val.csv")
)

In [3]:
# Read the processed training and validation splits into DataFrames.
train, val = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_DATA_PATH, VAL_DATA_PATH)
)

In [4]:
# Preview the loaded training frame as the cell's rich output.
train

Unnamed: 0,Age,Gender,Glucose,Blood Pressure,BMI,Oxygen Saturation,LengthOfStay,Cholesterol,Triglycerides,HbA1c,Smoking,Alcohol,Physical Activity,Diet Score,Family History,Stress Level,Sleep Hours,medical_condition
0,1.661322,0,-0.685954,1.175762,0.363904,0.033410,3,-0.003800,-0.898885,0.226317,0,0,0.069003,0.649004,1,2.815129,0.204118,5
1,-0.924910,1,-0.987994,-0.051761,-0.016328,0.683027,3,0.381913,0.050322,-0.134402,1,0,-0.676341,-0.411922,0,1.173440,0.878884,6
2,0.525903,0,-0.451194,0.397414,-0.174324,1.463099,4,0.936524,-0.200075,-1.141409,0,0,0.147980,0.577542,0,0.495055,1.342786,5
3,2.102874,0,-0.814988,-0.382358,-1.113620,0.198476,2,-0.292790,0.659367,-0.156947,1,0,-0.952760,-0.312976,1,-0.147148,0.870450,0
4,0.336666,1,-0.661925,1.345092,1.195554,-1.734400,3,0.679480,1.264099,-1.614853,0,1,-0.735574,0.000354,0,0.065412,0.111338,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10679,1.282849,0,-0.958919,0.163103,0.049648,1.077056,18,-0.313495,-0.388848,0.654671,0,0,-0.340689,0.368655,0,1.928707,-1.339409,2
10680,1.156691,1,-0.837575,0.535913,1.046239,-0.999588,15,0.628308,-1.688281,-1.036199,1,0,1.085830,-1.604777,0,1.376955,-2.275647,2
10681,0.652061,0,1.614788,-0.662677,1.350078,-0.775949,8,-0.703054,-0.602681,1.729313,0,0,-1.564831,0.929352,1,-1.214473,0.710193,3
10682,0.210509,1,0.125253,1.027302,0.976790,0.888029,8,0.148533,-0.108872,0.173713,0,1,-0.933016,-0.290988,0,0.888518,0.204118,3


In [5]:
# Separate the label column from the predictor columns for both splits.
TARGET_COLUMN = "medical_condition"

y_train = train[TARGET_COLUMN]
X_train = train.drop(columns=[TARGET_COLUMN])

y_val = val[TARGET_COLUMN]
X_val = val.drop(columns=[TARGET_COLUMN])

In [25]:
# Keep MLflow's file-based tracking store in the `mlruns` folder one level
# above the notebook's working directory, creating it on first use.
MLFLOW_DIR = Path("../mlruns").resolve()
MLFLOW_DIR.mkdir(parents=True, exist_ok=True)

# Build the URI with Path.as_uri() instead of string concatenation: the
# resolved path already starts with "/" on POSIX, so "file:///" + str(path)
# produced "file:////..." (four slashes), which is not a well-formed file URI.
# as_uri() also yields the correct form for Windows drive paths.
MLFLOW_TRACKING_URI = MLFLOW_DIR.as_uri()
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [34]:
# Show the resolved directory that backs the MLflow tracking URI.
MLFLOW_DIR



PosixPath('/workspaces/Healthcare-Risk-Factors/mlruns')

In [26]:
# Select the experiment under which the following runs are recorded; the
# returned Experiment object is shown as the cell output.
mlflow.set_experiment(experiment_name="baseline_model")

2025/10/11 10:00:26 INFO mlflow.tracking.fluent: Experiment with name 'baseline_model' does not exist. Creating a new experiment.


<Experiment: artifact_location='file://workspaces/Healthcare-Risk-Factors/mlruns/834183516746379056', creation_time=1760176826321, experiment_id='834183516746379056', last_update_time=1760176826321, lifecycle_stage='active', name='baseline_model', tags={}>

In [19]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report ,ConfusionMatrixDisplay   




In [35]:
# Baseline model: one-vs-rest logistic regression with balanced class weights.
#
# The hyperparameters are declared once so that the values handed to the
# estimator and the values recorded in MLflow cannot drift apart.
# `n_jobs` is deliberately not set: scikit-learn ignores it when the solver is
# "liblinear" (and may warn about it).
# NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5 in favour of
# wrapping the estimator in OneVsRestClassifier — confirm against the pinned
# scikit-learn version before upgrading.
LOGREG_PARAMS = {
    "max_iter": 1000,
    "random_state": 42,
    "class_weight": "balanced",
    "multi_class": "ovr",
    "solver": "liblinear",
}

with mlflow.start_run(run_name="logistic_regression"):
    # Record the configuration first so it is attached to the run even if
    # fitting fails part-way through.
    mlflow.log_params(LOGREG_PARAMS)

    model = LogisticRegression(**LOGREG_PARAMS)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    # Weighted averaging accounts for class imbalance in the validation split;
    # zero_division=0 avoids warnings for classes with no predicted samples.
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_val, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_val, y_pred, average='weighted', zero_division=0)

    mlflow.log_metrics(
        {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
        }
    )

    # Per-class diagnostics are printed for inspection in the notebook.
    cm = confusion_matrix(y_val, y_pred)
    cr = classification_report(y_val, y_pred)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", cr)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")




Confusion Matrix:
 [[ 61   1   0   1   0  21   8]
 [  5  95   1   0   1   2   2]
 [  0   2  58   0   0   1   0]
 [  2   1   2 327   0   5   3]
 [  0   0   0   0 160   0   0]
 [ 22   5   0   5   0 335   8]
 [  6   2   0   1   2  10 181]]
Classification Report:
               precision    recall  f1-score   support

           0       0.64      0.66      0.65        92
           1       0.90      0.90      0.90       106
           2       0.95      0.95      0.95        61
           3       0.98      0.96      0.97       340
           4       0.98      1.00      0.99       160
           5       0.90      0.89      0.89       375
           6       0.90      0.90      0.90       202

    accuracy                           0.91      1336
   macro avg       0.89      0.89      0.89      1336
weighted avg       0.91      0.91      0.91      1336

Accuracy: 0.9109
Precision: 0.9119
Recall: 0.9109
F1 Score: 0.9114
