# Modeling

In [1]:
# Import libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Read csv file
data = pd.read_csv("data/data.csv")

In [3]:
# Split the data into training and testing sets and apply necessary scaling
# Separate fetures and target
X = data.drop("T", axis=1)
y = data["T"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Build, train, and evaluate the model
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Create classifiers
classifiers = [
    ("Logistic Regression", LogisticRegression(random_state=42)),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("Gradient Boosting", GradientBoostingClassifier(random_state=42))
]

# Function to train and evaluate a classifier

def evaluate_classifier(name, clf, X_train, y_train, X_test, y_test):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"{name}: ")
    print(f"   Accuracy: {acuracy:.2f}")
    print(f"   Precision: {precision:.2f}")
    print(f"   Recall: {recall:.2f}")
    print(f"   F1-score: {f1:.2f}")
    print(f"   Classification Report:")
    print(classification_report(y_test, y_pred))
    print(f"   Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\n")

# Evaluate classifiers
for name, clf in classifiers:
    evaluate_classifier(name, clf, X_train, y_train, X_test, y_test)

Logistic Regression: 
   Accuracy: 0.83
   Precision: 0.62
   Recall: 0.12
   F1-score: 0.20
   Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.98      0.90       557
           1       0.62      0.12      0.20       123

    accuracy                           0.83       680
   macro avg       0.73      0.55      0.55       680
weighted avg       0.80      0.83      0.78       680

   Confusion Matrix:
[[548   9]
 [108  15]]


Decision Tree: 
   Accuracy: 0.73
   Precision: 0.28
   Recall: 0.33
   F1-score: 0.31
   Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.82      0.83       557
           1       0.28      0.33      0.31       123

    accuracy                           0.73       680
   macro avg       0.57      0.57      0.57       680
weighted avg       0.75      0.73      0.74       680

   Confusion Matrix:
[[454 103]
 [ 82  41]]


Random Forest: 
   Acc

In [8]:
# Train the Logistic Regression model on the entire dataset
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Best Model
logistic_regression_model = LogisticRegression(random_state=42)
logistic_regression_model.fit(X_scaled, y)

decison_tree_model = DecisionTreeClassifier(random_state=42)
decison_tree_model.fit(X_scaled, y)

random_forest_model = RandomForestClassifier(random_state=42)
random_forest_model.fit(X_scaled, y)

gradient_boosting_model = GradientBoostingClassifier(random_state=42)
gradient_boosting_model.fit(X_scaled, y)

In [6]:
import joblib

# Save the model and the scaler to files
joblib.dump(logistic_regression_model, "model/logistic_regression_model.pkl")
joblib.dump(decison_tree_model, "model/decison_tree_model.pkl")
joblib.dump(random_forest_model, "model/random_forest_model.pkl")
joblib.dump(gradient_boosting_model, "model/gradient_boosting_model.pkl")
joblib.dump(scaler, "model/scaler.pkl")

# Load the model and the scaler from files
loaded_model = joblib.load("model/logistic_regression_model.pkl")
loaded_scaler = joblib.load("model/scaler.pkl")

# Create a sample test data point
sample_data = pd.DataFrame({
    "Age": [60],
    "DM": [1],
    "TG": [145],
    "HT": [1],
    "HDL": [50],
    "AC":[105]
})

# Scale the sample data using the loaded scaler
sample_data_scaled = loaded_scaler.transform(sample_data)

# Make a prediction using the loade model
prediction = loaded_model.predict(sample_data_scaled)

# Print the prediction
print("Prediction", prediction)

Prediction [0]
