In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef
)

#-- Read Test and Train Files Generated in DataPrep.ipynb -------------------------
# Feature matrices stay as DataFrames.
X_train = pd.read_csv("../data/split/X_train.csv")
X_test = pd.read_csv("../data/split/X_test.csv")

# read_csv always returns a 2-D DataFrame, even for a single label column.
# Passing that column vector to the estimator triggers scikit-learn's
# DataConversionWarning, so collapse the single column into a 1-D Series.
# squeeze("columns") leaves the frame untouched if it has more than one column.
y_train = pd.read_csv("../data/split/y_train.csv").squeeze("columns")
y_test = pd.read_csv("../data/split/y_test.csv").squeeze("columns")

#-- Standardize Features -------------------------
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both splits so the test data never influences the statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

#-- Fit Logistic Regression Classifier -------------------------
# NOTE(review): newer scikit-learn releases deprecate the `multi_class`
# argument -- confirm against the installed version before upgrading.
logreg_params = {
    "solver": "lbfgs",             # a solver that supports the multinomial option
    "multi_class": "multinomial",
    "max_iter": 3000,              # upper bound on optimizer iterations
    "n_jobs": -1,
}
model = LogisticRegression(**logreg_params)

# Train on the standardized training features.
model.fit(X_train_scaled, y_train)

#-- Persist Trained Model -------------------------
import os
import pickle

trained_model_path = "trained_models/LogisticRegressionModel.pkl"

# open(..., "wb") does not create missing parent folders, so make sure the
# output directory exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(trained_model_path), exist_ok=True)

with open(trained_model_path, "wb") as f:
    pickle.dump(model, f)

# NOTE: only the classifier is pickled. It was fitted on StandardScaler output,
# so anything that reloads this file must scale its inputs the same way.
       
#-- Score the Held-Out Test Set -------------------------
# Probabilities are needed for the AUC; hard labels drive every other metric.
y_prob = model.predict_proba(X_test_scaled)
y_pred = model.predict(X_test_scaled)

#-- Compute Evaluation Metrics -------------------------
# Every averaged metric below uses the same macro averaging.
macro_avg = {"average": "macro"}

accuracy = accuracy_score(y_test, y_pred)
mcc = matthews_corrcoef(y_test, y_pred)
precision = precision_score(y_test, y_pred, **macro_avg)
recall = recall_score(y_test, y_pred, **macro_avg)
f1 = f1_score(y_test, y_pred, **macro_avg)
# One-vs-rest AUC computed from the predicted probabilities.
roc_auc = roc_auc_score(y_test, y_prob, multi_class='ovr', **macro_avg)

#-- Assemble Evaluation Metrics Table -------------------------
# Single-row summary: one column per metric, labelled with the model name.
metric_columns = {
    "ML Model Name": ["Logistic Regression"],
    "Accuracy": [accuracy],
    "AUC": [roc_auc],
    "Precision": [precision],
    "Recall": [recall],
    "F1": [f1],
    "MCC": [mcc],
}

#-- Round Metrics For Readability -------------------------
metrics_df = pd.DataFrame(metric_columns).round(4)

# Bare expression left as the final statement so the table is shown as output.
metrics_df


  y = column_or_1d(y, warn=True)


'        \n#--Predict on Test Set -------------------------\ny_pred = model.predict(X_test_scaled)\ny_prob = model.predict_proba(X_test_scaled)\n\n#-- Evaluate Model -------------------------\naccuracy = accuracy_score(y_test, y_pred)\nroc_auc = roc_auc_score(y_test, y_prob, multi_class=\'ovr\',average="macro")\nprecision = precision_score(y_test, y_pred, average=\'macro\')\nrecall = recall_score(y_test, y_pred, average=\'macro\')\nf1 = f1_score(y_test, y_pred, average=\'macro\')\nmcc = matthews_corrcoef(y_test, y_pred)\n\n#-- Print Evaluation Metrics -------------------------\nmetrics_df = pd.DataFrame({\n    "ML Model Name": ["Logistic Regression"],\n    "Accuracy": [accuracy],\n    "AUC": [roc_auc],\n    "Precision": [precision],\n    "Recall": [recall],\n    "F1": [f1],\n    "MCC": [mcc]\n})\n\n#--Round Metrics For Readibility-------------------------\nmetrics_df = metrics_df.round(4)\n\nmetrics_df\n'