<h1>Logistic Regression Model</h1>

In [1]:
#Importing Necessary Libraries

import pandas as pd
import numpy as np
import io
import datetime
import matplotlib.pyplot as mtp
import seaborn as sns
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the raw dataset from its CSV file; the path is relative to the
# notebook's working directory.
source_data = pd.read_csv(filepath_or_buffer='NPHA-doctor-visits.csv')

In [3]:
# Separate the prediction target (dependent variable) from the features
# (independent variables).
source = source_data  # plain alias: both names refer to the same DataFrame

# Y is the single target column; X is every remaining column.
Y = source['Stress Keeps Patient from Sleeping']
X = source.drop(columns='Stress Keeps Patient from Sleeping')

In [4]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Standardize the features. The scaler is fitted on the training rows only,
# and the same fitted scaler is then applied to both partitions so that no
# statistics from the test rows influence the transformation.
# NOTE: X_train / X_test are overwritten with their scaled versions, so
# re-running this cell without re-running the split re-fits the scaler on
# data that has already been scaled.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [6]:
# Build a logistic regression classifier with scikit-learn's default
# settings and train it on the standardized training features.
logregression = LogisticRegression()
logregression.fit(X=X_train, y=Y_train)

In [7]:
# Score the held-out rows with the trained classifier.
# Y_probs keeps only column index 1 of predict_proba, i.e. the probability
# assigned to the second entry of the model's class list.
Y_probs = logregression.predict_proba(X_test)[:, 1]
# Y_pred holds the predicted class labels for the same rows.
Y_pred = logregression.predict(X_test)

In [8]:
# Collect the evaluation metrics for the held-out rows, keyed by display name.
# Insertion order is the order in which the metrics are later printed.
results = {}

# Metrics computed from the predicted labels, except AUC, which is computed
# from the predicted probabilities (Y_probs) rather than the labels.
results["Accuracy"] = metrics.accuracy_score(Y_test, Y_pred)
results["AUC"] = metrics.roc_auc_score(Y_test, Y_probs)
results["Precision"] = metrics.precision_score(Y_test, Y_pred)
results["Recall"] = metrics.recall_score(Y_test, Y_pred)
results["F1 Score"] = metrics.f1_score(Y_test, Y_pred)
results["MCC"] = metrics.matthews_corrcoef(Y_test, Y_pred)

In [9]:
# Print one "name: value" line per metric, with values rounded to four
# decimal places, in the order the metrics were stored.
for metric in results:
    value = results[metric]
    print(f"{metric}: {value:.4f}")

Accuracy: 0.7063
AUC: 0.7221
Precision: 0.3913
Recall: 0.2432
F1 Score: 0.3000
MCC: 0.1325


In [10]:
# Persist the trained classifier so it can be reloaded without retraining.
# The file name and contents are unchanged: 'logregression.pkl' holds only
# the LogisticRegression object.
with open('logregression.pkl', 'wb') as f:
    pickle.dump(logregression, f)

# The classifier was trained on features standardized by `sc`, so it only
# produces meaningful predictions on inputs transformed by that same fitted
# scaler. Save the scaler next to the model so the preprocessing can be
# reproduced at inference time (load both, then call
# model.predict(scaler.transform(new_rows))).
# NOTE: pickle files can execute arbitrary code when loaded; only unpickle
# these files if they come from a trusted source.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(sc, f)