# Logistic Regression

run grid search on Logistic regression with different thresholds.

In [4]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

In [6]:
majors = ['main','business','compsci','engineering','law','medical','others']

depression_threshold = 3

def model_assess(model, title="Default"):
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f'Accuracy of {title}: {round(accuracy_score(y_test, preds), 5)}')


for major in majors:
    data = pd.read_csv(f'../Data/clean_df_{major}.csv')

    columns_to_drop = ['Course','Gender','Sleep_Quality','Physical_Activity','Diet_Quality','Social_Support','Relationship_Status','Substance_Use','Counseling_Service_Use','Family_History','Chronic_Illness','Extracurricular_Involvement','Residence_Type']
    data = data.drop(columns=[col for col in columns_to_drop if col in data.columns])

    data['Depression_Score'] = data['Depression_Score'].apply(lambda x: 1 if x > depression_threshold else 0)
    y = data['Depression_Score']
    X = data.drop(columns=['Depression_Score'])
    X = X.apply(pd.to_numeric, errors='coerce').dropna()
    y = y[X.index]

    scaler = MinMaxScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    lrModel = LogisticRegression(random_state=42, solver='lbfgs', multi_class='multinomial')
    
    lrModel.fit(X_train, y_train)
    preds = lrModel.predict(X_test)
    print(f'Accuracy of {major} with Logistic Regression: {round(accuracy_score(y_test, preds), 5)}')




Accuracy of main with Logistic Regression: 0.75929
Accuracy of business with Logistic Regression: 0.7933
Accuracy of compsci with Logistic Regression: 0.51172
Accuracy of engineering with Logistic Regression: 0.78652
Accuracy of law with Logistic Regression: 0.80347
Accuracy of medical with Logistic Regression: 0.78053
Accuracy of others with Logistic Regression: 0.7933
