# Bachelor's thesis: Logistic Regression model

In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score,roc_curve, brier_score_loss

## Loading the data 

In [2]:
# Read the accepts and rejects datasets from the shared data folder.
# Each name is bound only once its read succeeds, so a failed load cannot
# leave an empty placeholder DataFrame in the kernel for later cells to use.
accepts = pd.read_csv('../data/New_accepts.csv', encoding="ISO-8859-1", low_memory=False)
rejects = pd.read_csv('../data/New_rejects.csv', encoding="ISO-8859-1", low_memory=False)

In [3]:
# Separate the "loan_status" target from the features of each dataset.
# The loaded frames are not modified: the targets are copied out and the
# feature matrices are new frames without the target column.
y_acc = accepts["loan_status"].copy()
X_acc = accepts.drop(columns="loan_status")
y_rej = rejects["loan_status"].copy()
X_rej = rejects.drop(columns="loan_status")

## Preprocessing

In [4]:
# One-hot encode the state column. The accepts set defines the reference
# design matrix: its first state level is dropped as the baseline.
X_acc = pd.get_dummies(data=X_acc, columns=['addr_state'], drop_first=True)

# Encode the rejects with ALL state levels and then align them to the accepts
# columns. Encoding both sets independently with drop_first=True breaks as
# soon as the two files do not contain exactly the same states: the column
# sets differ and a different baseline level may be dropped in each frame.
# After the reindex:
#   * the accepts baseline column is removed (it is not in X_acc.columns),
#   * states that never occur in the rejects become all-zero columns,
#   * columns that exist only in the rejects are discarded, because the
#     scaler and model fitted on the accepts have no parameters for them.
X_rej = pd.get_dummies(data=X_rej, columns=['addr_state']).reindex(
    columns=X_acc.columns, fill_value=0
)

In [5]:
# Standardise the features. The scaler is fitted on the accepts only and the
# same fitted transformation is then applied to both sets.
scaler = StandardScaler().fit(X_acc)
X_acc = scaler.transform(X_acc)
X_rej = scaler.transform(X_rej)

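### Set the penalty parameter for the logistic regression here (it is passed as `C`, the inverse of the regularization strength, so smaller values mean a stronger penalty)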

In [6]:
# Value handed to LogisticRegression as `C` in the model cell (2**2).
penalty_parameter = 4

## Model

In [7]:
# Fit the logistic regression on the accepts, then score the rejects.
model = LogisticRegression(
    C=penalty_parameter,
    solver='liblinear',
    random_state=42,
).fit(X_acc, y_acc)

# Keep only the second probability column returned by predict_proba.
reject_probabilities = model.predict_proba(X_rej)
yhat = reject_probabilities[:, 1]

## Results

In [8]:
# Area under the ROC curve of the predicted probabilities on the rejects.
auc = roc_auc_score(y_true=y_rej, y_score=yhat)
print("AUC:", auc)

AUC: 0.5515464387085224


In [9]:
# Brier score of the predicted probabilities on the rejects, with label 1
# treated as the positive class.
brier = brier_score_loss(
    y_rej,
    yhat,
    pos_label=1,
)
print("Brier score:", brier)

Brier score: 0.3979275718912215


In [10]:
# Kolmogorov-Smirnov statistic: the largest gap between the true-positive and
# false-positive rates along the ROC curve, with label 1 as the positive class.
fpr, tpr, thresholds = roc_curve(y_rej, yhat, pos_label=1)

# np.max reduces the array in one vectorised call and propagates NaN, whereas
# the builtin max() walks the array element by element and gives an
# order-dependent answer when a NaN is present.
ks_statistic = np.max(tpr - fpr)
print("KS-Statistic:",ks_statistic)

KS-Statistic: 0.07523986838267976
