In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

In [None]:
# Read the lending dataset from the relative Resources folder into a DataFrame.
lending_csv_path = Path('Resources/lending_data.csv')
lending_df = pd.read_csv(lending_csv_path)

# Show the first five rows as a quick sanity check of the load.
lending_df.head(n=5)

In [None]:
# Features: every column except the label. `drop` returns a new frame, so
# lending_df itself is left untouched.
X = lending_df.drop(labels='loan_status', axis='columns')

# Target: the `loan_status` column on its own.
y = lending_df.loc[:, 'loan_status']

In [None]:
# Preview the first five target values.
y.head(n=5)

In [None]:
# Preview the first five feature rows.
X.head(n=5)

In [None]:
# Count the rows per loan_status value (most frequent first) to see how the
# classes are distributed before modelling.
y.value_counts(sort=True, ascending=False)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test set; random_state pins the shuffle so the split is reproducible.
# test_size=0.25 is what scikit-learn uses when neither size is given; it is
# only spelled out here, not changed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier trained on the original (not resampled) training split.
logistic_regression_model = LogisticRegression(random_state=1, solver='lbfgs')
logistic_regression_model.fit(X_train, y_train)

# `fit` returns the estimator itself, so lr_model is an alias of the same
# fitted object rather than a second model.
lr_model = logistic_regression_model

In [None]:
# Predict labels for the held-out test features with the fitted baseline model.
test_predictions = logistic_regression_model.predict(X_test)

# Side-by-side view of predicted vs. actual labels.
pd.DataFrame(
    data={
        'Predictions': test_predictions,
        'Actual': y_test,
    }
)

In [None]:
# Balanced accuracy of the baseline model on the held-out test set.
baseline_balanced_accuracy = balanced_accuracy_score(y_true=y_test, y_pred=test_predictions)
print(f"The balanced accuracy score of the model is: {baseline_balanced_accuracy}")

In [None]:
# Confusion matrix for the baseline model: rows follow the actual labels in
# y_test, columns follow the predicted labels.
cf_test_matrix = confusion_matrix(y_true=y_test, y_pred=test_predictions)
cf_test_matrix

In [None]:
# Per-class precision, recall and F1 for the baseline model on the test set.
testing_report = classification_report(y_true=y_test, y_pred=test_predictions)
print(testing_report)

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Randomly duplicate minority-class rows until the classes are balanced;
# random_state makes the draw reproducible.
ros = RandomOverSampler(random_state=1)

# Resample the TRAINING split only. Resampling the full X/y would copy rows
# that also sit in X_test into the fitting data, leaking test information and
# inflating any score later computed on X_test.
X_ros_model, y_ros_model = ros.fit_resample(X_train, y_train)

In [None]:
from collections import Counter

# Counter over a DataFrame iterates its column labels, not its rows, so it
# says nothing about the resample. Report the resampled feature shape instead.
print(X_ros_model.shape)

# Class counts of the resampled target.
resampled_class_counts = Counter(y_ros_model)
print(resampled_class_counts)

# Verify the balance before announcing it, instead of only stating it in text.
assert len(set(resampled_class_counts.values())) == 1, "resampled classes are not evenly split"
print("The y_ros_model resampled data is equivalently split")

In [None]:
# Logistic regression fitted on the oversampled data.
classifier = LogisticRegression(random_state=1, solver='lbfgs')
classifier.fit(X_ros_model, y_ros_model)

# In-sample predictions: made on the same resampled rows the model was fitted on.
predictions = classifier.predict(X_ros_model)
pd.DataFrame(
    data={
        'Predictions': predictions,
        'Actual': y_ros_model,
    }
)

In [None]:
# Fit on the oversampled data. Fitting on X_train/y_train here would only
# rebuild the baseline model and discard the effect of the resampling.
classifier = LogisticRegression(solver='lbfgs', random_state=1)

classifier.fit(X_ros_model, y_ros_model)

# Predict on the test features so the result can be compared against y_test.
predictions = classifier.predict(X_test)
pd.DataFrame({'Predictions': predictions, 'Actual': y_test})

In [None]:
# `predictions` was produced from X_test, so it must be scored against y_test.
# y_ros_model has a different number of rows and makes the metric raise ValueError.
print(f"The balanced accuracy score of the model is: {balanced_accuracy_score(y_test, predictions)}")

In [None]:
# Compare the test-set predictions with the test-set labels. y_ros_model does
# not line up row-for-row with `predictions` (which come from X_test).
cf_matrix = confusion_matrix(y_test, predictions)
cf_matrix

In [None]:
# Per-class precision, recall and F1 on the test set. The true labels must be
# y_test because `predictions` was made from X_test, not from the resampled data.
report = classification_report(y_test, predictions)
print(report)