In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.linear_model import ElasticNet, LogisticRegression, RidgeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

In [2]:
train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test.csv')

In [3]:
X_train, X_test = train.drop(columns=['loan_approved']), test.drop(columns=['loan_approved']) 
y_train, y_test = train['loan_approved'], test['loan_approved']

In [4]:
X_train.head()

Unnamed: 0,income,credit_score,loan_amount,years_employed,points
0,-0.868786,1.272426,-0.398622,-1.558591,0.453156
1,0.630376,-0.139933,0.692489,0.038773,-0.344081
2,0.416243,-0.016583,-1.200687,0.459133,0.18741
3,-0.21058,-0.670339,0.908299,-0.633801,-0.875573
4,-0.437968,1.642477,1.059245,0.206917,0.984648


In [5]:
y_train.head()

0    1.0
1    0.0
2    1.0
3    0.0
4    1.0
Name: loan_approved, dtype: float64

In [6]:
lr = LogisticRegression(random_state=0).fit(X_train, y_train)
preds = lr.predict(X_test) # Here returns the predictions.

In [7]:
accuracy = accuracy_score(y_test, preds) # Measures the proportion of correctly classified instances
print(f"Accuracy of Logistic Regression: {np.round(accuracy, 2)}")

Accuracy of Logistic Regression: 1.0


In [8]:
f1 = f1_score(y_test, preds) # Harmonic mean of precision and recall
print(f"F1 Score of Logistic Regression: {np.round(f1,2)}")

F1 Score of Logistic Regression: 1.0


In [9]:
roc_auc = roc_auc_score(y_test, preds) # Evaluates the performance of a binary classifier across various classification thresholds
print(f"AUC of Logistic Regression: {np.round(roc_auc,2)}")

AUC of Logistic Regression: 1.0


In [10]:
rc = RidgeClassifier()
rc.fit(X_train, y_train)
preds= rc.predict(X_test) # Here you get the predictions too.

In [11]:
accuracy = accuracy_score(y_test, preds) 
print(f"Accuracy of RidgeClassifier: {np.round(accuracy,2)}")

Accuracy of RidgeClassifier: 0.95


In [12]:
f1 = f1_score(y_test, preds) 
print(f"F1 Score of RidgeClassifier: {np.round(f1,2)}")

F1 Score of RidgeClassifier: 0.94


In [13]:
roc_auc = roc_auc_score(y_test, preds)
print(f"AUC of RidgeClassifier: {np.round(roc_auc,2)}")

AUC of RidgeClassifier: 0.94


In [14]:
rf = DecisionTreeClassifier(random_state=0)
rf.fit(X_train, y_train)
preds=rf.predict(X_test)

In [15]:
accuracy = accuracy_score(y_test, preds) 
print(f"Accuracy of DecisionTreeClassifier: {np.round(accuracy,2)}")

Accuracy of DecisionTreeClassifier: 1.0


In [16]:
f1 = f1_score(y_test, preds) 
print(f"F1 Score of DecisionTreeClassifier: {np.round(f1,2)}")

F1 Score of DecisionTreeClassifier: 1.0


In [17]:
roc_auc = roc_auc_score(y_test, preds)
print(f"AUC of DecisionTreeClassifier: {np.round(roc_auc,2)}")

AUC of DecisionTreeClassifier: 1.0


In [18]:
ab = AdaBoostClassifier(n_estimators=100, random_state=0)
ab.fit(X_train, y_train)
preds = ab.predict(X_test)

In [19]:
accuracy = accuracy_score(y_test, preds) 
print(f"Accuracy of AdaBoostClassifier: {np.round(accuracy,2)}")

Accuracy of AdaBoostClassifier: 1.0


In [20]:
f1 = f1_score(y_test, preds) 
print(f"F1 Score of AdaBoostClassifier: {np.round(f1,2)}")

F1 Score of AdaBoostClassifier: 1.0


In [21]:
roc_auc = roc_auc_score(y_test, preds)
print(f"AUC of AdaBoostClassifier: {np.round(roc_auc,2)}")

AUC of AdaBoostClassifier: 1.0


In [None]:
clf = xgb.XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='binary:logistic')
clf.fit(X_train, y_train)
preds = clf.predict(X_test)

In [27]:
accuracy = accuracy_score(y_test, preds) 
print(f"Accuracy of XGBClassifier: {np.round(accuracy,2)}")

Accuracy of XGBClassifier: 1.0


In [28]:
f1 = f1_score(y_test, preds) 
print(f"F1 Score of XGBClassifier: {np.round(f1,2)}")

F1 Score of XGBClassifier: 1.0


In [29]:
roc_auc = roc_auc_score(y_test, preds)
print(f"AUC of XGBClassifier: {np.round(roc_auc,2)}")

AUC of XGBClassifier: 1.0
