In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
data = {'salary':[59000,9600,9878,98787,65500,54400,34125,89765,27354,43562],
       'age':[22,21,25,23,24,30,22,24,40,34],
       'loan_status':[0,0,0,0,1,0,0,1,0,0]}
df = pd.DataFrame(data,columns=data)
df

Unnamed: 0,salary,age,loan_status
0,59000,22,0
1,9600,21,0
2,9878,25,0
3,98787,23,0
4,65500,24,1
5,54400,30,0
6,34125,22,0
7,89765,24,1
8,27354,40,0
9,43562,34,0


In [3]:
features = df.drop('loan_status',axis=1)
target = df['loan_status']

In [4]:
from imblearn.over_sampling import SMOTE
smote = SMOTE(sampling_strategy='auto', k_neighbors=1)

In [5]:
balanced_features, balanced_target = smote.fit_resample(features, target)

In [6]:
display(features.shape)
display(target.shape)
display(balanced_features.shape)
display(balanced_target.shape)

(10, 2)

(10,)

(16, 2)

(16,)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(balanced_features, balanced_target, test_size=0.20, random_state=42)

In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
logit = LogisticRegression()

In [10]:
logit.fit(X_train,y_train)

In [11]:
prediction = logit.predict(X_test)
prediction

array([1, 0, 0, 1], dtype=int64)

In [12]:
X_test

Unnamed: 0,salary,age
0,59000,22
1,9600,21
5,54400,30
14,71490,24


In [13]:
y_test

0     0
1     0
5     0
14    1
Name: loan_status, dtype: int64

In [14]:
df_with_pred = X_test.copy()
df_with_pred['loan_status'] = y_test
df_with_pred['prediction'] = prediction

In [15]:
df_with_pred

Unnamed: 0,salary,age,loan_status,prediction
0,59000,22,0,1
1,9600,21,0,0
5,54400,30,0,0
14,71490,24,1,1


In [16]:
all_combined = pd.concat([df,df_with_pred.rename({'salary':'salary_pred', 'age':'age_pred', 'loan_status':'loan_status_pred'},axis=1)],axis=1)
all_combined

Unnamed: 0,salary,age,loan_status,salary_pred,age_pred,loan_status_pred,prediction
0,59000.0,22.0,0.0,59000.0,22.0,0.0,1.0
1,9600.0,21.0,0.0,9600.0,21.0,0.0,0.0
2,9878.0,25.0,0.0,,,,
3,98787.0,23.0,0.0,,,,
4,65500.0,24.0,1.0,,,,
5,54400.0,30.0,0.0,54400.0,30.0,0.0,0.0
6,34125.0,22.0,0.0,,,,
7,89765.0,24.0,1.0,,,,
8,27354.0,40.0,0.0,,,,
9,43562.0,34.0,0.0,,,,


In [17]:
print(classification_report(y_test,prediction))

              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       0.50      1.00      0.67         1

    accuracy                           0.75         4
   macro avg       0.75      0.83      0.73         4
weighted avg       0.88      0.75      0.77         4



In [18]:
filterred = all_combined[(~all_combined['prediction'].isnull()) & (~all_combined['salary'].isnull())]
filterred

Unnamed: 0,salary,age,loan_status,salary_pred,age_pred,loan_status_pred,prediction
0,59000.0,22.0,0.0,59000.0,22.0,0.0,1.0
1,9600.0,21.0,0.0,9600.0,21.0,0.0,0.0
5,54400.0,30.0,0.0,54400.0,30.0,0.0,0.0


In [19]:
print(classification_report(filterred['loan_status'],filterred['prediction']))

              precision    recall  f1-score   support

         0.0       1.00      0.67      0.80         3
         1.0       0.00      0.00      0.00         0

    accuracy                           0.67         3
   macro avg       0.50      0.33      0.40         3
weighted avg       1.00      0.67      0.80         3



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
