In [1]:
# Import dependencies
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [5]:
data = "../Resources/loans_data_encoded.csv"
loans_df = pd.read_csv(data)
loans_df.head(15)


Unnamed: 0,amount,term,age,bad,month_num,education_Bachelor,education_High School or Below,education_Master or Above,education_college,gender_female,gender_male
0,1000,30,45,0,6,0,1,0,0,0,1
1,1000,30,50,0,7,1,0,0,0,1,0
2,1000,30,33,0,8,1,0,0,0,1,0
3,1000,15,27,0,9,0,0,0,1,0,1
4,1000,30,28,0,10,0,0,0,1,1,0
5,300,7,35,0,7,0,0,1,0,0,1
6,1000,30,29,0,9,0,0,0,1,0,1
7,1000,30,36,0,5,0,0,0,1,0,1
8,1000,30,28,0,5,0,0,0,1,0,1
9,800,15,26,0,4,0,0,0,1,0,1


In [11]:
# Define the features

X = loans_df.copy()
X = X.drop('bad', axis=1)
X.head()
X.shape

(500, 10)

In [12]:
# Define the target set

y= loans_df['bad'].values
y[:5]

array([0, 0, 0, 0, 0], dtype=int64)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78, train_size=0.80)

# Determine the shape of the dataset segments
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(400, 10)
(100, 10)
(400,)
(100,)


In [18]:
# scale the data to eliminate bias

scaler = StandardScaler()

# Fitting the Standard Scaler with the training data
X_scaler = scaler.fit(X_train)

# Do the scaling
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [19]:
# Create the decicion tree instance.

model = tree.DecisionTreeClassifier()

# Fitting the model

model = model.fit(X_train_scaled, y_train)

In [21]:
# Make predictions with the training data

pred = model.predict(X_test_scaled)
pred

array([1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0], dtype=int64)

In [22]:
# Evaluate the model

cm = confusion_matrix(y_test, pred)

# Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,38,28
Actual 1,18,16


In [23]:
# Calculate the accuracy score

acc_score = accuracy_score(y_test, pred)


In [26]:
# Display the results

print('Confusion Matrix')
display(cm_df)

print(f'Accuracy Score : {acc_score}')
print('Classification Report')
print(classification_report(y_test, pred))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,38,28
Actual 1,18,16


Accuracy Score : 0.54
Classification Report
              precision    recall  f1-score   support

           0       0.68      0.58      0.62        66
           1       0.36      0.47      0.41        34

    accuracy                           0.54       100
   macro avg       0.52      0.52      0.52       100
weighted avg       0.57      0.54      0.55       100

