# Instructor Do: Decision Trees

In [4]:
# Import Modules
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Needed for decision tree visualization
import pydotplus
from IPython.display import Image


ModuleNotFoundError: No module named 'pydotplus'

## Loading and Preprocessing the Credit Data

In [5]:
# Loading data
# The path is relative, so the CSV is resolved against the kernel's current working directory.
file_path = Path("credit_data.csv")
# Read the whole CSV into a DataFrame; later cells pull the target and the features out of `df`.
df = pd.read_csv(file_path)

In [6]:
# Separate the label column from the predictors
y = df['credit_risk']

# Every remaining column is a feature, except 'id', which is moved into the
# index so it is not treated as a feature column.
X = df.drop(columns='credit_risk').set_index('id')

In [7]:
# Peek at the first five target labels
y.head(5)



0    good
1    good
2    good
3    good
4    good
Name: credit_risk, dtype: object

In [8]:
# Encode the categorical variables using get_dummies.
# dtype=int pins the indicator columns to 0/1 integers: pandas >= 2.0 emits
# True/False columns by default, which would not match the 0/1 table shown
# below when the notebook is re-run on a fresh kernel.
X = pd.get_dummies(X, dtype=int)

# Show the first rows to confirm that only numeric columns remain
X.head()

Unnamed: 0_level_0,duration,amount,age,status_... < 0 DM,status_... >= 200 DM / salary for at least 1 year,status_0<= ... < 200 DM,status_no checking account,credit_history_all credits at this bank paid back duly,credit_history_critical account/other credits elsewhere,credit_history_delay in paying off in the past,...,job_manager/self-empl./highly qualif. employee,job_skilled employee/official,job_unemployed/unskilled - non-resident,job_unskilled - resident,people_liable_0 to 2,people_liable_3 or more,telephone_no,telephone_yes (under customer name),foreign_worker_no,foreign_worker_yes
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,18,1049,21,0,0,0,1,1,0,0,...,0,1,0,0,1,0,1,0,1,0
2,9,2799,36,0,0,0,1,1,0,0,...,0,1,0,0,0,1,1,0,1,0
3,12,841,23,1,0,0,0,0,0,0,...,0,0,0,1,1,0,1,0,1,0
4,12,2122,39,0,0,0,1,1,0,0,...,0,0,0,1,0,1,1,0,0,1
5,12,2171,38,0,0,0,1,1,0,0,...,0,0,0,1,1,0,1,0,0,1


In [9]:
# Hold out part of the data for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)



In [10]:
# Creating StandardScaler instance
# Only the (unfitted) scaler object is built here; no data is passed to it in this cell.
scaler = StandardScaler()



In [11]:
# Fitting Standard Scaler
# Fit on the training split only, so nothing from X_test informs the scaling parameters.
# NOTE(review): fit() presumably returns the scaler itself, making X_scaler an alias of `scaler` — confirm.
X_scaler = scaler.fit(X_train)



In [12]:
# Scaling data
# Both splits go through the same fitted scaler (X_scaler), so they share one set of scaling parameters.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


## Fitting the Decision Tree Model

In [13]:
# Creating the decision tree classifier instance.
# random_state is pinned (same seed as the train/test split) because the tree
# builder shuffles candidate features when searching for splits; without a seed
# the fitted tree, and therefore the reported metrics, can differ between runs.
model = tree.DecisionTreeClassifier(random_state=1)



In [15]:
# Train the tree on the scaled training features and their labels
model = model.fit(X=X_train_scaled, y=y_train)



## Making Predictions Using the Tree Model

In [16]:
# Making predictions using the testing data
# The model was fitted on scaled features, so it is given the scaled test split here as well.
predictions = model.predict(X_test_scaled)



## Model Evaluation

In [13]:
# Compare the held-out labels with the model's predictions, per class and overall
print(classification_report(y_true=y_test, y_pred=predictions))


              precision    recall  f1-score   support

         bad       0.47      0.53      0.50        74
        good       0.79      0.75      0.77       176

    accuracy                           0.68       250
   macro avg       0.63      0.64      0.63       250
weighted avg       0.70      0.68      0.69       250



## Visualizing the Decision Tree

In [17]:
# Create DOT data.
# class_names are taken from the fitted model instead of being hard-coded:
# export_graphviz expects them in the ascending order of the target classes,
# which is what model.classes_ holds. The previous ["0", "1"] placeholders
# hid the real labels in every node of the rendered tree.
dot_data = tree.export_graphviz(
    model,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=list(X.columns),
    class_names=[str(label) for label in model.classes_],
    filled=True,
)

# Draw graph
graph = pydotplus.graph_from_dot_data(dot_data)

# Show graph
Image(graph.create_png())



NameError: name 'pydotplus' is not defined

In [15]:
# Write the rendered tree to disk as a PDF ...
file_path = "credit_tree.pdf"
graph.write_pdf(file_path)

# ... and again as a PNG; the result of this last call is the cell's displayed output
file_path = "credit_tree.png"
graph.write_png(file_path)


True