In [None]:
# Import Dependencies
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
%matplotlib inline

In [None]:
# Load the monthly rates table from CSV, then list its column names
# so the available series are visible before subsetting.
main_df = pd.read_csv(Path('Rates_MO.csv'))
main_df.columns

In [None]:
# Credit-card subset: the observation date, the two credit-card series,
# and the macro indicators selected from the main table.
ccard_df = main_df.loc[:, [
    'observation_date',
    'CCARD_CO',
    'CCARD_DELNQ',
    'GDP',
    'Household_DBT_Inc',
    'Consumer_Confidence',
    'FEDFUNDS',
    'SAVINGS_RATE_MO',
    'UNRATE',
]]

# Independent copy that receives the binned columns, leaving ccard_df untouched
ccard_bin_df = ccard_df.copy()

In [None]:
# Binning the data for classification.
# Every series is cut into 4 quantile-based bins with pd.qcut and stored in a
# new "<column>_BIN" column of ccard_bin_df.
# TODO (open question): should we be using pd.cut to get more bins?

# CCARD_CO gets integer labels 1-4 (its binned column is used as the target).
ccard_bin_df["CCARD_CO_BIN"] = pd.qcut(ccard_df['CCARD_CO'], 4, labels=[1, 2, 3, 4])

# All remaining series share the same four text labels, so bin them in one
# loop instead of repeating the same statement once per column.
quartile_labels = ['low', 'medium-low', 'medium-high', 'high']
for feature_col in [
    'CCARD_DELNQ',
    'GDP',
    'Household_DBT_Inc',
    'Consumer_Confidence',
    'FEDFUNDS',
    'SAVINGS_RATE_MO',
    'UNRATE',
]:
    ccard_bin_df[f"{feature_col}_BIN"] = pd.qcut(ccard_df[feature_col], 4, labels=quartile_labels)


In [None]:
# Separate the target (y) from the features (X).
# y is the binned CCARD_CO column; X_bin keeps only the remaining *_BIN
# columns by dropping the date, every raw series, and the target itself.
y = ccard_bin_df["CCARD_CO_BIN"]
X_bin = ccard_bin_df.drop(
    [
        "CCARD_CO",
        "observation_date",
        "CCARD_DELNQ",
        "GDP",
        "Household_DBT_Inc",
        "Consumer_Confidence",
        "FEDFUNDS",
        "SAVINGS_RATE_MO",
        "UNRATE",
        "CCARD_CO_BIN",
    ],
    axis=1,
)
X_bin.columns

In [None]:
# One-hot encode the binned feature columns
X_bin = pd.get_dummies(data=X_bin)

# Side-by-side view of the target and the encoded features (for inspection)
X = pd.concat(objs=[y, X_bin], axis="columns")
X

In [None]:
# Split the encoded features and the target into train and test sets;
# the fixed random_state keeps the split identical between runs.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = train_test_split(X_bin, y, random_state=78)

In [None]:
# Build the scaler and fit it on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training split, then to the test split
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Creating the decision tree classifier instance.
# random_state is fixed (same seed as the train/test split) so that fitting
# is deterministic and a fresh Restart & Run All reproduces the same tree;
# without it, ties between equally good splits may be broken differently
# on each run.
model = tree.DecisionTreeClassifier(random_state=78)

In [None]:
# Train the classifier on the scaled training features and their labels
model = model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict the target bin for every row of the scaled test features
predictions = model.predict(X=X_test_scaled)

In [None]:
# Calculating the confusion matrix.
# The class order is pinned to the four target bin labels (1-4) so the matrix
# is always 4x4 and lines up with the row/column names below, even if one of
# the bins happens to be absent from both y_test and predictions.
cm = confusion_matrix(y_test, predictions, labels=[1, 2, 3, 4])
cm_df = pd.DataFrame(
    cm,
    index=["Actual Low", "Actual Medium-Low", "Actual Medium-High", "Actual High"],
    columns=["Predicted Low", "Predicted Medium-Low", "Predicted Medium-High", "Predicted High"],
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)

In [None]:
# Show the evaluation results: the confusion matrix table, the overall
# accuracy, and the per-class classification report.
report_text = classification_report(y_test, predictions)

print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(report_text)