In [1]:
# Import Dependencies
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
%matplotlib inline

In [2]:
# Import DataFrame 
main_df = pd.read_csv('Rates_MO.csv')
main_df.columns

Index(['Unnamed: 0', 'observation_date', 'C&I_DELNQ', 'CCARD_CO',
       'CCARD_DELNQ', 'CORP_DEBT_NET_WORTH', 'CORP_SAVINGS_LEVEL', 'CRE_CO',
       'CRE_DELNQ', 'GDP', 'Homeowner_Vacancy_rate', 'Household_DBT_Inc',
       'Mortgage_CO', 'Mortgage_DELNQ', 'Rental_Vacancy_Rate',
       'Consumer_Confidence', 'FEDFUNDS', 'Manufacturing_Confidence',
       'SAVINGS_RATE_MO', 'UNRATE', 'C&I_CO'],
      dtype='object')

In [3]:
# Create credit card dataset
ccard_df = main_df[['observation_date', 'CCARD_CO', 'CCARD_DELNQ', 'GDP', 'Household_DBT_Inc', 'Consumer_Confidence', 'FEDFUNDS', 'SAVINGS_RATE_MO', 'UNRATE']]

In [4]:
ccard_df.dtypes

observation_date        object
CCARD_CO               float64
CCARD_DELNQ            float64
GDP                    float64
Household_DBT_Inc      float64
Consumer_Confidence    float64
FEDFUNDS               float64
SAVINGS_RATE_MO        float64
UNRATE                 float64
dtype: object

In [5]:
ccard_bin_df = ccard_df.copy()

In [6]:
# Find Minimum and Maximum values to determine number of bins
a=ccard_df['CCARD_CO'].max()
b=ccard_df['CCARD_CO'].min()
c=ccard_df['CCARD_CO'].count()
print(a)
print(b)
print(c)

10.51
1.66
390


In [7]:
# Binning the data for classification Question: 
# Should we be using pd cut to get more bins? 
ccard_bin_df["CCARD_CO_BIN"] = pd.qcut(ccard_df['CCARD_CO'],4, labels= [1, 2, 3, 4])
ccard_bin_df["CCARD_DELNQ_BIN"] = pd.qcut(ccard_df['CCARD_DELNQ'],4, labels= ['low', 'medium-low', 'medium-high', 'high'])
ccard_bin_df["GDP_BIN"] = pd.qcut(ccard_df['GDP'],4, labels= ['low', 'medium-low', 'medium-high', 'high'])
ccard_bin_df["Household_DBT_Inc_BIN"] = pd.qcut(ccard_df['Household_DBT_Inc'],4, labels= ['low', 'medium-low', 'medium-high', 'high'])
ccard_bin_df["Consumer_Confidence_BIN"] = pd.qcut(ccard_df['Consumer_Confidence'],4, labels= ['low', 'medium-low', 'medium-high', 'high'])
ccard_bin_df["FEDFUNDS_BIN"] = pd.qcut(ccard_df['FEDFUNDS'],4, labels= ['low', 'medium-low', 'medium-high', 'high'])
ccard_bin_df["SAVINGS_RATE_MO_BIN"] = pd.qcut(ccard_df['SAVINGS_RATE_MO'],4, labels= ['low', 'medium-low', 'medium-high', 'high'])
ccard_bin_df["UNRATE_BIN"] = pd.qcut(ccard_df['UNRATE'],4, labels= ['low', 'medium-low', 'medium-high', 'high'])


In [8]:
ccard_bin_df.head()

Unnamed: 0,observation_date,CCARD_CO,CCARD_DELNQ,GDP,Household_DBT_Inc,Consumer_Confidence,FEDFUNDS,SAVINGS_RATE_MO,UNRATE,CCARD_CO_BIN,CCARD_DELNQ_BIN,GDP_BIN,Household_DBT_Inc_BIN,Consumer_Confidence_BIN,FEDFUNDS_BIN,SAVINGS_RATE_MO_BIN,UNRATE_BIN
0,1991-01-01,4.16,5.26,-1.9,11.578032,66.8,6.91,9.4,6.4,3,high,low,medium-high,low,high,high,medium-high
1,1991-02-01,4.16,5.26,-1.9,11.578032,70.4,6.25,9.0,6.6,3,high,low,medium-high,low,high,high,medium-high
2,1991-03-01,4.16,5.26,-1.9,11.578032,87.7,6.12,8.1,6.8,3,high,low,medium-high,medium-low,high,high,high
3,1991-04-01,4.6,5.48,3.2,11.434237,81.8,5.91,8.7,6.7,3,high,medium-high,medium-high,medium-low,high,high,medium-high
4,1991-05-01,4.6,5.48,3.2,11.434237,78.3,5.78,8.5,6.9,3,high,medium-high,medium-high,medium-low,high,high,high


In [9]:
# Seperate the y and X variables
y = ccard_bin_df["CCARD_CO_BIN"]
X_bin = ccard_bin_df.drop(columns=["CCARD_CO", "observation_date", "CCARD_DELNQ", "GDP", "Household_DBT_Inc", "Consumer_Confidence", "FEDFUNDS", "SAVINGS_RATE_MO", "UNRATE", "CCARD_CO_BIN"])
X_bin.columns

Index(['CCARD_DELNQ_BIN', 'GDP_BIN', 'Household_DBT_Inc_BIN',
       'Consumer_Confidence_BIN', 'FEDFUNDS_BIN', 'SAVINGS_RATE_MO_BIN',
       'UNRATE_BIN'],
      dtype='object')

In [10]:
# Turn data into dummies
X_bin =pd.get_dummies(X_bin)
X = pd.concat([y, X_bin], axis=1)
X

Unnamed: 0,CCARD_CO_BIN,CCARD_DELNQ_BIN_low,CCARD_DELNQ_BIN_medium-low,CCARD_DELNQ_BIN_medium-high,CCARD_DELNQ_BIN_high,GDP_BIN_low,GDP_BIN_medium-low,GDP_BIN_medium-high,GDP_BIN_high,Household_DBT_Inc_BIN_low,...,FEDFUNDS_BIN_medium-high,FEDFUNDS_BIN_high,SAVINGS_RATE_MO_BIN_low,SAVINGS_RATE_MO_BIN_medium-low,SAVINGS_RATE_MO_BIN_medium-high,SAVINGS_RATE_MO_BIN_high,UNRATE_BIN_low,UNRATE_BIN_medium-low,UNRATE_BIN_medium-high,UNRATE_BIN_high
0,3,0,0,0,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,0
1,3,0,0,0,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,0
2,3,0,0,0,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,0,1
3,3,0,0,0,1,0,0,1,0,0,...,0,1,0,0,0,1,0,0,1,0
4,3,0,0,0,1,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,1,1,0,0,0,0,1,0,0,1,...,1,0,1,0,0,0,1,0,0,0
386,1,1,0,0,0,0,1,0,0,1,...,1,0,0,1,0,0,1,0,0,0
387,1,0,1,0,0,0,1,0,0,1,...,1,0,0,1,0,0,1,0,0,0
388,1,0,1,0,0,0,1,0,0,1,...,0,1,0,1,0,0,1,0,0,0


In [11]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X_bin, y, random_state=78)

In [12]:
# Creating StandardScaler instance
scaler = StandardScaler()

# Fitting Standard Scaler
X_scaler = scaler.fit(X_train)

# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [13]:
len(X_train_scaled)

292

In [14]:
# Create a random forest classifier
cc_model = RandomForestClassifier(n_estimators=100, random_state=78)

In [15]:
# Fitting the model
cc_model = cc_model.fit(X_train_scaled, y_train)

In [16]:
# Making predictions using the testing data
predictions = cc_model.predict(X_test_scaled)

In [17]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual Low", "Actual Medium-Low", "Actual Medium-High", "High"], columns=["Predicted Low", "Predicted Medium-Low", "Predicted Medium-High", "Predicted High"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)

In [18]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted Low,Predicted Medium-Low,Predicted Medium-High,Predicted High
Actual Low,24,1,0,0
Actual Medium-Low,1,19,4,0
Actual Medium-High,0,0,21,3
High,0,1,1,23


Accuracy Score : 0.8877551020408163
Classification Report
              precision    recall  f1-score   support

           1       0.96      0.96      0.96        25
           2       0.90      0.79      0.84        24
           3       0.81      0.88      0.84        24
           4       0.88      0.92      0.90        25

    accuracy                           0.89        98
   macro avg       0.89      0.89      0.89        98
weighted avg       0.89      0.89      0.89        98



# Part 2: Logistic Regression Model

In [19]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs', random_state=1)
classifier

In [20]:
# Train the data
classifier.fit(X_train, y_train)

In [21]:
# Predict outcomes for test data set
predictions = classifier.predict(X_test)
pd.DataFrame({"Prediction": predictions, "Actual": y_test})

Unnamed: 0,Prediction,Actual
384,1,1
206,4,3
132,4,4
226,4,4
223,4,4
...,...,...
300,1,1
293,1,1
71,4,3
370,1,1


In [22]:
from sklearn.metrics import accuracy_score
# Display the accuracy score for the test dataset.
accuracy_score(y_test, predictions)

0.8061224489795918

# Part 3: Deep Learning Attempt

In [12]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler

In [13]:
ccard_df.columns

Index(['observation_date', 'CCARD_CO', 'CCARD_DELNQ', 'GDP',
       'Household_DBT_Inc', 'Consumer_Confidence', 'FEDFUNDS',
       'SAVINGS_RATE_MO', 'UNRATE'],
      dtype='object')

In [14]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X_bin, y, random_state=78)

In [15]:
# Creating StandardScaler instance
scaler = MinMaxScaler()

# Fitting Standard Scaler
X_scaler = scaler.fit(X_train)

# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [16]:
print(X_train_scaled)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 1. 0.]
 [0. 0. 1. ... 0. 0. 1.]
 ...
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 1. 0.]
 [0. 0. 1. ... 1. 0. 0.]]


In [17]:
print("X_train set shape: ", X_train_scaled.shape)
print("y_train set shape: ", y_train.shape)

X_train set shape:  (292, 28)
y_train set shape:  (292,)


In [18]:
y_train.values

[1, 3, 2, 2, 1, ..., 2, 1, 3, 1, 2]
Length: 292
Categories (4, int64): [1 < 2 < 3 < 4]

In [19]:
num_classes = 5
y_train = to_categorical(y_train.values, num_classes)
y_test = to_categorical(y_test.values, num_classes)
y_train[0]

array([0., 1., 0., 0., 0.], dtype=float32)

In [174]:
keras.backend.clear_session()
model = Sequential()

number_input_features = X_train_scaled.shape[1]
hidden_nodes_layers1 = 60
hidden_nodes_layers2 = 60
hidden_nodes_layers3 = 60
hidden_nodes_layers5 = 40


model.add(Dense(20, activation='relu', input_dim=number_input_features))
model.add(Dense(hidden_nodes_layers1, activation='relu'))
model.add(Dense(hidden_nodes_layers2, activation='relu'))
model.add(Dense(hidden_nodes_layers3, activation='relu'))
model.add(Dense(hidden_nodes_layers4, activation='relu'))
# model.add(Dense(hidden_nodes_layers5, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 20)                580       
                                                                 
 dense_1 (Dense)             (None, 60)                1260      
                                                                 
 dense_2 (Dense)             (None, 60)                3660      
                                                                 
 dense_3 (Dense)             (None, 60)                3660      
                                                                 
 dense_4 (Dense)             (None, 50)                3050      
                                                                 
 dense_5 (Dense)             (None, 5)                 255       
                                                                 
Total params: 12465 (48.69 KB)
Trainable params: 12465 (

In [175]:
# Compile the model
model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [172]:
# Train the model
model.fit(
    X_train_scaled,
    y_train,
    epochs=200,
    shuffle=True,
    verbose=2
)

Epoch 1/200
10/10 - 1s - loss: 1.5451 - accuracy: 0.2979 - 604ms/epoch - 60ms/step
Epoch 2/200
10/10 - 0s - loss: 1.4560 - accuracy: 0.2877 - 13ms/epoch - 1ms/step
Epoch 3/200
10/10 - 0s - loss: 1.3856 - accuracy: 0.3733 - 13ms/epoch - 1ms/step
Epoch 4/200
10/10 - 0s - loss: 1.2917 - accuracy: 0.4692 - 12ms/epoch - 1ms/step
Epoch 5/200
10/10 - 0s - loss: 1.1497 - accuracy: 0.6781 - 13ms/epoch - 1ms/step
Epoch 6/200
10/10 - 0s - loss: 0.9504 - accuracy: 0.7295 - 13ms/epoch - 1ms/step
Epoch 7/200
10/10 - 0s - loss: 0.7539 - accuracy: 0.7877 - 12ms/epoch - 1ms/step
Epoch 8/200
10/10 - 0s - loss: 0.5813 - accuracy: 0.8322 - 13ms/epoch - 1ms/step
Epoch 9/200
10/10 - 0s - loss: 0.4688 - accuracy: 0.8459 - 12ms/epoch - 1ms/step
Epoch 10/200
10/10 - 0s - loss: 0.4064 - accuracy: 0.8801 - 12ms/epoch - 1ms/step
Epoch 11/200
10/10 - 0s - loss: 0.3568 - accuracy: 0.8870 - 12ms/epoch - 1ms/step
Epoch 12/200
10/10 - 0s - loss: 0.3266 - accuracy: 0.8938 - 12ms/epoch - 1ms/step
Epoch 13/200
10/10 - 0s

Epoch 101/200
10/10 - 0s - loss: 0.0962 - accuracy: 0.9692 - 12ms/epoch - 1ms/step
Epoch 102/200
10/10 - 0s - loss: 0.0844 - accuracy: 0.9726 - 12ms/epoch - 1ms/step
Epoch 103/200
10/10 - 0s - loss: 0.0664 - accuracy: 0.9726 - 11ms/epoch - 1ms/step
Epoch 104/200
10/10 - 0s - loss: 0.0490 - accuracy: 0.9760 - 12ms/epoch - 1ms/step
Epoch 105/200
10/10 - 0s - loss: 0.0517 - accuracy: 0.9726 - 13ms/epoch - 1ms/step
Epoch 106/200
10/10 - 0s - loss: 0.0507 - accuracy: 0.9726 - 12ms/epoch - 1ms/step
Epoch 107/200
10/10 - 0s - loss: 0.0689 - accuracy: 0.9795 - 12ms/epoch - 1ms/step
Epoch 108/200
10/10 - 0s - loss: 0.0737 - accuracy: 0.9726 - 12ms/epoch - 1ms/step
Epoch 109/200
10/10 - 0s - loss: 0.0497 - accuracy: 0.9760 - 11ms/epoch - 1ms/step
Epoch 110/200
10/10 - 0s - loss: 0.0518 - accuracy: 0.9692 - 12ms/epoch - 1ms/step
Epoch 111/200
10/10 - 0s - loss: 0.0462 - accuracy: 0.9795 - 12ms/epoch - 1ms/step
Epoch 112/200
10/10 - 0s - loss: 0.0508 - accuracy: 0.9795 - 12ms/epoch - 1ms/step
Epoc

Epoch 200/200
10/10 - 0s - loss: 0.0530 - accuracy: 0.9795 - 13ms/epoch - 1ms/step


<keras.src.callbacks.History at 0x1ecfed9f5b0>

In [173]:
# Evaluate the model using the test data
model_loss, model_accuracy = model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

4/4 - 0s - loss: 0.9297 - accuracy: 0.8469 - 89ms/epoch - 22ms/step
Loss: 0.9297380447387695, Accuracy: 0.8469387888908386
