<a href="https://colab.research.google.com/github/RishabhKedia10/trackingRecession/blob/main/notebooks/Support-Vector-Machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

df = pd.read_csv('drive/MyDrive/recession/recession_data.csv')
df.shape

(787, 23)

In [None]:
# Drop the 'Dates' column for the correlation calculation
df_no_dates = df.drop(columns=['Dates'])

# Compute the correlation matrix for the remaining columns
corr_matrix = df_no_dates.corr()

# Extract correlation values with 'Recession' and sort them
corr_with_recession = corr_matrix['Recession'].sort_values(ascending=False)

# Display the top 12 features (excluding 'Recession' itself)
top_12_features = corr_with_recession[1:13].index
print("Top 12 features correlated with Recession:")
top_12_features

# Visualize the correlation with a heatmap
# import seaborn as sns
# import matplotlib.pyplot as plt

# plt.figure(figsize=(10, 6))
# sns.heatmap(df_no_dates[top_12_features].corr(), annot=True, cmap='coolwarm')
# plt.title('Correlation Matrix of Top 12 Features')
# plt.show()

Top 12 features correlated with Recession:


Index(['S&P_500_3mo_vs_12mo', 'CPI_12mo_pct_chg', 'Unemployment_Rate_12mo_chg',
       '3M_10Y_Treasury_Spread_12mo_chg', 'CPI_3mo_pct_chg_annualized',
       '10Y_Treasury_Rate_12mo_chg', 'Unemployment_Rate',
       'Real_Fed_Funds_Rate', 'CPI_3mo_vs_12mo',
       'Real_Fed_Funds_Rate_12mo_chg', '3M_10Y_Treasury_Spread',
       '3M_Treasury_Rate_12mo_chg'],
      dtype='object')

In [None]:
from sklearn.model_selection import train_test_split

# Select the top 12 features
X = df[top_12_features]
y = df['Recession']

# Split the data into 75% training and 25% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

print(f'Training set size: {X_train.shape[0]} rows')
print(f'Test set size: {X_test.shape[0]} rows')

Training set size: 590 rows
Test set size: 197 rows


## 1. SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# Initialize the SVM model with a linear kernel
svm_model = SVC(kernel='linear', random_state=42)

# Train the model on the training data
svm_model.fit(X_train, y_train)

# Make predictions on the test data
y_pred_svm = svm_model.predict(X_test)

In [None]:
y_pred_svm

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
from sklearn.metrics import accuracy_score

# Evaluate the model
accuracy_svm = accuracy_score(y_test, y_pred_svm)
# print(f'Accuracy with SVM (Linear Kernel): {accuracy_svm:.2f}')
accuracy_svm

0.9187817258883249

## 2. SVM with GridSearch

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Define parameter grid
param_grid = {
    'C': [10, 50, 100, 150],
    'gamma': ['scale', 0.3, 0.2, 0.1],
    'kernel': ['rbf', 'linear']
}

# Initialize GridSearchCV
grid_search_svm = GridSearchCV(SVC(random_state=42), param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1)

# Fit the model
grid_search_svm.fit(X_train, y_train)

# Get the best parameters and accuracy
best_params_svm = grid_search_svm.best_params_
best_accuracy_svm = grid_search_svm.best_score_

Fitting 5 folds for each of 40 candidates, totalling 200 fits


In [None]:
best_params_svm

{'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}

In [None]:
best_accuracy_svm

0.9423728813559322