# Predictive Modeling: Financial Distress Analysis

## 1. Packages


In [1]:
import pandas as  pd
import numpy as np
import plotly.express as px
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV

## 2. Initialise Data

In [2]:
df = pd.read_csv("Financial_Distress.csv")
df

Unnamed: 0,Company,Time,Financial Distress,x1,x2,x3,x4,x5,x6,x7,...,x74,x75,x76,x77,x78,x79,x80,x81,x82,x83
0,1,1,0.010636,1.2810,0.022934,0.87454,1.21640,0.060940,0.188270,0.52510,...,85.437,27.07,26.102,16.000,16.0,0.2,22,0.060390,30,49
1,1,2,-0.455970,1.2700,0.006454,0.82067,1.00490,-0.014080,0.181040,0.62288,...,107.090,31.31,30.194,17.000,16.0,0.4,22,0.010636,31,50
2,1,3,-0.325390,1.0529,-0.059379,0.92242,0.72926,0.020476,0.044865,0.43292,...,120.870,36.07,35.273,17.000,15.0,-0.2,22,-0.455970,32,51
3,1,4,-0.566570,1.1131,-0.015229,0.85888,0.80974,0.076037,0.091033,0.67546,...,54.806,39.80,38.377,17.167,16.0,5.6,22,-0.325390,33,52
4,2,1,1.357300,1.0623,0.107020,0.81460,0.83593,0.199960,0.047800,0.74200,...,85.437,27.07,26.102,16.000,16.0,0.2,29,1.251000,7,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3667,422,10,0.438020,2.2605,0.202890,0.16037,0.18588,0.175970,0.198400,2.22360,...,100.000,100.00,100.000,17.125,14.5,-7.0,37,0.436380,4,41
3668,422,11,0.482410,1.9615,0.216440,0.20095,0.21642,0.203590,0.189870,1.93820,...,91.500,130.50,132.400,20.000,14.5,-16.0,37,0.438020,5,42
3669,422,12,0.500770,1.7099,0.207970,0.26136,0.21399,0.193670,0.183890,1.68980,...,87.100,175.90,178.100,20.000,14.5,-20.2,37,0.482410,6,43
3670,422,13,0.611030,1.5590,0.185450,0.30728,0.19307,0.172140,0.170680,1.53890,...,92.900,203.20,204.500,22.000,22.0,6.4,37,0.500770,7,44


In [3]:
# variables
X = df.drop( columns=["Financial Distress", "Company"] ).copy()
X

Unnamed: 0,Time,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x74,x75,x76,x77,x78,x79,x80,x81,x82,x83
0,1,1.2810,0.022934,0.87454,1.21640,0.060940,0.188270,0.52510,0.018854,0.182790,...,85.437,27.07,26.102,16.000,16.0,0.2,22,0.060390,30,49
1,2,1.2700,0.006454,0.82067,1.00490,-0.014080,0.181040,0.62288,0.006423,0.035991,...,107.090,31.31,30.194,17.000,16.0,0.4,22,0.010636,31,50
2,3,1.0529,-0.059379,0.92242,0.72926,0.020476,0.044865,0.43292,-0.081423,-0.765400,...,120.870,36.07,35.273,17.000,15.0,-0.2,22,-0.455970,32,51
3,4,1.1131,-0.015229,0.85888,0.80974,0.076037,0.091033,0.67546,-0.018807,-0.107910,...,54.806,39.80,38.377,17.167,16.0,5.6,22,-0.325390,33,52
4,1,1.0623,0.107020,0.81460,0.83593,0.199960,0.047800,0.74200,0.128030,0.577250,...,85.437,27.07,26.102,16.000,16.0,0.2,29,1.251000,7,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3667,10,2.2605,0.202890,0.16037,0.18588,0.175970,0.198400,2.22360,1.091500,0.241640,...,100.000,100.00,100.000,17.125,14.5,-7.0,37,0.436380,4,41
3668,11,1.9615,0.216440,0.20095,0.21642,0.203590,0.189870,1.93820,1.000100,0.270870,...,91.500,130.50,132.400,20.000,14.5,-16.0,37,0.438020,5,42
3669,12,1.7099,0.207970,0.26136,0.21399,0.193670,0.183890,1.68980,0.971860,0.281560,...,87.100,175.90,178.100,20.000,14.5,-20.2,37,0.482410,6,43
3670,13,1.5590,0.185450,0.30728,0.19307,0.172140,0.170680,1.53890,0.960570,0.267720,...,92.900,203.20,204.500,22.000,22.0,6.4,37,0.500770,7,44


In [13]:
# output classes
df["Financial Distress"] = pd.to_numeric(df["Financial Distress"], errors='coerce') # change values to float
df.dropna(subset=["Financial Distress"], inplace=True)
df["target"] = df["Financial Distress"].apply(lambda x: "Healthy" if x > -0.5 else "in-Distress") # binary target based on condition
y = pd.get_dummies(df)
y = y["target_in-Distress"]
y


0       False
1       False
2       False
3        True
4       False
        ...  
3667    False
3668    False
3669    False
3670    False
3671    False
Name: target_in-Distress, Length: 3672, dtype: bool

In [5]:
# Split Data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    random_state=1, 
                                                    train_size=0.7, test_size=0.3, 
                                                    shuffle=True)

# 70% of the data for training and the 30% for testing

## 3. Model

In [6]:
# Find the best estimate for C(regularisation parameter) by performing gridsearch
param_grid = {"C": np.arange(1, 20, 1) } #parameter grid
svm = SVC()

grid_search = GridSearchCV(svm, param_grid, cv=10)
grid_search.fit(X_train, y_train)

best_c = grid_search.best_params_["C"]

In [7]:

# Support Vector Machine algorithm 
model = SVC(kernel="linear", C=best_c)
model.fit(X_train, y_train)

## 4. Testing

In [8]:
# Prediction
y_pred = model.predict(X_test)    # prediction for test values
conf_m = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

In [9]:
print(report)

              precision    recall  f1-score   support

       False       0.97      0.99      0.98      1053
        True       0.48      0.24      0.32        49

    accuracy                           0.95      1102
   macro avg       0.72      0.62      0.65      1102
weighted avg       0.94      0.95      0.95      1102



In [10]:
print(conf_m)

[[1040   13]
 [  37   12]]


In [11]:
# True plot

# choose features for plots 
x_label = "Time"
y_label = "x12"

#
def scat_plot(colour):
    scat = px.scatter(x= X_test[x_label],
                    y= X_test[y_label],
                    color= colour
                    )
    return scat

fig1 = scat_plot(y_test)

fig1.update_layout(xaxis_title = x_label, 
                   yaxis_title = y_label, 
                   legend_title = "In Distress", 
                   title = "True Data",
                   title_x = 0.5
                   )
fig1.show()

In [12]:
# Prediction plot
fig2 = scat_plot(y_pred)

fig2.update_layout(xaxis_title = x_label, 
                   yaxis_title = y_label, 
                   legend_title = "In Distress", 
                   title = "Predicted Data",
                   title_x = 0.5
                   )
fig2.show()