In [None]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score
from sklearn.linear_model import Lasso
from sklearn.linear_model import LogisticRegression

# Read the credit-score CSV into a DataFrame, decoding the file as ISO-8859-1
df = pd.read_csv(
    'MLF_GP1_CreditScore.csv',
    encoding="ISO-8859-1",
)
df  # last expression of the cell, so the notebook renders the frame

In [None]:
# Show the dtype pandas inferred for each column of the loaded frame
df.dtypes

In [3]:
# Count the missing (NaN/None) entries in every column
df.isna().sum()


Sales/Revenues              0
Gross Margin                0
EBITDA                      0
EBITDA Margin               0
Net Income Before Extras    0
Total Debt                  0
Net Debt                    0
LT Debt                     0
ST Debt                     0
Cash                        0
Free Cash Flow              0
Total Debt/EBITDA           0
Net Debt/EBITDA             0
Total MV                    0
Total Debt/MV               0
Net Debt/MV                 0
CFO/Debt                    0
CFO                         0
Interest Coverage           0
Total Liquidity             0
Current Liquidity           0
Current Liabilities         0
EPS Before Extras           0
PE                          0
ROA                         0
ROE                         0
InvGrd                      0
Rating                      0
dtype: int64

In [4]:

# Build the feature matrix and the two targets, then hold out 20% of the rows

X = df.iloc[:, :-2]          # every column before the two target columns
y_InvGrd = df.iloc[:, -2]    # penultimate column
y_Rating = df.iloc[:, -1]    # final column

# A single call splits all three objects, so features and both targets stay row-aligned
(X_train, X_test,
 y_InvGrd_train, y_InvGrd_test,
 y_Rating_train, y_Rating_test) = train_test_split(
    X, y_InvGrd, y_Rating,
    random_state=42,
    test_size=0.2,  # 80% training and 20% test
)


### Linear Regression Approach

In [6]:
## Ridge Regularisation

# Fit an L2-penalised linear regression on the InvGrd target
ridge = Ridge(alpha=1).fit(X_train, y_InvGrd_train)

# Convert the continuous predictions to class labels: above 0.5 -> 1, otherwise 0
y_InvGrd_pred = np.where(ridge.predict(X_test) > 0.5, 1.0, 0.0)
accuracy = accuracy_score(y_InvGrd_test, y_InvGrd_pred)

print("Accuracy of Linear regression with Ridge regularization : ", accuracy)


Accuracy of Linear regression with Ridge regularization :  0.7676470588235295


In [7]:
## Lasso Regularisation

# Fit an L1-penalised linear regression on the InvGrd target
lasso = Lasso(alpha=0.1).fit(X_train, y_InvGrd_train)

# Convert the continuous predictions to class labels: above 0.5 -> 1, otherwise 0
y_InvGrd_pred = np.where(lasso.predict(X_test) > 0.5, 1.0, 0.0)
accuracy = accuracy_score(y_InvGrd_test, y_InvGrd_pred)

print("Accuracy of Linear regression with Lasso regularization:", accuracy)


Accuracy of Linear regression with Lasso regularization: 0.7529411764705882


### Logistic Regression Approach

In [9]:
## Ridge Regularisation

# Logistic regression with an L2 penalty, fitted on the InvGrd target
lr_ridge = LogisticRegression(C=0.1, penalty='l2', solver='liblinear').fit(
    X_train, y_InvGrd_train
)
y_InvGrd_pred_ridge = lr_ridge.predict(X_test)
accuracy_ridge = accuracy_score(y_InvGrd_test, y_InvGrd_pred_ridge)
print("Accuracy of Logistic regression with Ridge regularization :", accuracy_ridge)


## Lasso regularization

# Same model and settings, but with an L1 penalty
lr_lasso = LogisticRegression(C=0.1, penalty='l1', solver='liblinear').fit(
    X_train, y_InvGrd_train
)
y_InvGrd_pred_lasso = lr_lasso.predict(X_test)
accuracy_lasso = accuracy_score(y_InvGrd_test, y_InvGrd_pred_lasso)
print("Accuracy of Logistic regression with Lasso regularization :", accuracy_lasso)

Accuracy of Logistic regression with Ridge regularization : 0.7647058823529411
Accuracy of Logistic regression with Lasso regularization : 0.7617647058823529


### Neural Networks Approach

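For the neural network approach, I will load the dataset again. Note that this model is trained to predict the multi-class `Rating` column, whereas the models above predict the binary `InvGrd` column.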

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder



In [11]:
# Re-read the credit-score CSV (decoded as ISO-8859-1) for the neural-network section
df = pd.read_csv(
    'MLF_GP1_CreditScore.csv',
    encoding="ISO-8859-1",
)
df  # last expression of the cell, so the notebook renders the frame

Unnamed: 0,Sales/Revenues,Gross Margin,EBITDA,EBITDA Margin,Net Income Before Extras,Total Debt,Net Debt,LT Debt,ST Debt,Cash,...,Interest Coverage,Total Liquidity,Current Liquidity,Current Liabilities,EPS Before Extras,PE,ROA,ROE,InvGrd,Rating
0,-0.005496,0.030763,0.018885,0.024515,0.146849,-0.029710,-0.019296,-0.042648,0.049875,-0.133716,...,0.136748,0.392143,-0.184887,0.062781,0.148305,0.100409,0.163266,0.102521,1,A1
1,-0.005496,0.030763,0.088716,0.094733,0.146849,-0.029710,-0.019296,-0.042648,0.049875,-0.133716,...,0.214657,0.392143,-0.184887,0.062781,0.148305,-0.089598,0.163266,0.102521,1,A1
2,-0.007045,0.023159,0.088716,0.096440,0.108590,0.039410,0.034268,0.009059,0.250371,0.101315,...,0.205290,0.483257,-0.017877,0.121357,0.110656,-0.045142,0.105711,0.103378,1,A1
3,-0.009396,0.028400,0.088716,0.099046,0.146137,0.030071,0.036938,-0.016964,0.356994,-0.052606,...,0.232991,0.996955,-0.122017,0.079051,0.151639,-0.008231,0.162421,0.132295,1,A1
4,-0.009009,0.027714,0.088716,0.098611,0.123500,0.024224,0.034445,-0.034132,0.461894,-0.090869,...,0.172906,1.711426,-0.161561,0.084319,0.130435,0.015528,0.156427,0.225144,1,A1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1695,-0.099327,-0.010702,-0.127046,-0.030785,-4.349569,0.124830,0.146400,0.008407,0.364676,-0.028333,...,-0.190974,0.603271,-0.150779,0.144187,-4.408257,-1.339781,-4.271318,-5.168673,0,Caa1
1696,-0.116919,-0.009799,-0.155183,-0.043333,-2.937747,0.157873,0.066243,0.084104,0.309846,0.808505,...,-0.246259,0.007110,0.604043,0.127468,-2.977064,-1.695900,-2.868086,-3.429429,0,Caa1
1697,-0.099676,0.067595,-0.170022,-0.078134,-1.960264,0.023226,-0.201398,0.024856,0.020078,1.352542,...,-0.266848,-0.959809,1.227363,0.056198,-1.955285,-1.919739,-1.876336,-1.940995,0,Caa1
1698,-0.088853,0.007820,-0.122645,-0.037088,-2.066509,0.067495,-0.011377,0.289219,-0.171802,0.671224,...,-0.226685,-0.438389,0.865331,-0.104059,-2.059347,-1.462429,-2.027254,-2.059961,0,Caa1


In [12]:
# Extract the targets and the predictors from the frame as NumPy arrays
y_invgrd = df.iloc[:, -2].values   # penultimate column
y_rating = df.iloc[:, -1].values   # final column
X = df.iloc[:, :-2].values         # everything before the two target columns

In [13]:
# Hold out 20% of the rows for testing; one call keeps X and both targets row-aligned
(X_train, X_test,
 y_rating_train, y_rating_test,
 y_invgrd_train, y_invgrd_test) = train_test_split(
    X, y_rating, y_invgrd,
    random_state=42,
    test_size=0.2,
)

In [14]:
# NOTE: this cell overwrites X_train/X_test and the rating targets in place,
# so re-run the train/test split cell before re-running it.

# Standardize features: the scaler is fitted on the training split only and the
# same transform is then applied to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Convert string labels to integer labels (mapping learned from the training labels)
label_encoder = LabelEncoder()
y_rating_train = label_encoder.fit_transform(y_rating_train)
y_rating_test = label_encoder.transform(y_rating_test)

# One-hot encode rating target variable.
# The width is pinned to the number of classes the encoder knows. Without
# num_classes, to_categorical sizes each matrix from the largest code present in
# that array alone, so train and test could end up with different widths when
# the highest-coded rating is absent from one of the splits.
n_rating_classes = len(label_encoder.classes_)
y_rating_train = to_categorical(y_rating_train, num_classes=n_rating_classes)
y_rating_test = to_categorical(y_rating_test, num_classes=n_rating_classes)


In [15]:
# Defining the model architecture: two hidden ReLU layers, each followed by
# dropout, and a softmax output layer
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
# One output unit per column of the one-hot rating target, instead of a
# hard-coded class count that would silently go stale if the label set changes
model.add(Dense(y_rating_train.shape[1], activation='softmax'))


In [16]:
# Configure the optimiser, loss and reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 50 epochs in mini-batches of 32; the test split is passed as
# validation data, so its loss/accuracy are reported after every epoch
model.fit(
    X_train,
    y_rating_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_rating_test),
)

# evaluate() returns [loss, accuracy] for the metrics compiled above; keep the accuracy
accuracy = model.evaluate(X_test, y_rating_test)[1]
print('Neural network Accuracy: %.2f%%' % (accuracy*100))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Neural network Accuracy: 22.35%


Based on the results obtained:

Accuracy of Linear regression with Ridge regularization :  0.7676470588235295 approximately 0.7676

Accuracy of Linear regression with Lasso regularization: 0.7529411764705882 approximately 0.7529

Accuracy of Logistic regression with Ridge regularization : 0.7647058823529411 approximately 0.7647

Accuracy of Logistic regression with Lasso regularization : 0.7617647058823529 approximately 0.7618

Neural network Accuracy: 0.2235 (22.35%, measured on the multi-class `Rating` target rather than on `InvGrd`)

The following observations can be made regarding the effectiveness and suitability of each approach for the given problem:

Linear regression with Ridge regularization and Logistic regression with Ridge regularization have similar accuracy scores of 0.7676 and 0.7647 respectively. These results suggest that the two approaches are about equally effective at predicting whether or not a firm is investment grade.

Linear regression with Lasso regularization and Logistic regression with Lasso regularization also have similar accuracy scores of 0.7529 and 0.7618 respectively. However, these scores are slightly lower than the accuracy scores of the Ridge regularization models. This could be because Lasso regularization tends to produce sparse models, which might not be well suited for this problem.

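The neural network approach has a much lower accuracy score of 0.2235 (22.35%). However, this figure is not directly comparable with the scores above: the network was trained to predict the multi-class `Rating` label (its output layer has 16 units), whereas the regression models predict the binary `InvGrd` label. With 16 classes, random guessing would score only about 6% if the classes were balanced, so 22.35% is above chance, though still low in absolute terms. To judge whether a neural network is suitable for the investment-grade problem, it would need to be trained and evaluated on `InvGrd` as well. It is also possible that the model could be improved with further fine-tuning and optimization.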

In summary, based on the given results, the Ridge regularization models are the most effective of the approaches evaluated here for predicting whether or not a firm is investment grade. However, further analysis and experimentation might be necessary to confirm these findings.