In [121]:
import pandas as pd
from google.colab import drive
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

drive.mount('/content/drive')
root_path = 'drive/MyDrive/Python_Students_SuperProf/Shahin'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# loading the data

In [58]:
gdp_df = pd.read_csv(f'{root_path}/New_gdp.csv', index_col = 0)
npls_df = pd.read_csv(f'{root_path}/New_npls.csv', index_col = 0)


GDP data

In [59]:
gdp_df

Unnamed: 0,Year,GDP growth,Interest rate,Inflation rate
1,1993,-0.852806,10.340071,4.626735
2,1994,2.151024,8.152558,4.051842
3,1995,2.886837,7.920644,5.235423
4,1996,1.266785,7.952253,4.006977
5,1997,1.830212,7.743309,2.043108
6,1998,1.810615,6.094069,1.955086
7,1999,1.625728,4.736636,1.66346
8,2000,3.786955,5.138246,2.537685
9,2001,1.951372,4.128904,2.785165
10,2002,0.253943,3.158739,2.465323


NPLs data

In [60]:
npls_df

Unnamed: 0,Year,Capital Adequacy Ratios (CAR),Non-Performing Loans (NPLs),Bank net interest margin (%),Bank cost to income ratio (%)
1,1993,2.816443,5.340071,4.626735,59.1
2,1994,3.479011,4.152558,4.051842,58.9
3,1995,3.612424,6.920644,5.235423,55.5
4,1996,3.804164,5.952253,4.006977,58.3
5,1997,3.589254,7.743309,2.043108,60.87
6,1998,3.838521,8.996173,1.955086,62.14
7,1999,4.216443,7.568115,1.66346,63.9
8,2000,4.230109,6.777608,2.537685,59.1
9,2001,4.347956,3.282581,2.05941,61.52906
10,2002,4.485619,4.446344,2.189937,60.65203


# Correlation analysis

In [91]:
def correlation_analysis(df_1, df_2, column):
  display(pd.merge(gdp_df, npls_df[['Year',column]], on= 'Year').corr()[column])


In [92]:
correlation_analysis(gdp_df, npls_df, 'Capital Adequacy Ratios (CAR)')

Unnamed: 0,Capital Adequacy Ratios (CAR)
Year,0.935467
GDP growth,-0.135402
Interest rate,-0.822051
Inflation rate,-0.389055
Capital Adequacy Ratios (CAR),1.0


In [93]:
correlation_analysis(gdp_df, npls_df, 'Non-Performing Loans (NPLs)')

Unnamed: 0,Non-Performing Loans (NPLs)
Year,0.313763
GDP growth,-0.193484
Interest rate,-0.063256
Inflation rate,-0.497124
Non-Performing Loans (NPLs),1.0


In [94]:
correlation_analysis(gdp_df, npls_df, 'Bank net interest margin (%)')

Unnamed: 0,Bank net interest margin (%)
Year,-0.738918
GDP growth,0.125125
Interest rate,0.772155
Inflation rate,0.436111
Bank net interest margin (%),1.0


In [95]:
correlation_analysis(gdp_df, npls_df, 'Bank cost to income ratio (%)')

Unnamed: 0,Bank cost to income ratio (%)
Year,0.356041
GDP growth,-0.07054
Interest rate,-0.314008
Inflation rate,-0.53203
Bank cost to income ratio (%),1.0


# Model training

Let combine the two dataframes

In [154]:
total_df = pd.merge(gdp_df, npls_df, on = 'Year')
total_df

Unnamed: 0,Year,GDP growth,Interest rate,Inflation rate,Capital Adequacy Ratios (CAR),Non-Performing Loans (NPLs),Bank net interest margin (%),Bank cost to income ratio (%)
0,1993,-0.852806,10.340071,4.626735,2.816443,5.340071,4.626735,59.1
1,1994,2.151024,8.152558,4.051842,3.479011,4.152558,4.051842,58.9
2,1995,2.886837,7.920644,5.235423,3.612424,6.920644,5.235423,55.5
3,1996,1.266785,7.952253,4.006977,3.804164,5.952253,4.006977,58.3
4,1997,1.830212,7.743309,2.043108,3.589254,7.743309,2.043108,60.87
5,1998,1.810615,6.094069,1.955086,3.838521,8.996173,1.955086,62.14
6,1999,1.625728,4.736636,1.66346,4.216443,7.568115,1.66346,63.9
7,2000,3.786955,5.138246,2.537685,4.230109,6.777608,2.537685,59.1
8,2001,1.951372,4.128904,2.785165,4.347956,3.282581,2.05941,61.52906
9,2002,0.253943,3.158739,2.465323,4.485619,4.446344,2.189937,60.65203


 Data splitting

In [155]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Separating the feature form the targes
X = total_df[['GDP growth', 'Interest rate', 'Inflation rate']]
y = total_df[['Capital Adequacy Ratios (CAR)', 'Non-Performing Loans (NPLs)',
              'Bank net interest margin (%)', 'Bank cost to income ratio (%)']]

# splitting the data to training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=5, random_state=0)

# to scale the data
scaler = MinMaxScaler()

# scaling the training features
X_train[['GDP growth', 'Interest rate', 'Inflation rate']] = scaler.fit_transform(X_train[['GDP growth', 'Interest rate', 'Inflation rate']])

# scaling the test features based on the paraemters of the training set
X_test[['GDP growth', 'Interest rate', 'Inflation rate']] = scaler.fit_transform(X_test[['GDP growth', 'Interest rate', 'Inflation rate']])


print('X train:', X_train.shape)
print('y train:', y_train.shape)
print('X test:', X_test.shape)
print('y test:', y_test.shape)

X train: (26, 3)
y train: (26, 4)
X test: (5, 3)
y test: (5, 4)


In [166]:
def model_training(X_train, X_test, y_train, y_test, target_name):

  regression_model = LinearRegression().fit(X_train, y_train[target_name])

  # predictions of the model for the test set
  y_pred = regression_model.predict(X_test)

  print(f'Model to predict \'{target_name}\'')
  print('Mean absolute error: ', mean_absolute_error(y_test[target_name], y_pred))
  print('R2 score: ', regression_model.score(X_test, y_test[target_name]))
  print('Regression coeff: ', regression_model.coef_)

## Capital Adequacy Ratios (CAR)

In [167]:
model_training(X_train, X_test, y_train, y_test, 'Capital Adequacy Ratios (CAR)' )

Model to predict 'Capital Adequacy Ratios (CAR)'
Mean absolute error:  0.514058712332896
R2 score:  0.7023357276927169
Regression coeff:  [ 0.02925562 -3.33396548 -1.2108521 ]


## Non-Performing Loans (NPLs)

In [168]:
model_training(X_train, X_test, y_train, y_test, 'Non-Performing Loans (NPLs)' )

Model to predict 'Non-Performing Loans (NPLs)'
Mean absolute error:  4.79683736911165
R2 score:  -17.548872386292395
Regression coeff:  [-8.95423217 -1.63473851 -9.59909033]


## Bank net interest margin (%)

In [170]:
model_training(X_train, X_test, y_train, y_test, 'Bank net interest margin (%)' )

Model to predict 'Bank net interest margin (%)'
Mean absolute error:  0.9105612868863954
R2 score:  0.6237981762183049
Regression coeff:  [0.01928111 2.75047611 1.54111606]


## Bank cost to income ratio (%)

In [171]:
model_training(X_train, X_test, y_train, y_test, 'Bank cost to income ratio (%)' )

Model to predict 'Bank cost to income ratio (%)'
Mean absolute error:  3.9155119671604126
R2 score:  0.4309101940564154
Regression coeff:  [  4.24308284  -5.94122065 -15.37114664]
