# Credit Score Prediction using Decision Trees

## Setup

In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import joblib



## Data Preparation

In [2]:
# Load the data
# Read the bank-loan CSV and drop every row that has a missing value
# (axis=0 removes rows, not columns).
credit_score_data = pd.read_csv(
    '/kaggle/input/credit-card-score/bankloans_with_scores.csv'
).dropna(axis=0)

# Show which columns are available.
print('Columns : ', credit_score_data.columns, sep='\n')

# Summary statistics of the columns that remain after dropping incomplete rows.
print('\nData descriptions : ', credit_score_data.describe(), sep='\n')



Columns : 
Index(['age', 'education', 'employ', 'address', 'income', 'debtinc',
       'creddebt', 'othdebt', 'default', 'credit_score'],
      dtype='object')

Data descriptions : 
              age   education      employ     address      income     debtinc  \
count  700.000000  700.000000  700.000000  700.000000  700.000000  700.000000   
mean    34.860000    1.722857    8.388571    8.278571   45.601429   10.260571   
std      7.997342    0.928206    6.658039    6.824877   36.814226    6.827234   
min     20.000000    1.000000    0.000000    0.000000   14.000000    0.400000   
25%     29.000000    1.000000    3.000000    3.000000   24.000000    5.000000   
50%     34.000000    1.000000    7.000000    7.000000   34.000000    8.600000   
75%     40.000000    2.000000   12.000000   12.000000   55.000000   14.125000   
max     56.000000    5.000000   31.000000   34.000000  446.000000   41.300000   

         creddebt     othdebt     default  credit_score  
count  700.000000  700.000000 

## Specify Prediction Target

In [3]:
# The prediction target is the `credit_score` column of the cleaned frame.
y = credit_score_data['credit_score']

print('Prediction Target  : ', y, sep='\n')

# Summary statistics of the target.
print('\nprediction target description : ', y.describe(), sep='\n')



Prediction Target  : 
0      575.934522
1      478.334960
2      508.423785
3      548.412256
4      469.753713
          ...    
695    487.659188
696    474.643101
697    486.775347
698    526.285878
699    494.627463
Name: credit_score, Length: 700, dtype: float64

prediction target description : 
count    700.000000
mean     493.803566
std       26.390507
min      458.169459
25%      476.853951
50%      487.120215
75%      502.622108
max      738.395706
Name: credit_score, dtype: float64


## Create X

Now we will create a DataFrame called X holding the predictive features.

We'll use the following columns:

* income
* debtinc : debt-to-income ratio
* creddebt : credit card debt
* othdebt
* age
* employ
* address
* education
* default

In [4]:
# Predictive feature columns; note that `default` is part of the feature set.
features = [
    'income', 'debtinc', 'creddebt', 'othdebt',
    'age', 'employ', 'address', 'education',
    'default',
]

# Select the feature columns, in the order listed above.
X = credit_score_data[features]
print('X : ', X, sep='\n')

# Summary statistics of the selected features.
print('\nFeatures description  : ', X.describe(), sep='\n')


X : 
     income  debtinc   creddebt   othdebt  age  employ  address  education  \
0       176      9.3  11.359392  5.008608   41      17       12          3   
1        31     17.3   1.362202  4.000798   27      10        6          1   
2        55      5.5   0.856075  2.168925   40      15       14          1   
3       120      2.9   2.658720  0.821280   41      15       14          1   
4        28     17.3   1.787436  3.056564   24       2        0          2   
..      ...      ...        ...       ...  ...     ...      ...        ...   
695      27      4.6   0.262062  0.979938   36       6       15          2   
696      21     11.5   0.369495  2.045505   29       6        4          2   
697      32      7.6   0.491264  1.940736   33      15        3          1   
698      77      8.4   2.302608  4.165392   45      19       22          1   
699      44     14.7   2.994684  3.473316   37      12       14          1   

     default  
0        1.0  
1        0.0  
2        0.0 

## Build, Train, and Evaluate the Model

In [5]:
# Specify the model.
# A fixed random_state makes the fitted tree (and therefore the error reported
# below) reproducible from run to run; without it, equally good candidate
# splits may be resolved differently on each execution.
model=DecisionTreeRegressor(random_state=1)
print("DecisionTreeRegressor ... ")

# Split the data into a training part and a held-out validation part.
# The split itself is seeded so the same rows land in each part every run.
print('\nSplit the data ...')
train_x,test_x,train_y,test_y=train_test_split(X,y,random_state=1)

print('\nTrain_x : ')
print(train_x)
print('\nTest_x : ')
print(test_x)
print('\nTrain_y: ')
print(train_y)
# This label previously read "Test_x" although the target split is printed.
print('\nTest_y : ')
print(test_y)

# Fit the model on the training split only.
print('\nFitting the model ...')
model.fit(train_x,train_y)

# Make predictions for the held-out rows.
print('\nPredictions ...')
predictions=model.predict(test_x)

# Calculate the Mean Absolute Error (MAE) on the validation data.
# The variable keeps its original name `mea` in case other cells read it.
print('\nCalculating the Mean Absolute Error in Validation Data ...')
mea=mean_absolute_error(test_y,predictions)
print('MAE :  ',mea)

DecisionTreeRegressor ... 

Split the data ...

Train_x : 
     income  debtinc  creddebt   othdebt  age  employ  address  education  \
262      18     11.9  0.400554  1.741446   27       8        7          2   
305      18      1.2  0.065880  0.150120   25       9        4          1   
573      15     11.3  0.072885  1.622115   24       3        5          1   
607      36      4.7  0.624348  1.067652   34       3       14          1   
248      40     18.5  1.213600  6.186400   35      15       11          1   
..      ...      ...       ...       ...  ...     ...      ...        ...   
144      68      5.4  0.447984  3.224016   41      16       17          1   
645      42      3.9  1.018836  0.619164   23       0        1          2   
72      100     12.8  4.582400  8.217600   47      26       21          1   
235      18      6.5  0.526500  0.643500   24       7        0          1   
37       54     14.4  3.195936  4.580064   32      12        1          2   

     default  
2

## Save the Model

In [6]:
# Serialize the fitted model to disk; the call's return value (the list of
# written file names) is left as the cell's displayed output.
joblib.dump(value=model, filename="credit_scoring_model.pkl")


['credit_scoring_model.pkl']

## Save the Dataframe

In [7]:
# Write the cleaned frame to CSV without the index column.
credit_score_data.to_csv(path_or_buf="cleaned_credit_data.csv", index=False)

# Read the file back and show the first five rows.
df_loaded = pd.read_csv(filepath_or_buffer="cleaned_credit_data.csv")

print(df_loaded.head(n=5))

   age  education  employ  address  income  debtinc   creddebt   othdebt  \
0   41          3      17       12     176      9.3  11.359392  5.008608   
1   27          1      10        6      31     17.3   1.362202  4.000798   
2   40          1      15       14      55      5.5   0.856075  2.168925   
3   41          1      15       14     120      2.9   2.658720  0.821280   
4   24          2       2        0      28     17.3   1.787436  3.056564   

   default  credit_score  
0      1.0    575.934522  
1      0.0    478.334960  
2      0.0    508.423785  
3      0.0    548.412256  
4      1.0    469.753713  
