In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder,LabelEncoder,StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
df = pd.read_excel("CreditWorthiness.xlsx")

In [3]:
x= df[['Cbal', 'Cdur', 'Chist', 'Camt', 'Edur', 'InRate', 'MSG', 'Rdur',
       'Prop', 'age', 'inPlans', 'Htype', 'NumCred', 'JobType', 'Ndepend',
       'telephone', 'foreign']]

In [4]:
y = df['creditScore']

0 Cbal
1 Cdur
2 Chist
3 Camt
4 Edur
5 InRate
6 MSG
7 Rdur
8 Prop
9 age
10 inPlans
11 Htype
12 NumCred
13 JobType
14 Ndepend
15 telephone
16 foreign

In [5]:
trf1 = ColumnTransformer(transformers=[
    ('label',OneHotEncoder(),['Prop','inPlans','JobType','foreign','telephone']),
    ('Cbal',OrdinalEncoder(categories=[['no checking account',' Rs. < 0','0 <= Rs. < 2000','Rs. >=2000']]),['Cbal']),
    ('chist',OrdinalEncoder(categories=[['all settled till now','dues not paid earlier','all settled','none taken/all settled']]),['Chist']),
    ('edur',OrdinalEncoder(categories=[['1 to 4 years','more than 7 years','4 to 7 years','less than 1 year','not employed']]),['Edur']),
    ('msg',OrdinalEncoder(categories=[['single male','divorced or separated or married female','married or widowed male','divorced or separated male']]),['MSG']),
    ('Rdur', OrdinalEncoder(categories=[['more than 3 years','1 to 2 years','2 to 3 years','less than a year']]),['Rdur']),
    ('htype',OrdinalEncoder(categories=[['own','pays rent','free']]),['Htype']),
    ('cust',StandardScaler(),['Camt'])
],remainder='passthrough')


In [6]:
trf2 = GradientBoostingClassifier()

In [7]:
from sklearn.pipeline import make_pipeline

In [8]:
pipe = make_pipeline(trf1,trf2)

In [9]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.1,random_state=42)

In [10]:
pipe.fit(x_train,y_train)

In [11]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,pipe.predict(x_test))

0.78

In [18]:
from sklearn.model_selection import cross_val_score
np.mean(cross_val_score(pipe,x,y,cv=5))

0.752

In [13]:
# import pickle
# pickle.dump(pipe,open('model.pkl','wb'))

In [14]:
dic= {'Cbal': '0 <= Rs. < 2000',
 'Cdur': 9,
 'Chist': 'all settled till now',
 'Camt': 13790,
 'Edur': '1 to 4 years',
 'InRate': 2,
 'MSG': 'married or widowed male',
 'Rdur': 'less than a year',
 'Prop': 'real estate',
 'age': 27,
 'inPlans': 'bank',
 'Htype': 'own',
 'NumCred': 1,
 'JobType': 'employee with official position',
 'Ndepend': 1,
 'telephone': 'yes',
 'foreign': 'no'}

In [15]:
ip =  pd.DataFrame([dic])

In [16]:
ip

Unnamed: 0,Cbal,Cdur,Chist,Camt,Edur,InRate,MSG,Rdur,Prop,age,inPlans,Htype,NumCred,JobType,Ndepend,telephone,foreign
0,0 <= Rs. < 2000,9,all settled till now,13790,1 to 4 years,2,married or widowed male,less than a year,real estate,27,bank,own,1,employee with official position,1,yes,no


In [17]:
pipe.predict(ip)

array(['good'], dtype=object)