# Importing libraries

In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Loading Dataset

In [41]:
df=pd.read_csv('CreditWorthiness.csv')

In [42]:
df.columns

Index(['Cbal', 'Cdur', 'Chist', 'Cpur', 'Camt', 'Sbal', 'Edur', 'InRate',
       'MSG', 'Oparties', 'Rdur', 'Prop', 'age', 'inPlans', 'Htype', 'NumCred',
       'JobType', 'Ndepend', 'telephone', 'foreign', 'creditScore'],
      dtype='object')

# Renaming for convenience

In [43]:
df=df.rename(columns={'Cbal':'Credit_Balance'}) # Balance to be paid 
df=df.rename(columns={'Cdur':'Credit_Duration'})
df=df.rename(columns={'Chist':'Credit_History'})
df=df.rename(columns={'Cpur':'Purpose'})
df=df.rename(columns={'Camt':'Loan_Amount'})
df=df.rename(columns={'Sbal':'Savings_Balance'})
df=df.rename(columns={'Edur':'Employment_Duration'})
df=df.rename(columns={'InRate':'Installment_Rate'})
df=df.rename(columns={'MSG':'Marrital_Status_Gender'})
df=df.rename(columns={'Oparties':'Parties_Involved'})
df=df.rename(columns={'Rdur':'Residence_Duration'})
df=df.rename(columns={'Prop':'Property_Type'})
df=df.rename(columns={'age':'Age'})
df=df.rename(columns={'inPlans':'Existing_Plans'})
df=df.rename(columns={'Htype':'Housing_Type'})
df=df.rename(columns={'NumCred':'Num_Existing_Credits'})
df=df.rename(columns={'JobType':'Job'})
df=df.rename(columns={'Ndepend':'Dependents'})
df=df.rename(columns={'telephone':'Phone'})
df=df.rename(columns={'foreign':'Foreign_Worker_Status'})
df=df.rename(columns={'creditScore':'Credit_Score'})


df.to_csv('CreditWorthiness.csv',index=False)

In [44]:
df.columns

Index(['Credit_Balance', 'Credit_Duration', 'Credit_History', 'Purpose',
       'Loan_Amount', 'Savings_Balance', 'Employment_Duration',
       'Installment_Rate', 'Marrital_Status_Gender', 'Parties_Involved',
       'Residence_Duration', 'Property_Type', 'Age', 'Existing_Plans',
       'Housing_Type', 'Num_Existing_Credits', 'Job', 'Dependents', 'Phone',
       'Foreign_Worker_Status', 'Credit_Score'],
      dtype='object')

In [45]:
df.head()

Unnamed: 0,Credit_Balance,Credit_Duration,Credit_History,Purpose,Loan_Amount,Savings_Balance,Employment_Duration,Installment_Rate,Marrital_Status_Gender,Parties_Involved,...,Property_Type,Age,Existing_Plans,Housing_Type,Num_Existing_Credits,Job,Dependents,Phone,Foreign_Worker_Status,Credit_Score
0,0 <= Rs. < 2000,9,all settled till now,Business,13790,Rs. < 1000,1 to 4 years,2,married or widowed male,no one,...,real estate,27,bank,own,1,employee with official position,1,yes,no,good
1,0 <= Rs. < 2000,15,dues not paid earlier,electronics,15250,no savings account,more than 7 years,4,single male,"yes, guarantor",...,real estate,50,none,own,2,employee with official position,1,yes,no,good
2,0 <= Rs. < 2000,36,none taken/all settled,Business,19410,Rs. < 1000,more than 7 years,4,single male,no one,...,Unknown,61,none,free,1,"employed either in management, self or in high...",1,yes,no,bad
3,0 <= Rs. < 2000,48,none taken/all settled,Business,144090,Rs. < 1000,1 to 4 years,2,single male,no one,...,Other cars etc.,25,none,own,1,employee with official position,1,yes,no,bad
4,no checking account,24,all settled till now,electronics,31690,Rs. < 1000,less than 1 year,4,divorced or separated or married female,no one,...,life insurance/building society,26,none,own,1,employee with official position,1,yes,no,good


In [46]:
df.isnull().sum()

Credit_Balance            0
Credit_Duration           0
Credit_History            0
Purpose                   0
Loan_Amount               0
Savings_Balance           0
Employment_Duration       0
Installment_Rate          0
Marrital_Status_Gender    0
Parties_Involved          0
Residence_Duration        0
Property_Type             0
Age                       0
Existing_Plans            0
Housing_Type              0
Num_Existing_Credits      0
Job                       0
Dependents                0
Phone                     0
Foreign_Worker_Status     0
Credit_Score              0
dtype: int64

# Exploring non-numeric data

In [47]:
column_name="Credit_Balance"
unique_values=df[column_name].unique()
print(unique_values)

['0 <= Rs. < 2000' 'no checking account' ' Rs. < 0' 'Rs. >=2000']


In [48]:
column_name="Credit_History"
unique_values=df[column_name].unique()
print(unique_values)

['all settled till now' 'dues not paid earlier' 'none taken/all settled'
 'all settled']


In [49]:
column_name="Purpose"
unique_values=df[column_name].unique()  # Drop
print(unique_values)

['Business' 'electronics' 'renovation' 'second hand vehicle' 'education'
 'new vehicle' 'miscellaneous' 'furniture' 'retaining' 'domestic needs']


In [50]:
column_name="Savings_Balance"
unique_values=df[column_name].unique()
print(unique_values)

['Rs. < 1000' 'no savings account' 'Rs. >= 10,000' '5000 <= Rs. < 10,000'
 '1000 <= Rs. < 5,000']


In [51]:
column_name="Employment_Duration"  
unique_values=df[column_name].unique()
print(unique_values)

['1 to 4 years' 'more than 7 years' 'less than 1 year' '4 to 7 years'
 'not employed']


In [52]:
column_name="Marrital_Status_Gender"
unique_values=df[column_name].unique()
print(unique_values)

['married or widowed male' 'single male'
 'divorced or separated or married female' 'divorced or separated male']


In [53]:
column_name="Parties_Involved"
unique_values=df[column_name].unique()
print(unique_values)

['no one' 'yes, guarantor' 'yes, co-applicant']


In [54]:
column_name="Property_Type"       # Drop
unique_values=df[column_name].unique()
print(unique_values)

['real estate' 'Unknown' 'Other cars etc.'
 'life insurance/building society']


In [55]:
column_name="Existing_Plans"     # Drop
unique_values=df[column_name].unique()
print(unique_values)

['bank' 'none' 'stores']


In [56]:
column_name="Housing_Type"
unique_values=df[column_name].unique()
print(unique_values)

['own' 'free' 'pays rent']


In [57]:
column_name="Job"
unique_values=df[column_name].unique()
print(unique_values)

['employee with official position'
 'employed either in management, self or in high position'
 'resident unskilled' 'non resident either unemployed or  unskilled ']


In [58]:
column_name="Phone"                   # Residence duration -- Drop
unique_values=df[column_name].unique()    # Drop
print(unique_values)

['yes' 'no']


In [59]:
column_name="Foreign_Worker_Status"
unique_values=df[column_name].unique()   
print(unique_values)

['no' 'yes']


In [60]:
column_name="Credit_Score"
unique_values=df[column_name].unique()
print(unique_values)

['good' 'bad']


# Preprocessing

In [61]:
df.loc[df["Credit_History"]=="all settled till now","Credit_History"]=1
df.loc[df["Credit_History"]=="none taken/all settled","Credit_History"]=1
df.loc[df["Credit_History"]=="all settled","Credit_History"]=1
df.loc[df["Credit_History"]=="dues not paid earlier","Credit_History"]=0

In [62]:
print(df["Credit_History"].unique())

[1 0]


In [63]:
df.loc[df["Credit_Score"]=="good","Credit_Score"]=1
df.loc[df["Credit_Score"]=="bad","Credit_Score"]=0

In [64]:
print(df["Credit_Score"].unique())

[1 0]


In [65]:
df.loc[df["Foreign_Worker_Status"]=="yes","Foreign_Worker_Status"]=1
df.loc[df["Foreign_Worker_Status"]=="no","Foreign_Worker_Status"]=0

In [66]:
print(df["Foreign_Worker_Status"].unique())

[0 1]


In [67]:
df.loc[df["Parties_Involved"]=="no one","Parties_Involved"]="None"
df.loc[df["Parties_Involved"]=="yes, guarantor","Parties_Involved"]="Guarantor"
df.loc[df["Parties_Involved"]=="yes, co-applicant","Parties_Involved"]="Co_applicant"

In [68]:
print(df["Parties_Involved"].unique())

['None' 'Guarantor' 'Co_applicant']


# Dropping

In [69]:
df=df.drop(["Purpose"],axis=1) # Phone # Residence_Duration # Existing_Plans # Property_Type # Purpose
df=df.drop(["Phone"],axis=1)
df=df.drop(["Residence_Duration"],axis=1)
df=df.drop(["Existing_Plans"],axis=1)
df=df.drop(["Property_Type"],axis=1)

In [70]:
df.head()

Unnamed: 0,Credit_Balance,Credit_Duration,Credit_History,Loan_Amount,Savings_Balance,Employment_Duration,Installment_Rate,Marrital_Status_Gender,Parties_Involved,Age,Housing_Type,Num_Existing_Credits,Job,Dependents,Foreign_Worker_Status,Credit_Score
0,0 <= Rs. < 2000,9,1,13790,Rs. < 1000,1 to 4 years,2,married or widowed male,,27,own,1,employee with official position,1,0,1
1,0 <= Rs. < 2000,15,0,15250,no savings account,more than 7 years,4,single male,Guarantor,50,own,2,employee with official position,1,0,1
2,0 <= Rs. < 2000,36,1,19410,Rs. < 1000,more than 7 years,4,single male,,61,free,1,"employed either in management, self or in high...",1,0,0
3,0 <= Rs. < 2000,48,1,144090,Rs. < 1000,1 to 4 years,2,single male,,25,own,1,employee with official position,1,0,0
4,no checking account,24,1,31690,Rs. < 1000,less than 1 year,4,divorced or separated or married female,,26,own,1,employee with official position,1,0,1


In [71]:
dummies1=pd.get_dummies(df.Credit_Balance,dtype=int) 
df=pd.concat([df,dummies1],axis='columns')
df.head()

Unnamed: 0,Credit_Balance,Credit_Duration,Credit_History,Loan_Amount,Savings_Balance,Employment_Duration,Installment_Rate,Marrital_Status_Gender,Parties_Involved,Age,Housing_Type,Num_Existing_Credits,Job,Dependents,Foreign_Worker_Status,Credit_Score,Rs. < 0,0 <= Rs. < 2000,Rs. >=2000,no checking account
0,0 <= Rs. < 2000,9,1,13790,Rs. < 1000,1 to 4 years,2,married or widowed male,,27,own,1,employee with official position,1,0,1,0,1,0,0
1,0 <= Rs. < 2000,15,0,15250,no savings account,more than 7 years,4,single male,Guarantor,50,own,2,employee with official position,1,0,1,0,1,0,0
2,0 <= Rs. < 2000,36,1,19410,Rs. < 1000,more than 7 years,4,single male,,61,free,1,"employed either in management, self or in high...",1,0,0,0,1,0,0
3,0 <= Rs. < 2000,48,1,144090,Rs. < 1000,1 to 4 years,2,single male,,25,own,1,employee with official position,1,0,0,0,1,0,0
4,no checking account,24,1,31690,Rs. < 1000,less than 1 year,4,divorced or separated or married female,,26,own,1,employee with official position,1,0,1,0,0,0,1


In [72]:
df=df.drop(["Credit_Balance"],axis=1)

In [73]:
dummies2=pd.get_dummies(df.Savings_Balance,dtype=int) 
df=pd.concat([df,dummies2],axis='columns')
df.head()

Unnamed: 0,Credit_Duration,Credit_History,Loan_Amount,Savings_Balance,Employment_Duration,Installment_Rate,Marrital_Status_Gender,Parties_Involved,Age,Housing_Type,...,Credit_Score,Rs. < 0,0 <= Rs. < 2000,Rs. >=2000,no checking account,"1000 <= Rs. < 5,000","5000 <= Rs. < 10,000",Rs. < 1000,"Rs. >= 10,000",no savings account
0,9,1,13790,Rs. < 1000,1 to 4 years,2,married or widowed male,,27,own,...,1,0,1,0,0,0,0,1,0,0
1,15,0,15250,no savings account,more than 7 years,4,single male,Guarantor,50,own,...,1,0,1,0,0,0,0,0,0,1
2,36,1,19410,Rs. < 1000,more than 7 years,4,single male,,61,free,...,0,0,1,0,0,0,0,1,0,0
3,48,1,144090,Rs. < 1000,1 to 4 years,2,single male,,25,own,...,0,0,1,0,0,0,0,1,0,0
4,24,1,31690,Rs. < 1000,less than 1 year,4,divorced or separated or married female,,26,own,...,1,0,0,0,1,0,0,1,0,0


In [74]:
df=df.drop(["Savings_Balance"],axis=1)

In [75]:
dummies3=pd.get_dummies(df.Employment_Duration,dtype=int) 
df=pd.concat([df,dummies3],axis='columns')
df.head()

Unnamed: 0,Credit_Duration,Credit_History,Loan_Amount,Employment_Duration,Installment_Rate,Marrital_Status_Gender,Parties_Involved,Age,Housing_Type,Num_Existing_Credits,...,"1000 <= Rs. < 5,000","5000 <= Rs. < 10,000",Rs. < 1000,"Rs. >= 10,000",no savings account,1 to 4 years,4 to 7 years,less than 1 year,more than 7 years,not employed
0,9,1,13790,1 to 4 years,2,married or widowed male,,27,own,1,...,0,0,1,0,0,1,0,0,0,0
1,15,0,15250,more than 7 years,4,single male,Guarantor,50,own,2,...,0,0,0,0,1,0,0,0,1,0
2,36,1,19410,more than 7 years,4,single male,,61,free,1,...,0,0,1,0,0,0,0,0,1,0
3,48,1,144090,1 to 4 years,2,single male,,25,own,1,...,0,0,1,0,0,1,0,0,0,0
4,24,1,31690,less than 1 year,4,divorced or separated or married female,,26,own,1,...,0,0,1,0,0,0,0,1,0,0


In [76]:
df=df.drop(["Employment_Duration"],axis=1)

In [77]:
dummies4=pd.get_dummies(df.Marrital_Status_Gender,dtype=int) 
df=pd.concat([df,dummies4],axis='columns')
df.head()

Unnamed: 0,Credit_Duration,Credit_History,Loan_Amount,Installment_Rate,Marrital_Status_Gender,Parties_Involved,Age,Housing_Type,Num_Existing_Credits,Job,...,no savings account,1 to 4 years,4 to 7 years,less than 1 year,more than 7 years,not employed,divorced or separated male,divorced or separated or married female,married or widowed male,single male
0,9,1,13790,2,married or widowed male,,27,own,1,employee with official position,...,0,1,0,0,0,0,0,0,1,0
1,15,0,15250,4,single male,Guarantor,50,own,2,employee with official position,...,1,0,0,0,1,0,0,0,0,1
2,36,1,19410,4,single male,,61,free,1,"employed either in management, self or in high...",...,0,0,0,0,1,0,0,0,0,1
3,48,1,144090,2,single male,,25,own,1,employee with official position,...,0,1,0,0,0,0,0,0,0,1
4,24,1,31690,4,divorced or separated or married female,,26,own,1,employee with official position,...,0,0,0,1,0,0,0,1,0,0


In [78]:
df=df.drop(["Marrital_Status_Gender"],axis=1)

In [79]:
dummies5=pd.get_dummies(df.Parties_Involved,dtype=int) 
df=pd.concat([df,dummies5],axis='columns')
df.head()

Unnamed: 0,Credit_Duration,Credit_History,Loan_Amount,Installment_Rate,Parties_Involved,Age,Housing_Type,Num_Existing_Credits,Job,Dependents,...,less than 1 year,more than 7 years,not employed,divorced or separated male,divorced or separated or married female,married or widowed male,single male,Co_applicant,Guarantor,None
0,9,1,13790,2,,27,own,1,employee with official position,1,...,0,0,0,0,0,1,0,0,0,1
1,15,0,15250,4,Guarantor,50,own,2,employee with official position,1,...,0,1,0,0,0,0,1,0,1,0
2,36,1,19410,4,,61,free,1,"employed either in management, self or in high...",1,...,0,1,0,0,0,0,1,0,0,1
3,48,1,144090,2,,25,own,1,employee with official position,1,...,0,0,0,0,0,0,1,0,0,1
4,24,1,31690,4,,26,own,1,employee with official position,1,...,1,0,0,0,1,0,0,0,0,1


In [80]:
df=df.drop(["Parties_Involved"],axis=1)

In [81]:
dummies6=pd.get_dummies(df.Job,dtype=int) 
df=pd.concat([df,dummies6],axis='columns')
df.head()

Unnamed: 0,Credit_Duration,Credit_History,Loan_Amount,Installment_Rate,Age,Housing_Type,Num_Existing_Credits,Job,Dependents,Foreign_Worker_Status,...,divorced or separated or married female,married or widowed male,single male,Co_applicant,Guarantor,None,"employed either in management, self or in high position",employee with official position,non resident either unemployed or unskilled,resident unskilled
0,9,1,13790,2,27,own,1,employee with official position,1,0,...,0,1,0,0,0,1,0,1,0,0
1,15,0,15250,4,50,own,2,employee with official position,1,0,...,0,0,1,0,1,0,0,1,0,0
2,36,1,19410,4,61,free,1,"employed either in management, self or in high...",1,0,...,0,0,1,0,0,1,1,0,0,0
3,48,1,144090,2,25,own,1,employee with official position,1,0,...,0,0,1,0,0,1,0,1,0,0
4,24,1,31690,4,26,own,1,employee with official position,1,0,...,1,0,0,0,0,1,0,1,0,0


In [82]:
df=df.drop(["Job"],axis=1)

In [83]:
dummies7=pd.get_dummies(df.Housing_Type,dtype=int) 
df=pd.concat([df,dummies7],axis='columns')
df.head()

Unnamed: 0,Credit_Duration,Credit_History,Loan_Amount,Installment_Rate,Age,Housing_Type,Num_Existing_Credits,Dependents,Foreign_Worker_Status,Credit_Score,...,Co_applicant,Guarantor,None,"employed either in management, self or in high position",employee with official position,non resident either unemployed or unskilled,resident unskilled,free,own,pays rent
0,9,1,13790,2,27,own,1,1,0,1,...,0,0,1,0,1,0,0,0,1,0
1,15,0,15250,4,50,own,2,1,0,1,...,0,1,0,0,1,0,0,0,1,0
2,36,1,19410,4,61,free,1,1,0,0,...,0,0,1,1,0,0,0,1,0,0
3,48,1,144090,2,25,own,1,1,0,0,...,0,0,1,0,1,0,0,0,1,0
4,24,1,31690,4,26,own,1,1,0,1,...,0,0,1,0,1,0,0,0,1,0


In [84]:
df=df.drop(["Housing_Type"],axis=1)

In [85]:
df.head()

Unnamed: 0,Credit_Duration,Credit_History,Loan_Amount,Installment_Rate,Age,Num_Existing_Credits,Dependents,Foreign_Worker_Status,Credit_Score,Rs. < 0,...,Co_applicant,Guarantor,None,"employed either in management, self or in high position",employee with official position,non resident either unemployed or unskilled,resident unskilled,free,own,pays rent
0,9,1,13790,2,27,1,1,0,1,0,...,0,0,1,0,1,0,0,0,1,0
1,15,0,15250,4,50,2,1,0,1,0,...,0,1,0,0,1,0,0,0,1,0
2,36,1,19410,4,61,1,1,0,0,0,...,0,0,1,1,0,0,0,1,0,0
3,48,1,144090,2,25,1,1,0,0,0,...,0,0,1,0,1,0,0,0,1,0
4,24,1,31690,4,26,1,1,0,1,0,...,0,0,1,0,1,0,0,0,1,0


In [86]:
df.columns

Index(['Credit_Duration', 'Credit_History', 'Loan_Amount', 'Installment_Rate',
       'Age', 'Num_Existing_Credits', 'Dependents', 'Foreign_Worker_Status',
       'Credit_Score', ' Rs. < 0', '0 <= Rs. < 2000', 'Rs. >=2000',
       'no checking account', '1000 <= Rs. < 5,000', '5000 <= Rs. < 10,000',
       'Rs. < 1000', 'Rs. >= 10,000', 'no savings account', '1 to 4 years',
       '4 to 7 years', 'less than 1 year', 'more than 7 years', 'not employed',
       'divorced or separated male', 'divorced or separated or married female',
       'married or widowed male', 'single male', 'Co_applicant', 'Guarantor',
       'None', 'employed either in management, self or in high position',
       'employee with official position',
       'non resident either unemployed or  unskilled ', 'resident unskilled',
       'free', 'own', 'pays rent'],
      dtype='object')

In [87]:
x=df[['Credit_Duration', 'Credit_History', 'Loan_Amount', 'Installment_Rate',
     'Age', 'Num_Existing_Credits', 'Dependents', 'Foreign_Worker_Status',
     ' Rs. < 0', '0 <= Rs. < 2000', 'Rs. >=2000',
       'no checking account', '1000 <= Rs. < 5,000', '5000 <= Rs. < 10,000',
       'Rs. < 1000', 'Rs. >= 10,000', 'no savings account', '1 to 4 years',
       '4 to 7 years', 'less than 1 year', 'more than 7 years', 'not employed',
       'divorced or separated male', 'divorced or separated or married female',
       'married or widowed male', 'single male', 'Co_applicant', 'Guarantor',
       'None', 'employed either in management, self or in high position',
       'employee with official position',
       'non resident either unemployed or  unskilled ', 'resident unskilled',
       'free', 'own', 'pays rent']]
y=df['Credit_Score']

In [88]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [89]:
model1=RandomForestClassifier(n_estimators=200,random_state=42)
model2=LogisticRegression(random_state=0)
model3=SVC(kernel="rbf", gamma=0.5, C=1.0)
model4=KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2 )  
model5=GaussianNB()

In [90]:
if y_train.dtype != 'int':
    y_train = y_train.astype(int)
if y_test.dtype != 'int':
    y_test = y_test.astype(int)

In [92]:
model1.fit(x_train,y_train)

In [93]:
model2.fit(x_train,y_train)

In [94]:
model3.fit(x_train,y_train)

In [95]:
model4.fit(x_train,y_train)

In [96]:
model5.fit(x_train,y_train)

In [104]:
y_pred1=model1.predict(x_test)

In [105]:
y_pred2=model2.predict(x_test)

In [106]:
y_pred3=model3.predict(x_test)

In [107]:
y_pred4=model4.predict(x_test.values)



In [108]:
y_pred5=model5.predict(x_test)

In [109]:
accuracy1=accuracy_score(y_test,y_pred1)
print(accuracy1)

0.825


In [110]:
accuracy2=accuracy_score(y_test,y_pred2)
print(accuracy2)

0.745


In [111]:
accuracy3=accuracy_score(y_test,y_pred1)
print(accuracy3)

0.825


In [112]:
accuracy4=accuracy_score(y_test,y_pred1)
print(accuracy4)

0.825


In [113]:
accuracy5=accuracy_score(y_test,y_pred1)
print(accuracy5)

0.825


In [114]:
import joblib 
joblib.dump(model1, 'filename.pkl')

In [255]:
classification_rep = classification_report(y_test, y_pred)

In [256]:
print(classification_rep)

              precision    recall  f1-score   support

           0       0.74      0.57      0.65        56
           1       0.85      0.92      0.88       144

    accuracy                           0.82       200
   macro avg       0.80      0.75      0.77       200
weighted avg       0.82      0.82      0.82       200

