<img src="https://webassets.unt.edu/assets/branding/unt-stacked-logo.svg" alt="UNT | University of North Texas" class="desktop-logo" width="300" height="500">

<div style="text-align: left"><strong>Datasets:</strong> </div>
<br> loan_credit.xlsx

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
import matplotlib.pylab as plt
from matplotlib.pyplot import subplots
%matplotlib inline

In [None]:
# Load the loan workbook and keep only the rows that have no missing values.
credit_df = pd.read_excel('loan_credit.xlsx').dropna()
credit_df.head()

In [None]:
# Encode 'Loan Status' and 'Term' as single 0/1 indicator columns.
# drop_first=True drops the first category level, so each call must yield
# exactly one column for the assignment to succeed (i.e. the source column is
# expected to have two distinct values).
# dtype=int forces integer indicators: on pandas >= 2.0 get_dummies defaults to
# bool, which would leave these columns out of the numeric summary below and
# label plots/reports with True/False instead of 1/0.
# NOTE(review): which status/term maps to 1 depends on the category ordering in
# the data — confirm before interpreting the model output.
credit_df['Loan_Dummy'] = pd.get_dummies(credit_df['Loan Status'], drop_first=True, dtype=int)
credit_df['Term_Dummy'] = pd.get_dummies(credit_df['Term'], drop_first=True, dtype=int)

# Remove the identifier, the two columns just encoded, and the remaining
# categorical columns that are not used as predictors. Reassigning (rather
# than inplace=True) keeps the step an ordinary expression.
credit_df = credit_df.drop(
    columns=['Customer ID', 'Loan Status', 'Term', 'Home Ownership', 'Purpose']
)
credit_df.head()

In [None]:
# Summary statistics of the encoded frame, shown as the cell's output.
credit_df.describe()

In [None]:
# Print a concise structural summary of credit_df (columns and their types).
credit_df.info()

In [None]:
# Income plotted against the encoded loan-status indicator.
sns.scatterplot(data=credit_df, x='Income', y='Loan_Dummy')

In [None]:
# Credit score against loan amount, coloured by the loan-status indicator.
sns.scatterplot(
    data=credit_df,
    x='Credit Score',
    y='Loan Amount',
    hue='Loan_Dummy',
)

In [None]:
plt.figure(figsize=(10, 6))
# Overlay one semi-transparent Income histogram per Loan_Dummy value
# (1 first in blue, then 0 in red) on the same axes.
for flag, shade in ((1, 'blue'), (0, 'red')):
    income = credit_df.loc[credit_df['Loan_Dummy'] == flag, 'Income']
    income.hist(alpha=0.5, color=shade, bins=35, label=f'Loan_Dummy={flag}')
plt.legend()
plt.xlabel('Income')

## Decision Tree

In [None]:
# Target is the encoded loan status; every other remaining column is a predictor.
y = credit_df['Loan_Dummy']
X = credit_df.drop(columns=['Loan_Dummy'])

In [None]:
# Hold out 40% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=1
)

In [None]:
# Fit a depth-2 classification tree on the training split (seeded for
# reproducibility), then draw it with the predictor names on the split nodes.
dtree = DecisionTreeClassifier(random_state=1, max_depth=2).fit(X_train, y_train)

fig, ax = plt.subplots(figsize=(12, 12))
plot_tree(dtree, ax=ax, feature_names=X.columns.tolist(), filled=True)
plt.show()

In [None]:
#Predictions
predictions=dtree.predict(X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Compare the decision tree's test-set predictions with the true labels:
# confusion matrix first, a blank gap, then the per-class report.
dtree_cm = confusion_matrix(y_test, predictions)
dtree_report = classification_report(y_test, predictions)
print(dtree_cm)
print('\n')
print(dtree_report)

## Random Forest

In [None]:
# Random forest of 500 depth-2 trees, seeded for reproducibility.
# RandomForestClassifier is already imported in the notebook's first code cell.
rf_params = dict(n_estimators=500, max_depth=2, random_state=1)
rfc = RandomForestClassifier(**rf_params)
rfc.fit(X_train, y_train)

In [None]:
#Random Forest predictions
rfc_pred=rfc.predict(X_test)

In [None]:
# Compare the random forest's test-set predictions with the true labels:
# confusion matrix first, a blank gap, then the per-class report.
rfc_cm = confusion_matrix(y_test, rfc_pred)
rfc_report = classification_report(y_test, rfc_pred)
print(rfc_cm)
print('\n')
print(rfc_report)

In [None]:
# Forest-level importance of each predictor, plus the standard deviation of
# that importance across the individual trees (drawn as error bars below).
importances=rfc.feature_importances_
std=np.std([tree.feature_importances_ for tree in rfc.estimators_],axis=0)

# One row per predictor, sorted by importance in ascending order.
df=pd.DataFrame({'feature':X_train.columns,'importance':importances,'std':std})
df=df.sort_values('importance')
print(df)

# Horizontal bar chart of importance per feature with the per-tree spread as
# horizontal error bars; the y-axis label is blanked because the tick labels
# already name the features.
ax=df.plot(kind='barh',xerr='std', x='feature',legend=False)
ax.set_ylabel('')

plt.tight_layout()
# Call show(); a bare `plt.show` only references the function and the cell
# would echo its repr instead of finalising the figure.
plt.show()