In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier , AdaBoostClassifier
from sklearn.metrics import classification_report , confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [51]:
df = pd.read_csv(r'C:/Users/Shakil/Downloads/Compressed/Credit_Card_Applications.csv')
df.head()

Unnamed: 0,CustomerID,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,Class
0,15776156,1,22.08,11.46,2,4,4,1.585,0,0,0,1,2,100,1213,0
1,15739548,0,22.67,7.0,2,8,4,0.165,0,0,0,0,2,160,1,0
2,15662854,0,29.58,1.75,1,4,4,1.25,0,0,0,1,2,280,1,0
3,15687688,0,21.67,11.5,1,5,3,0.0,1,1,11,1,2,0,1,1
4,15715750,1,20.17,8.17,2,6,4,1.96,1,1,14,0,2,60,159,1


In [52]:
# drop column
df.drop(['CustomerID'], axis=1, inplace=True)

#### set feature and target

In [53]:
x = df.drop('Class', axis=1)
y = df['Class']

In [54]:
# split the data
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2,random_state=42)

#### train and test the models

In [55]:
models = [RandomForestClassifier(),
         GradientBoostingClassifier(),
         AdaBoostClassifier(),
         GaussianNB(),
         LogisticRegression(),
         KNeighborsClassifier(),
         DecisionTreeClassifier(),
         MLPClassifier(),
         SVC()]

for model in models:
    model.fit(xtrain, ytrain)
    ypred = model.predict(xtest)
    score = accuracy_score(ypred,ytest)
    model_name = model.__class__.__name__
    #print(f'{model_name} - Precision: {score:.2f}')
    print(f'{model_name}', score)

RandomForestClassifier 0.8840579710144928
GradientBoostingClassifier 0.8768115942028986
AdaBoostClassifier 0.8623188405797102
GaussianNB 0.8188405797101449
LogisticRegression 0.8115942028985508
KNeighborsClassifier 0.7028985507246377
DecisionTreeClassifier 0.8623188405797102
MLPClassifier 0.8405797101449275
SVC 0.6956521739130435


### standardization

In [56]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [57]:
# split the data
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2,random_state=42)

In [58]:
models = [RandomForestClassifier(),
         GradientBoostingClassifier(),
         AdaBoostClassifier(),
         GaussianNB(),
         LogisticRegression(),
         KNeighborsClassifier(),
         DecisionTreeClassifier(),
         MLPClassifier(),
         SVC()]

trained_models = {}
    
for model in models:
    model.fit(xtrain, ytrain)
    ypred = model.predict(xtest)
    score = accuracy_score(ypred, ytest)
    model_name = model.__class__.__name__
    trained_models[model_name] = model
    print(f'{model_name}', score)

RandomForestClassifier 0.8623188405797102
GradientBoostingClassifier 0.8768115942028986
AdaBoostClassifier 0.8623188405797102
GaussianNB 0.8188405797101449
LogisticRegression 0.8768115942028986
KNeighborsClassifier 0.8333333333333334
DecisionTreeClassifier 0.855072463768116
MLPClassifier 0.855072463768116
SVC 0.8478260869565217


In [60]:
# Access the trained models and their scores
print("Random Forest Test Accuracy {:.2f}%".format(trained_models['RandomForestClassifier'].score(xtest, ytest) * 100))
print("Gradient Boosting Test Accuracy {:.2f}%".format(trained_models['GradientBoostingClassifier'].score(xtest, ytest) * 100))
print("Ada Boosting Test Accuracy {:.2f}%".format(trained_models['AdaBoostClassifier'].score(xtest, ytest) * 100))
print("Gaussian NB Test Accuracy {:.2f}%".format(trained_models['DecisionTreeClassifier'].score(xtest, ytest) * 100))
print("Logistic Regression Test Accuracy {:.2f}%".format(trained_models['LogisticRegression'].score(xtest, ytest) * 100))
print("KNN Test Accuracy {:.2f}%".format(trained_models['KNeighborsClassifier'].score(xtest, ytest) * 100))
print("Decision Tree Test Accuracy {:.2f}%".format(trained_models['DecisionTreeClassifier'].score(xtest, ytest) * 100))
print("MLP Classifier Test Accuracy {:.2f}%".format(trained_models['DecisionTreeClassifier'].score(xtest, ytest) * 100))
print("SVC Test Accuracy {:.2f}%".format(trained_models['DecisionTreeClassifier'].score(xtest, ytest) * 100))


Random Forest Test Accuracy 86.23%
Gradient Boosting Test Accuracy 87.68%
Ada Boosting Test Accuracy 86.23%
Gaussian NB Test Accuracy 85.51%
Logistic Regression Test Accuracy 87.68%
KNN Test Accuracy 83.33%
Decision Tree Test Accuracy 85.51%
MLP Classifier Test Accuracy 85.51%
SVC Test Accuracy 85.51%


## As we can see, the accuracy of several of our models has increased after scaling. 
### Such as, 

##### LogisticRegression 0.81
##### KNeighborsClassifier 0.70
##### SVC 0.69

### *********************

##### LogisticRegression 0.87
##### KNeighborsClassifier 0.83
##### SVC 0.84


In [38]:
def performance(model, xtest, ytest):
    ypred = model.predict(xtest)
    #print(classification_report(ytest, ypred))
    #print(confusion_matrix(ytest, ypred))
    print(accuracy_score(ytest, ypred))
    