In [1]:
import numpy as np
# Print arrays of up to 10000 elements in full, and use fixed-point instead of
# scientific notation for small floats.
np.set_printoptions(threshold=10000,suppress=True)
import pandas as pd
import warnings
import matplotlib.pyplot as plt
# NOTE(review): this silences every warning for the whole session, which also
# hides library diagnostics (e.g. non-convergence reports from iterative
# estimators). Consider a narrower filter.
warnings.filterwarnings('ignore')

## Partie 1

In [2]:
# Load the semicolon-separated credit scoring file; the first row of the file
# provides the column names.
credit = pd.read_csv('./credit_scoring.csv', sep=";", header = 0)
# Preview the first three rows.
credit.head(3)

Unnamed: 0,Seniority,Home,Time,Age,Marital,Records,Job,Expenses,Income,Assets,Debt,Amount,Price,Status
0,9.0,1.0,60.0,30.0,0.0,1.0,1.0,73.0,129.0,0.0,0.0,800.0,846.0,1
1,17.0,1.0,60.0,58.0,1.0,1.0,0.0,48.0,131.0,0.0,0.0,1000.0,1658.0,1
2,10.0,0.0,36.0,46.0,0.0,2.0,1.0,90.0,200.0,3000.0,0.0,2000.0,2985.0,0


In [3]:
# (rows, columns) of the loaded table.
credit.shape

(4375, 14)

In [4]:
# Split the table into predictors and target as NumPy arrays.
# The target ("Status") is the last column; every column before it is a
# feature. Negative indexing avoids hard-coding the number of feature columns.
X = credit.iloc[:, :-1].values
Y = credit.iloc[:, -1].values

In [5]:
# Share of each target class, in percent of all rows.
# Label 0 is reported as "MP" and label 1 as "BP"
# (presumably bad / good payers -- confirm against the dataset description).
MP = (Y == 0).sum() * 100 / len(Y)
BP = (Y == 1).sum() * 100 / len(Y)

print(f"MP : {MP!s}/ BP : {BP!s}")

MP : 27.794285714285714/ BP : 72.20571428571428


## Apprentissage

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier

In [7]:
# Hold out half of the rows for testing; the fixed random_state keeps the
# split identical from one run to the next.
XTrain, XTest, YTrain, YTest = train_test_split(X, Y, test_size = 0.5, random_state = 1)

In [8]:
def _fit_and_report(label, model, XTrain, XTest, YTrain, YTest):
    """Fit ``model`` on the training split and print its test-split report.

    Prints, in order: the ``label`` header, the confusion matrix of the
    predictions on ``XTest``, and one line with accuracy and precision as
    percentages rounded to two decimals.
    """
    model.fit(XTrain, YTrain)
    predicted = model.predict(XTest)

    print(label + " :")
    print(confusion_matrix(YTest, predicted))
    accuracy = np.round(100 * accuracy_score(YTest, predicted), 2)
    precision = np.round(100 * precision_score(YTest, predicted), 2)
    print("Accuracy : " + str(accuracy) + "% / Precision : " + str(precision) + "%")


def classifier(XTrain, XTest, YTrain, YTest):
    """Train and evaluate a decision tree, a k-NN and an MLP on one split.

    Each estimator is fitted on ``(XTrain, YTrain)`` and scored on
    ``(XTest, YTest)``; the results are printed, nothing is returned.

    Parameters
    ----------
    XTrain, XTest : array-like
        Feature matrices for the training and the test split.
    YTrain, YTest : array-like
        Matching target labels. ``precision_score`` is called with its
        default settings, so the labels are assumed to be binary with 1 as
        the positive class -- confirm if the target encoding changes.
    """
    # Same three estimators and hyper-parameters for every preprocessing
    # variant, so the printed reports are directly comparable.
    models = (
        ("Decision Tree", DecisionTreeClassifier(criterion='gini', random_state=1)),
        ("KNN", KNeighborsClassifier(n_neighbors=5)),
        ("MLP", MLPClassifier(random_state=1, hidden_layer_sizes=(40, 20))),
    )
    for label, model in models:
        _fit_and_report(label, model, XTrain, XTest, YTrain, YTest)

In [9]:
# Baseline: evaluate the three models on the raw, unscaled features.
classifier(XTrain, XTest, YTrain, YTest)

Decision Tree :
[[ 325  279]
 [ 318 1266]]
Accuracy : 72.71% / Precision : 81.94%
KNN :
[[ 189  415]
 [ 187 1397]]
Accuracy : 72.49% / Precision : 77.1%
MLP :
[[ 167  437]
 [ 222 1362]]
Accuracy : 69.88% / Precision : 75.71%


## Normalisation des données

In [10]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
scaler = StandardScaler().fit(XTrain)
XTrainNorm, XTestNorm = scaler.transform(XTrain), scaler.transform(XTest)

# Re-run the three models on the standardised features.
classifier(XTrainNorm, XTestNorm, YTrain, YTest)

Decision Tree :
[[ 322  282]
 [ 315 1269]]
Accuracy : 72.71% / Precision : 81.82%
KNN :
[[ 286  318]
 [ 223 1361]]
Accuracy : 75.27% / Precision : 81.06%
MLP :
[[ 342  262]
 [ 227 1357]]
Accuracy : 77.65% / Precision : 83.82%


In [11]:
# Min-max scaling variant. As above, the scaler is fitted on the training
# split only and then applied to both splits.
scaler = MinMaxScaler().fit(XTrain)
rescale = scaler.transform
XTrainNormMinMax = rescale(XTrain)
XTestNormMinMax = rescale(XTest)

# Re-run the three models on the min-max scaled features.
classifier(XTrainNormMinMax, XTestNormMinMax, YTrain, YTest)

Decision Tree :
[[ 325  279]
 [ 315 1269]]
Accuracy : 72.85% / Precision : 81.98%
KNN :
[[ 283  321]
 [ 236 1348]]
Accuracy : 74.54% / Precision : 80.77%
MLP :
[[ 321  283]
 [ 179 1405]]
Accuracy : 78.88% / Precision : 83.23%


## ACP

In [18]:
# Fit a 3-component PCA on the standardised training features.
pca = PCA(n_components=3).fit(XTrainNorm)

# Append the three principal components as extra columns to the standardised
# features (the originals are kept, not replaced).
XTrainPCA = np.concatenate([XTrainNorm, pca.transform(XTrainNorm)], axis=1)
XTestPCA = np.concatenate([XTestNorm, pca.transform(XTestNorm)], axis=1)

# Re-run the three models on the augmented feature set.
classifier(XTrainPCA, XTestPCA, YTrain, YTest)


Decision Tree :
[[ 351  253]
 [ 324 1260]]
Accuracy : 73.63% / Precision : 83.28%
KNN :
[[ 283  321]
 [ 212 1372]]
Accuracy : 75.64% / Precision : 81.04%
MLP :
[[ 318  286]
 [ 217 1367]]
Accuracy : 77.01% / Precision : 82.7%


In [15]:
# Sanity check: the column count should be the number of original features
# plus the 3 principal components appended above.
XTrainPCA.shape

(2187, 16)