In [1]:
# -*- coding: utf-8 -*-
from __future__ import division
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from collections import Counter 
from imblearn.combine import SMOTEENN

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import validation_curve

from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve,  roc_auc_score

from sklearn.metrics import mean_squared_error #MSE
from sklearn.metrics import mean_absolute_error #MAE
from sklearn.metrics import r2_score#R 2
from sklearn.metrics import cohen_kappa_score#Kappa

from sklearn import datasets  
from sklearn import model_selection  
from mlxtend.classifier import StackingClassifier 

from sklearn.preprocessing import OneHotEncoder
from scipy.sparse import hstack



In [2]:
#Precision Statistics
import warnings
warnings.filterwarnings("ignore")
def accuracy(clf,x_train,x_test,y_train,y_test):
    y_pred = clf.predict(x_test)
    print("acc_test=", accuracy_score(y_test, y_pred))
    y_pred = clf.predict(x_train)
    print("acc_train=", accuracy_score(y_train, y_pred))
#分类报告
def report (modle,x_train,y_train):
    y_pred=modle.predict(x_train)
    reports=classification_report(y_train,y_pred)
    print("classification report：\n", reports)

def cross_accuracy(clf,x_train,x_test,y_train,y_test):
    scores=cross_val_score(clf,x_test,y_test,cv=5,scoring='accuracy')
    print("acc_cv_test=",scores.mean())
    scores=cross_val_score(clf,x_train,y_train,cv=5,scoring='accuracy')
    print("acc_cv_train=",scores.mean() )

In [3]:
# Load dataset
data = pd.read_csv(r'E:/huan/ml/data/new/new01.csv')
data = data.fillna(0)
X = data.iloc[:,np.r_[1:13,16:17]].values
Y = data['remote1'].values 
print(X,Y)
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, train_size=0.8)
x_train, x_train_lr, y_train, y_train_lr = train_test_split(
    x_train, y_train, test_size=0.2)

[[2.65522594e+01 2.65522594e+01 7.99000000e+03 ... 1.00677980e+00
  1.40000000e+03 1.20000000e+01]
 [2.69640179e+01 2.69640179e+01 7.99000000e+03 ... 1.00377230e+00
  1.40000000e+03 1.20000000e+01]
 [2.46682053e+01 2.46682053e+01 7.99000000e+03 ... 1.00395260e+00
  1.40000000e+03 1.20000000e+01]
 ...
 [2.38685815e+03 2.58289868e+03 8.65500000e+03 ... 1.08457950e+00
  1.40000000e+03 9.00000000e+00]
 [1.99688309e+02 5.48773003e+01 5.85900000e+03 ... 1.04616280e+00
  1.20000000e+03 1.10000000e+01]
 [6.57715186e+03 7.15558740e+03 8.46600000e+03 ... 1.09718880e+00
  1.60000000e+03 1.00000000e+00]] [1 1 1 ... 0 0 0]


In [4]:
# GradientBoosting
clf1=GradientBoostingClassifier(max_depth= 34, max_features=0.9, max_leaf_nodes=375, n_estimators=43)
clf1.fit(x_train,y_train)
y_test_pred = clf1.predict(x_test)
print("Model GradientBoostingClassifier,Accuracy %0.6f:"%(accuracy_score(y_test,y_test_pred)))
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1score = f1_score(y_test,y_test_pred)
print("precision=%f recall=%f f1score=%f"%(precision, recall, f1score))
MSE=mean_squared_error(y_test,y_test_pred)
MAE=mean_absolute_error(y_test,y_test_pred)
RMSE=np.sqrt(mean_squared_error(y_test,y_test_pred))
R2=r2_score(y_test,y_test_pred)
Kappa=cohen_kappa_score(y_test, y_test_pred)

print("MSE=%f MAE=%f RMSE=%f R2=%f Kappa=%f"%(MSE,MAE,RMSE,R2,Kappa))
report (clf1,x_test,y_test)

Model GradientBoostingClassifier,Accuracy 0.766820:
precision=0.687049 recall=0.532165 f1score=0.599769
MSE=0.233180 MAE=0.233180 RMSE=0.482887 R2=-0.057393 Kappa=0.438976
classification report：
               precision    recall  f1-score   support

           0       0.79      0.88      0.84     10972
           1       0.69      0.53      0.60      5363

    accuracy                           0.77     16335
   macro avg       0.74      0.71      0.72     16335
weighted avg       0.76      0.77      0.76     16335



In [5]:
# Integration of GBDT and LR models 
grd = clf1
grd_enc = OneHotEncoder()
grd_lm = LogisticRegression(solver='lbfgs', max_iter=1000)
 
grd.fit(x_train, y_train)
grd_enc.fit(grd.apply(x_train)[:, :, 0])
grd_lm.fit(grd_enc.transform(grd.apply(x_train_lr)[:, :, 0]), y_train_lr)
 
y_pred_grd_lm = grd_lm.predict_proba(
    grd_enc.transform(grd.apply(x_test)[:, :, 0]))[:, 1]
fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred_grd_lm)
print("The AUC of GBDT+LR is：", roc_auc_score(y_test, y_pred_grd_lm))

y_testpred = grd_lm.predict(
    grd_enc.transform(grd.apply(x_test)[:, :, 0]))    
print("GBT+LR,Accuracy %0.6f:"%(accuracy_score(y_test, y_testpred)))
precision = precision_score(y_test, y_testpred)
recall = recall_score(y_test, y_testpred)
f1score = f1_score(y_test, y_testpred)
print("precision=%f recall=%f f1score=%f"%(precision, recall, f1score))
MSE=mean_squared_error(y_test, y_testpred)
MAE=mean_absolute_error(y_test, y_testpred)
RMSE=np.sqrt(mean_squared_error(y_test, y_testpred))
R2=r2_score(y_test, y_testpred)
Kappa=cohen_kappa_score(y_test, y_testpred)
print("MSE=%f MAE=%f RMSE=%f R2=%f Kappa=%f"%(MSE,MAE,RMSE,R2,Kappa))


The AUC of GBDT+LR is： 0.7578938921298763
GBT+LR,Accuracy 0.718212:
precision=0.577173 recall=0.529927 f1score=0.552542
MSE=0.281788 MAE=0.281788 RMSE=0.530837 R2=-0.277811 Kappa=0.347441


In [6]:
# RandomForest
clf2=RandomForestClassifier(max_depth=20, max_features=0.8, max_leaf_nodes=45, n_estimators=1100)
clf2.fit(x_train,y_train)
y_test_pred = clf2.predict(x_test)
print("Model RandomForestClassifier,Accuracy %0.6f:"%(accuracy_score(y_test,y_test_pred)))
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test,y_test_pred)
f1score = f1_score(y_test,y_test_pred)
print("precision=%f recall=%f f1score=%f"%(precision, recall, f1score))
MSE=mean_squared_error(y_test,y_test_pred)
MAE=mean_absolute_error(y_test,y_test_pred)
RMSE=np.sqrt(mean_squared_error(y_test,y_test_pred))
R2=r2_score(y_test,y_test_pred)
Kappa=cohen_kappa_score(y_test, y_test_pred)
print("MSE=%f MAE=%f RMSE=%f R2=%f Kappa=%f"%(MSE,MAE,RMSE,R2,Kappa))
report (clf2,x_test,y_test)

Model RandomForestClassifier,Accuracy 0.724518:
precision=0.632159 recall=0.384859 f1score=0.478442
MSE=0.275482 MAE=0.275482 RMSE=0.524864 R2=-0.249217 Kappa=0.305996
classification report：
               precision    recall  f1-score   support

           0       0.75      0.89      0.81     10972
           1       0.63      0.38      0.48      5363

    accuracy                           0.72     16335
   macro avg       0.69      0.64      0.65     16335
weighted avg       0.71      0.72      0.70     16335



In [7]:
# Integration of RF and LR models
rf = clf2
rf_enc = OneHotEncoder()
rf_lm = LogisticRegression(solver='lbfgs', max_iter=1000)    

rf.fit(x_train, y_train)
rf_enc.fit(rf.apply(x_train))
rf_lm.fit(rf_enc.transform(rf.apply(x_train_lr)), y_train_lr)
     
y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(x_test)))[:, 1]
fpr_rf_lm, tpr_rf_lm, _ = roc_curve(y_test, y_pred_rf_lm)
print("The AUC of RF+LR is：", roc_auc_score(y_test, y_pred_rf_lm))

y_testpred = rf_lm.predict(rf_enc.transform(rf.apply(x_test)))    
print("RF+LR,Accuracy %0.6f:"%(accuracy_score(y_test, y_testpred)))
precision = precision_score(y_test, y_testpred)
recall = recall_score(y_test, y_testpred)
f1score = f1_score(y_test, y_testpred)
print("precision=%f recall=%f f1score=%f"%(precision, recall, f1score))
MSE=mean_squared_error(y_test, y_testpred)
MAE=mean_absolute_error(y_test, y_testpred)
RMSE=np.sqrt(mean_squared_error(y_test, y_testpred))
R2=r2_score(y_test, y_testpred)
Kappa=cohen_kappa_score(y_test, y_testpred)
print("MSE=%f MAE=%f RMSE=%f R2=%f Kappa=%f"%(MSE,MAE,RMSE,R2,Kappa))


The AUC of RF+LR is： 0.6946599667629888
RF+LR,Accuracy 0.686134:
precision=0.523137 recall=0.497483 f1score=0.509988
MSE=0.313866 MAE=0.313866 RMSE=0.560237 R2=-0.423275 Kappa=0.279329


In [8]:
# xgboost
clf4=xgb.XGBClassifier(max_depth=9, n_estimators=145, reg_alpha=0.9, reg_lambda=0.8,eval_metric=['logloss','auc','error'],learning_rate=0.1,n_jobs=-1)
clf4.fit(x_train,y_train)
y_test_pred = clf4.predict(x_test)
print("Model XGBClassifier,Accuracy %0.6f:"%(accuracy_score(y_test,y_test_pred)))
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1score = f1_score(y_test,y_test_pred)
print("precision=%f recall=%f f1score=%f"%(precision, recall, f1score))
MSE=mean_squared_error(y_test,y_test_pred)
MAE=mean_absolute_error(y_test,y_test_pred)
RMSE=np.sqrt(mean_squared_error(y_test,y_test_pred))
R2=r2_score(y_test,y_test_pred)
Kappa=cohen_kappa_score(y_test, y_test_pred)
print("MSE=%f MAE=%f RMSE=%f R2=%f Kappa=%f"%(MSE,MAE,RMSE,R2,Kappa))


Model XGBClassifier,Accuracy 0.773370:
precision=0.697409 recall=0.547082 f1score=0.613166
MSE=0.226630 MAE=0.226630 RMSE=0.476057 R2=-0.027690 Kappa=0.456194


In [9]:
# Integration of xgboost and LR models
xgb = clf4
xgb_enc = OneHotEncoder()
xgb_lm = LogisticRegression(solver='lbfgs', max_iter=1000)
 
xgb.fit(x_train, y_train)
xgb_enc.fit(xgb.apply(x_train))
xgb_lm.fit(xgb_enc.transform(xgb.apply(x_train_lr)), y_train_lr)
 
y_pred_xgb_lm = xgb_lm.predict_proba(
    xgb_enc.transform(xgb.apply(x_test)))[:, 1]
fpr_xgb_lm, tpr_xgb_lm, _ = roc_curve(y_test, y_pred_xgb_lm)
print("The AUC of XGB+LR is：", roc_auc_score(y_test, y_pred_xgb_lm))

y_testpred = xgb_lm.predict(
    xgb_enc.transform(xgb.apply(x_test))) 
print("xgboost+LR,Accuracy %0.6f:"%(accuracy_score(y_test, y_testpred)))
precision = precision_score(y_test, y_testpred)
recall = recall_score(y_test, y_testpred)
f1score = f1_score(y_test, y_testpred)
print("precision=%f recall=%f f1score=%f"%(precision, recall, f1score))
MSE=mean_squared_error(y_test, y_testpred)
MAE=mean_absolute_error(y_test, y_testpred)
RMSE=np.sqrt(mean_squared_error(y_test, y_testpred))
R2=r2_score(y_test, y_testpred)
Kappa=cohen_kappa_score(y_test, y_testpred)
print("MSE=%f MAE=%f RMSE=%f R2=%f Kappa=%f"%(MSE,MAE,RMSE,R2,Kappa))


The AUC of XGB+LR is： 0.767866202098077
xgboost+LR,Accuracy 0.719988:
precision=0.578695 recall=0.540929 f1score=0.559175
MSE=0.280012 MAE=0.280012 RMSE=0.529162 R2=-0.269760 Kappa=0.354350


In [10]:
#data = pd.read_csv(r'E:/huan/ml/data/new/zjjdata.csv')
#data = data.fillna(0)
#x_fearures_new1 = data.iloc[:,np.r_[1:13,16:17]].values
#y_label_new1_predict = clf2.predict_proba(x_fearures_new1)
#y_predict1 = y_label_new1_predict[:,1]
#print('The New point 1 predict class:\n',y_predict1)
#result = pd.DataFrame(y_predict1)
#data['rf']=result
##result=pd.DataFrame(columns=['yuce'], data=y_predict1)
#data.to_csv(r'E:/huan/ml/data/new/zjjdata.csv',mode = 'a',index=False)
#print(data)

In [11]:
#data = pd.read_csv(r'E:/huan/ml/data/new/zjjdata.csv')
#data = data.fillna(0)
#x_fearures_new1 = data.iloc[:,np.r_[1:13,16:17]].values
#y_label_new1_predict = clf1.predict_proba(x_fearures_new1)
#y_predict1 = y_label_new1_predict[:,1]
#print('The New point 1 predict class:\n',y_predict1)
#result = pd.DataFrame(y_predict1)
#data['gbdt']=result
##result=pd.DataFrame(columns=['yuce'], data=y_predict1)
#data.to_csv(r'E:/huan/ml/data/new/zjjdata.csv',mode = 'a',index=False)
#print(data)

In [12]:
#data = pd.read_csv(r'E:/huan/ml/data/new/zjjdata.csv')
#data = data.fillna(0)
#x_fearures_new1 = data.iloc[:,np.r_[1:13,16:17]].values
#y_label_new1_predict = clf4.predict_proba(x_fearures_new1)
#y_predict1 = y_label_new1_predict[:,1]
#print('The New point 1 predict class:\n',y_predict1)
#result = pd.DataFrame(y_predict1)
#data['xgb']=result
##result=pd.DataFrame(columns=['yuce'], data=y_predict1)
#data.to_csv(r'E:/huan/ml/data/new/zjjdata.csv',mode = 'a',index=False)
#print(data)

In [14]:
data = pd.read_csv(r'E:/huan/ml/data/new/zjjdata.csv')
data = data.fillna(0)
x_fearures_new1 = data.iloc[:,np.r_[1:13,16:17]].values
y_label_new1_predict = rf_lm.predict_proba(
    rf_enc.transform(rf.apply(x_fearures_new1)))
y_predict1 = y_label_new1_predict[:,1]
print('The New point 1 predict class:\n',y_predict1)
result = pd.DataFrame(y_predict1)
data['rflr']=result
#result=pd.DataFrame(columns=['yuce'], data=y_predict1)
data.to_csv('E:/huan/ml/data/new/zjjdata.csv',mode = 'a',index=False)
print(data)

The New point 1 predict class:
 [0.56189825 0.09115646 0.01632896 ... 0.34604173 0.06106415 0.00063088]
         OBJECTID       rivers         roads  NDVI      aspect     plane  \
0               1  5213.345703   5511.335449  8172  134.573517  0.181283   
1               2     0.000000      0.000000     0    0.000000  0.000000   
2               3     9.365650      9.365650  7990  280.426361  0.809007   
3               4  5140.192383   5527.872559  8172  137.206558  0.580557   
4               5  5100.440918   5601.580566  8172  118.792221  0.319655   
...           ...          ...           ...   ...         ...       ...   
1166578   1166579  4473.856934  11240.130859     0    0.000000  0.000000   
1166579   1166580  4503.968750  11278.761719     0    0.000000  0.000000   
1166580   1166581  4535.937988  11318.237305     0    0.000000  0.000000   
1166581   1166582  4569.834473  11358.653320     0    0.000000  0.000000   
1166582   1166583  4599.093750  11392.606445     0    0.0000

In [20]:
data = pd.read_csv(r'E:/huan/ml/data/new/zjjdata.csv')
data = data.fillna(0)
x_fearures_new1 = data.iloc[:,np.r_[1:13,16:17]].values
#y_label_new1_predict = grd_lm.predict_proba(grd_enc.transform(grd.apply(x_fearures_new1)))[:, 1]
y_label_new1_predict = grd_lm.predict_proba(grd_enc.transform(grd.apply(x_fearures_new1)[:, :, 0]))
y_predict1 = y_label_new1_predict[:,1]
print('The New point 1 predict class:\n',y_predict1)
result = pd.DataFrame(y_predict1)
data['gbdtlr']=result
#result=pd.DataFrame(columns=['yuce'], data=y_predict1)
data.to_csv(r'E:/huan/ml/data/new/zjjdata.csv',mode = 'a',index=False)
print(data)

The New point 1 predict class:
 [0.85059018 0.11779791 0.0875024  ... 0.32355981 0.04724984 0.00339247]
         OBJECTID       rivers         roads  NDVI      aspect     plane  \
0               1  5213.345703   5511.335449  8172  134.573517  0.181283   
1               2     0.000000      0.000000     0    0.000000  0.000000   
2               3     9.365650      9.365650  7990  280.426361  0.809007   
3               4  5140.192383   5527.872559  8172  137.206558  0.580557   
4               5  5100.440918   5601.580566  8172  118.792221  0.319655   
...           ...          ...           ...   ...         ...       ...   
1166578   1166579  4473.856934  11240.130859     0    0.000000  0.000000   
1166579   1166580  4503.968750  11278.761719     0    0.000000  0.000000   
1166580   1166581  4535.937988  11318.237305     0    0.000000  0.000000   
1166581   1166582  4569.834473  11358.653320     0    0.000000  0.000000   
1166582   1166583  4599.093750  11392.606445     0    0.0000

In [22]:
data = pd.read_csv(r'E:/huan/ml/data/new/zjjdata.csv')
data = data.fillna(0)
x_fearures_new1 = data.iloc[:,np.r_[1:13,16:17]].values
x_fearures_new1 = xgb_lm.predict_proba(
    xgb_enc.transform(xgb.apply(x_fearures_new1)))
y_predict1 = y_label_new1_predict[:,1]
print('The New point 1 predict class:\n',y_predict1)
result = pd.DataFrame(y_predict1)
data['xgblr1']=result
#result=pd.DataFrame(columns=['yuce'], data=y_predict1)
data.to_csv(r'E:/huan/ml/data/new/zjjdata.csv',mode = 'a',index=False)
print(data)

The New point 1 predict class:
 [0.85059018 0.11779791 0.0875024  ... 0.32355981 0.04724984 0.00339247]
         OBJECTID       rivers         roads  NDVI      aspect     plane  \
0               1  5213.345703   5511.335449  8172  134.573517  0.181283   
1               2     0.000000      0.000000     0    0.000000  0.000000   
2               3     9.365650      9.365650  7990  280.426361  0.809007   
3               4  5140.192383   5527.872559  8172  137.206558  0.580557   
4               5  5100.440918   5601.580566  8172  118.792221  0.319655   
...           ...          ...           ...   ...         ...       ...   
1166578   1166579  4473.856934  11240.130859     0    0.000000  0.000000   
1166579   1166580  4503.968750  11278.761719     0    0.000000  0.000000   
1166580   1166581  4535.937988  11318.237305     0    0.000000  0.000000   
1166581   1166582  4569.834473  11358.653320     0    0.000000  0.000000   
1166582   1166583  4599.093750  11392.606445     0    0.0000

In [None]:
# ROC curves
from sklearn import metrics
import pylab as plt

Font={'size':18, 'family':'Times New Roman'}

y_probas1 = clf2.predict_proba(x_test)
y_probas2 = clf1.predict_proba(x_test)
y_probas3 = clf4.predict_proba(x_test)

y_scores1 = y_probas1[:,1]
y_scores2 = y_probas2[:,1]
y_scores3 = y_probas3[:,1]

fpr1,tpr1,thres1 = metrics.roc_curve(y_test, y_scores1,drop_intermediate=False)
fpr2,tpr2,thres2 = metrics.roc_curve(y_test, y_scores2,drop_intermediate=False)
fpr3,tpr3,thres3 = metrics.roc_curve(y_test, y_scores3,drop_intermediate=False)

fpr4,tpr4,thres4 = roc_curve(y_test, y_pred_rf_lm)
fpr5,tpr5,thres5 = roc_curve(y_test, y_pred_grd_lm)
fpr6,tpr6,thres6 = roc_curve(y_test, y_pred_xgb_lm)

roc_auc4 = metrics.auc(fpr1, tpr1)
roc_auc5 = metrics.auc(fpr2, tpr2)
roc_auc6 = metrics.auc(fpr3, tpr3)

roc_auc1 = metrics.auc(fpr4, tpr4)
roc_auc2 = metrics.auc(fpr5, tpr5)
roc_auc3 = metrics.auc(fpr6, tpr6)

print(roc_auc1,roc_auc2,roc_auc3,roc_auc4,roc_auc5,roc_auc6)
  
plt.figure(figsize=(6,6))
plt.plot(fpr4, tpr4, 'b', label = 'RF = %0.4f' % roc_auc1, color='Red')
plt.plot(fpr5, tpr5, 'b', label = 'GBDT = %0.4f' % roc_auc2, color='k')
plt.plot(fpr5, tpr5, 'b', label = 'XGB = %0.4f' % roc_auc3, color='Blue')

plt.plot(fpr1, tpr1, 'b', label = 'RF+LR = %0.4f' % roc_auc4, color='orange')
plt.plot(fpr2, tpr2, 'b', label = 'GBDT+LR = %0.4f' % roc_auc5, color='Green')
plt.plot(fpr3, tpr3, 'b', label = 'XGB+LR = %0.4f' % roc_auc6, color='purple')
    
plt.legend(loc = 'lower right', prop=Font)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate', Font)
plt.xlabel('False Positive Rate', Font)
plt.tick_params(labelsize=15)
plt.show()

In [None]:
# Confusion matrix
import matplotlib.pyplot as plt
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Greens):
    """
    This function prints and plots the confusion matrix. 
    
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'
    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
   
    print(cm)
    fig, ax = plt.subplots(figsize=(12, 8),dpi=200)
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           ylabel='True label',
           xlabel='Predicted label')
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",rotation_mode="anchor")
    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black",
                    fontsize=12)
    fig.tight_layout()
    return ax

labels = ['0','1']

In [None]:
# Implementation of confusion matrixs
plot_confusion_matrix(y_test, clf1.predict(x_test), classes=labels, normalize=True,title='Normalized confusion matrix')
plot_confusion_matrix(y_test, clf2.predict(x_test), classes=labels, normalize=True,title='Normalized confusion matrix')
plot_confusion_matrix(y_test, clf4.predict(x_test), classes=labels, normalize=True,title='Normalized confusion matrix')
plot_confusion_matrix(y_test, grd_lm.predict(grd_enc.transform(grd.apply(x_test)[:, :, 0])), classes=labels, normalize=True,title='Normalized confusion matrix')
plot_confusion_matrix(y_test, rf_lm.predict(rf_enc.transform(rf.apply(x_test))), classes=labels, normalize=True,title='Normalized confusion matrix')
plot_confusion_matrix(y_test, xgb_lm.predict(xgb_enc.transform(xgb.apply(x_test))), classes=labels, normalize=True,title='Normalized confusion matrix')