In [None]:
import pandas as pd
import numpy as np
                            
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm

In [None]:
# Feature workbooks, one per class (CC and HCC, going by the file names).
# NOTE: these are absolute, machine-specific paths.
xlsx1_filepath = r"F:\Newdataset\muilt_ROI\feature_extract\norm_ALL_CC_Delay.xlsx"
xlsx2_filepath = r"F:\Newdataset\muilt_ROI\feature_extract\norm_ALL_HCC_Delay.xlsx"

# Read both workbooks in the same order as before: CC first, then HCC.
data_1, data_2 = [pd.read_excel(workbook) for workbook in (xlsx1_filepath, xlsx2_filepath)]

In [None]:

# Build the modelling table: label each cohort, stack them, shuffle the rows
# and standardise every feature column.
rows1, cols1 = data_1.shape
rows2, cols2 = data_2.shape

# DataFrame.insert mutates in place and raises if the column already exists,
# so guard it to keep this cell re-runnable without reloading the workbooks.
if 'label' not in data_1.columns:
    data_1.insert(0, 'label', [0] * rows1)  # CC
if 'label' not in data_2.columns:
    data_2.insert(0, 'label', [1] * rows2)  # HCC

# ignore_index: both frames carry their own 0..n-1 row labels, which would
# otherwise be duplicated in the stacked frame.
data = pd.concat([data_1, data_2], ignore_index=True)

# Fixed seed -> identical row order on every run. The index is reset so that
# `y` and the rebuilt feature frame `X1` share the same 0..n-1 row labels.
data = shuffle(data, random_state=0).reset_index(drop=True)

x = data[data.columns[1:]]  # everything after the leading 'label' column
y = data['label']
colNames = x.columns
x = x.astype(np.float64)

# NOTE(review): the scaler is fitted on all rows, i.e. before any of the
# train/test splits performed further down the notebook.
# StandardScaler returns a bare ndarray; restore the column names and index.
X1 = pd.DataFrame(StandardScaler().fit_transform(x), index=x.index, columns=colNames)

In [None]:
from functools import partial

from sklearn import feature_selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import chi2

# First filter stage: keep the features with the highest mutual information
# with the label.
# mutual_info_classif is randomised, so the seed is pinned to make the
# selected feature set reproducible between runs.
# k is clamped so the cell also works when fewer than 200 columns exist.
n_keep = min(200, X1.shape[1])
mi = SelectKBest(partial(mutual_info_classif, random_state=0), k=n_keep).fit(X1, y)

# transform() drops the column names; rebuild a labelled frame on X1's index.
mi_columns = X1.columns[mi.get_support()]
X_mi = pd.DataFrame(mi.transform(X1), index=X1.index, columns=mi_columns)


In [None]:
from sklearn.feature_selection import RFE

# Second filter stage: recursive feature elimination driven by a seeded random
# forest, dropping 10 features per round until 100 remain.
rf_ranker = RandomForestClassifier(n_estimators=100, random_state=0)
rfe = RFE(estimator=rf_ranker, n_features_to_select=100, step=10)
rfe.fit(X_mi, y)

# Map the surviving positions back to column names and narrow X1 to them.
X_features = X_mi.columns[rfe.get_support(indices=True)]
X1 = X1[X_features]

## LASSO

In [None]:
%%time
import warnings
warnings.filterwarnings("ignore")

alphas = np.logspace(-5,3,100) 
# print(alphas)
model_lassoCV = LassoCV(alphas = alphas, cv = 10, max_iter = 100000).fit(X1,y)  
print('The Optimal alpha is :',model_lassoCV.alpha_)  
print('--------------------')
coef = pd.Series(model_lassoCV.coef_ ,index = X1.columns) 

print('LASSO selected ' + str(sum(coef != 0)) + ' variable and eliminated the other' +  str(sum(coef == 0)))

In [None]:
# Restrict X1 to the features LASSO kept (non-zero coefficient) and show them.
nonzero_coef = coef[coef != 0]
index = nonzero_coef.index
X1 = X1[index]
print(nonzero_coef)

## Feature ranking by absolute LASSO coefficient

In [None]:
import matplotlib.pyplot as plt

# High-resolution canvas for the horizontal bar chart drawn below.
fig = plt.figure(figsize=(8, 6), dpi=300, facecolor='w', edgecolor='k')

# Rank all coefficients by magnitude (ascending), then plot the non-zero ones.
top_coef = coef.abs().sort_values()
selected_coef = top_coef[top_coef != 0]
selected_coef.plot(kind="barh", color='slateblue')
plt.title("Most Important Selected Features For E0")

## SVM

In [None]:
# Baseline RBF SVM with default C on an 80/20 split of the selected features.
x_train, x_test, y_train, y_test = train_test_split(X1, y, test_size=0.2)

model_svm = svm.SVC(kernel='rbf', gamma='auto', probability=True)
model_svm.fit(x_train, y_train)

# Mean accuracy on the training rows, then on the held-out rows.
score_svm1 = model_svm.score(x_train, y_train)
score_svm2 = model_svm.score(x_test, y_test)
print(score_svm1, score_svm2, sep='\n')

In [None]:
  from sklearn.model_selection import GridSearchCV
Cs = np.logspace(-1,3,50,base = 2)
gammas = np.logspace(-4,4,50,base = 2)
param_grid = dict(C = Cs,gamma = gammas)
grid = GridSearchCV(svm.SVC(kernel='rbf'),param_grid = param_grid,cv = 5).fit(X1,y)
print(grid.best_params_)

#c：1.8372539081409516   g:0.07837463407059186

In [None]:
from sklearn.model_selection import cross_val_score

# Refit the RBF SVM with the grid-searched hyper-parameters on a new 70/30 split.
C, gamma = (grid.best_params_[key] for key in ('C', 'gamma'))
x_train, x_test, y_train, y_test = train_test_split(X1, y, test_size=0.3)

model_svm = svm.SVC(kernel='rbf', C=C, gamma=gamma, probability=True)
model_svm.fit(x_train, y_train)

# Mean accuracy on the training rows, then on the held-out rows.
score_svm_train = model_svm.score(x_train, y_train)
score_svm_test = model_svm.score(x_test, y_test)
print(score_svm_train, score_svm_test, sep='\n')

# Disabled 10-fold cross-validation estimate:
# model_cv = cross_val_score(model_svm,X1,y,cv = 10).mean()
# print(model_cv)

In [None]:
# Score the tuned SVM on the hold-out rows from the preceding split.
# model_svm is used explicitly: `clf` is only created in the XGBoost section
# further down, so on a fresh kernel it does not exist yet, and with leftover
# kernel state it would report the XGBoost model's predictions here.
y_pred = model_svm.predict(x_test)
print((y_test).values)
print(y_pred)

from sklearn.metrics import (accuracy_score, precision_score,
                             recall_score, f1_score, cohen_kappa_score)
print('ACC：', accuracy_score(y_test, y_pred))
print('precision：', precision_score(y_test, y_pred))
print('recall：', recall_score(y_test, y_pred))
print('f1_score：', f1_score(y_test, y_pred))

# Per-class precision / recall / F1 table.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score, auc
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))

# ROC / AUC for the SVM, using the probability column at index 1 as the score
# for the positive label (pos_label = 1).
y_probs = model_svm.predict_proba(x_test)
positive_scores = y_probs[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, positive_scores, pos_label=1)
auc_score = auc(fpr, tpr)

roc_label = 'ROC curve (AUC = %0.2f)' % auc_score
plt.plot(fpr, tpr, color='darkorange', lw=2, label=roc_label)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title('ROC for SVM')
plt.legend(loc="lower right")
plt.show()

## Random forest (RF)

In [None]:
from sklearn import ensemble
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score

# 50-tree random forest on a new 70/30 split of the selected features.
x_train, x_test, y_train, y_test = train_test_split(X1, y, test_size=0.3)
model_rf = ensemble.RandomForestClassifier(n_estimators=50).fit(x_train, y_train)

# Mean accuracy on the training rows, then on the held-out rows.
score_train = model_rf.score(x_train, y_train)
score_test = model_rf.score(x_test, y_test)
print(score_train, score_test, sep='\n')

# Disabled 10-fold cross-validation estimate:
# model_cv = cross_val_score(model_rf,X,y,cv = 10).mean()
# print(model_cv)

In [None]:
# Score the random forest on the hold-out rows from the preceding split.
# model_rf is used explicitly: `clf` is only created in the XGBoost section
# further down, so on a fresh kernel it does not exist yet, and with leftover
# kernel state it would report the XGBoost model's predictions here.
y_pred = model_rf.predict(x_test)
print((y_test).values)
print(y_pred)

from sklearn.metrics import (accuracy_score, precision_score,
                             recall_score, f1_score, cohen_kappa_score)
print('ACC：', accuracy_score(y_test, y_pred))
print('precision：', precision_score(y_test, y_pred))
print('recall：', recall_score(y_test, y_pred))
print('f1_score：', f1_score(y_test, y_pred))

# Per-class precision / recall / F1 table.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score, auc
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))

# ROC / AUC for the random forest, scored with the probability column at
# index 1 (pos_label = 1).
y_probs = model_rf.predict_proba(x_test)
rf_scores = y_probs[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, rf_scores, pos_label=1)
auc_score = auc(fpr, tpr)

curve_label = 'ROC curve (AUC = %0.2f)' % auc_score
plt.plot(fpr, tpr, color='darkorange', lw=2, label=curve_label)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title('ROC for RF')
plt.legend(loc="lower right")
plt.show()

## Multi-layer perceptron (MLP)

In [None]:
from sklearn.neural_network import MLPClassifier

# MLP (ReLU activations, Adam solver, L2 penalty alpha = 0.001) on a new
# 70/30 split of the selected features.
x_train, x_test, y_train, y_test = train_test_split(X1, y, test_size=0.3)

model_NN = MLPClassifier(alpha=0.001, solver='adam', activation='relu')
model_NN.fit(x_train, y_train)

# Mean accuracy on the training rows, then on the held-out rows.
score_train = model_NN.score(x_train, y_train)
score_test = model_NN.score(x_test, y_test)
print(score_train, score_test, sep='\n')

In [None]:
# Score the MLP on the hold-out rows from the preceding split.
# model_NN is used explicitly: `clf` is only created in the XGBoost section
# further down, so on a fresh kernel it does not exist yet, and with leftover
# kernel state it would report the XGBoost model's predictions here.
y_pred = model_NN.predict(x_test)
print((y_test).values)
print(y_pred)

from sklearn.metrics import (accuracy_score, precision_score,
                             recall_score, f1_score, cohen_kappa_score)
print('ACC：', accuracy_score(y_test, y_pred))
print('precision：', precision_score(y_test, y_pred))
print('recall：', recall_score(y_test, y_pred))
print('f1_score：', f1_score(y_test, y_pred))

# Per-class precision / recall / F1 table.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score, auc
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))

# ROC / AUC for the MLP, scored with the probability column at index 1
# (pos_label = 1).
y_probs = model_NN.predict_proba(x_test)
fpr, tpr, thresholds = roc_curve(y_test, y_probs[:, 1], pos_label=1)

auc_score = auc(fpr, tpr)
plt.plot(fpr, tpr, color='darkorange',
         lw=2, label='ROC curve (AUC = %0.2f)' % auc_score)
plt.xlabel('FPR')
plt.ylabel('TPR')
# This figure shows model_NN, so the title names the MLP rather than the
# random forest.
plt.title('ROC for MLP')
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.legend(loc="lower right")
plt.show()

## XGBoost

In [None]:
from sklearn import metrics
from xgboost import XGBClassifier

# Gradient-boosted trees (100 rounds, depth 5, learning rate 0.1) on a new
# 70/30 split. Note the upper-case X_train / X_test names used in this section.
X_train, X_test, y_train, y_test = train_test_split(X1, y, test_size=0.3)

clf = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=5)
clf.fit(X_train, y_train)

# Accuracy on the training rows, then on the held-out rows.
train_predict = clf.predict(X_train)
test_predict = clf.predict(X_test)
train_accuracy = metrics.accuracy_score(y_train, train_predict)
test_accuracy = metrics.accuracy_score(y_test, test_predict)
print(train_accuracy, test_accuracy, sep='\n')


In [None]:
# Score the XGBoost model on its own hold-out rows.
# The XGBoost split stores its features in upper-case X_test; lower-case
# x_test still holds the rows of the earlier MLP split, which do not
# correspond to the current y_test.
y_pred = clf.predict(X_test)
print((y_test).values)
print(y_pred)

from sklearn.metrics import (accuracy_score, precision_score,
                             recall_score, f1_score, cohen_kappa_score)
print('ACC：', accuracy_score(y_test, y_pred))
print('precision：', precision_score(y_test, y_pred))
print('recall：', recall_score(y_test, y_pred))
print('f1_score：', f1_score(y_test, y_pred))

# Per-class precision / recall / F1 table.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

In [None]:
# ROC / AUC for the XGBoost model, scored with the probability column at
# index 1 (pos_label = 1). plt, roc_curve and auc come from earlier cells.
plt.figure(figsize=(8, 6))

y_probs = clf.predict_proba(X_test)
xgb_scores = y_probs[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, xgb_scores, pos_label=1)
auc_score = auc(fpr, tpr)

legend_text = 'ROC curve (AUC = %0.2f)' % auc_score
plt.plot(fpr, tpr, color='darkorange', lw=2, label=legend_text)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title('ROC for XGBoost')
plt.legend(loc="lower right")
plt.show()