In [1]:
import os
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split,StratifiedKFold,KFold,learning_curve,validation_curve
from sklearn import metrics
from sklearn.metrics import mean_squared_error,accuracy_score,confusion_matrix, roc_curve, roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.decomposition import PCA
from scipy import stats
from libtsvm.estimators import TSVM
from libtsvm.model_selection import Validator, grid_search
from libtsvm.preprocess import DataReader

In [None]:
# Read the raw feature table, then echo its summary statistics and dimensions.
df = pd.read_csv("bt_dataset_t3.csv")
print(df.describe(), df.shape, sep="\n")

In [None]:
# Print the dimensions first so the preview can be the cell's last expression:
# a bare `df.head()` that is not the final statement is evaluated and discarded.
print(df.shape)
df.head()

In [None]:
# Treat +/-inf as missing so they are imputed together with real NaNs.
df = df.replace([np.inf, -np.inf], np.nan)
# Impute with per-column means. `numeric_only=True` is required because the
# frame still contains the non-numeric 'Image' column at this point; without
# it recent pandas versions raise a TypeError instead of skipping that column.
df = df.fillna(df.mean(numeric_only=True))
# Drop the identifier column and the 'Coarseness' feature.
df = df.drop(['Image', 'Coarseness'], axis=1)
# Recode the label from {0, 1} to {-1, 1}.
df['Class'] = df['Class'].replace(to_replace=0, value=-1)


In [None]:
# Outlier detection must only look at the feature columns. 'Class' is a
# {-1, 1} label; scoring it like a measurement flags every row of a minority
# class once the classes are imbalanced (large |z|, or an IQR of zero), which
# silently removes that class from the data set.
features = df.drop(columns=['Class'])

z = np.abs(stats.zscore(features))
Q1 = features.quantile(0.25)
Q3 = features.quantile(0.75)
IQR = Q3 - Q1

# Keep a row only if every feature passes both tests: |z| < 2.1 and inside
# the 1.5 * IQR fences. Both tests use statistics of the unfiltered frame,
# so combining the masks equals applying the two filters one after the other.
within_z = np.asarray((z < 2.1).all(axis=1))
outside_iqr = ((features < (Q1 - 1.5 * IQR)) | (features > (Q3 + 1.5 * IQR))).any(axis=1)
df = df[within_z & ~outside_iqr.to_numpy()]
df.shape

In [None]:
# Separate the feature matrix from the label column.
X = df.drop(columns=['Class'])
y = df['Class']
# Cell result: number of rows labelled -1.
(df['Class'] == -1).sum()

In [None]:
# Persist the cleaned table (without the row index); the Twin SVM section
# later loads a file named final_data.csv.
df.to_csv('final_data.csv', index=False)

# Preview the first rows of the cleaned frame as the cell output.
df.head()

###### Split data into train and test

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
def plot_learning_curve(estimator, title, X, y, axes=None, ylim=None, cv=None,
                        n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5),TN=None,FP=None,TP=None,FN=None,mse=None,AC = None,F1=None,SE=None,SP=None,PR=None,BER=None,TA=None,fname=None):
    """Draw a 2x2 diagnostic figure for ``estimator`` and log one row to result.csv.

    Panels: (1) learning curve in MSE, (2) accuracy validation curve over one
    hyper-parameter, (3) fit time vs. training-set size, (4) cross-validation
    error vs. fit time. The figure is saved as ``<fname>.png``.

    Parameters
    ----------
    estimator : estimator passed to ``learning_curve`` / ``validation_curve``.
    title : title of the learning-curve panel.
    X, y : training features and labels.
    axes : optional collection of exactly four matplotlib axes (e.g. the 2x2
        array returned by ``plt.subplots``). A new 20x10 figure is created
        when omitted.
    ylim : accepted for backward compatibility and ignored. Callers in this
        notebook pass an accuracy-style range, which would hide the MSE curve
        if it were applied.
    cv, n_jobs, train_sizes : forwarded to ``learning_curve``.
    TN, FP, TP, FN, mse, AC, F1, SE, SP, PR, BER, TA : already-computed
        metrics that are only copied into the result.csv row.
    fname : model name; used as the 'Model' value in result.csv and as the
        stem of the saved PNG. Falls back to ``title`` when omitted.

    Returns
    -------
    The ``matplotlib.pyplot`` module, so callers can chain ``plt.show()``.
    """
    if fname is None:
        # Previously this crashed on ``None.split()``; use the title instead.
        fname = title

    if axes is None:
        fig, axes = plt.subplots(2, 2, figsize=(20, 10))
    # Accept caller-supplied axes too (the original left ax1..ax4 undefined).
    ax1, ax2, ax3, ax4 = np.asarray(axes).ravel()
    fig = ax1.figure

    # --- learning curve (timed as a whole; stored in the 'Time' column) ---
    start = time.time()
    train_sizes, train_scores, test_scores, fit_times, _ = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes,
        scoring='neg_mean_squared_error', return_times=True)
    TT = time.time() - start

    # Scores are negated MSE, so flip the sign of the means to plot an error.
    train_scores_mean = -np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = -np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    fit_times_mean = np.mean(fit_times, axis=1)
    fit_times_std = np.std(fit_times, axis=1)

    # --- validation curve ---
    # Sweep gamma for estimators that expose it (SVC, TSVM) and the number of
    # boosting stages otherwise. Deciding from the estimator's own parameters
    # gives the same choice as the old file-name check for the models used in
    # this notebook, without depending on how the output file is named.
    if "gamma" in estimator.get_params():
        param_name, param_range = "gamma", np.logspace(-5, 0, 5)
    else:
        param_name, param_range = "n_estimators", np.arange(1, 500, 5)
    train_scoresv, test_scoresv = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        scoring="accuracy", cv=5)

    train_scores_meanv = np.mean(train_scoresv, axis=1)
    train_scores_stdv = np.std(train_scoresv, axis=1)
    test_scores_meanv = np.mean(test_scoresv, axis=1)
    test_scores_stdv = np.std(test_scoresv, axis=1)

    # Report this call's own timing (the original printed a notebook global).
    print(TT)
    print(fit_times_mean)
    print(train_scores_mean)
    print(test_scores_mean)

    # --- append one row to result.csv, creating the file on first use ---
    columns = ['Model','TN','FP','FN','TP','Accuracy','Sensitivity','Specificity','Precision','F1_Score','MSE','Error Rate','Training Accuracy','Time']
    record = {'Model':fname,'TN':TN,'FP':FP,'FN':FN,'TP':TP,'Accuracy':AC,
              'Sensitivity':SE,'Specificity':SP,'Precision':PR,'F1_Score':F1,
              'MSE':mse,'Error Rate':BER,'Training Accuracy':TA,'Time':TT}
    new_row = pd.DataFrame([record], columns=columns)
    if os.path.exists('result.csv'):
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        results = pd.concat([pd.read_csv('result.csv'), new_row], ignore_index=True)
    else:
        results = new_row
    results.to_csv('result.csv', index=False)

    # Plot learning curve
    ax1.grid()
    ax1.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    ax1.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1,
                     color="g")
    ax1.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training error")
    ax1.plot(train_sizes, test_scores_mean, 'o-', color="b",
             label="Cross Validation error")
    ax1.legend(loc="best")
    ax1.set_title(title)
    ax1.set_xlabel("Training set size")
    ax1.set_ylabel("MSE")

    # Validation curve (log-scaled x axis)
    lw = 2
    ax2.grid()
    ax2.semilogx(param_range, train_scores_meanv, label="Training score",
                 color="darkorange", lw=lw)
    ax2.fill_between(param_range, train_scores_meanv - train_scores_stdv,
                     train_scores_meanv + train_scores_stdv, alpha=0.2,
                     color="darkorange", lw=lw)
    ax2.semilogx(param_range, test_scores_meanv, label="Cross-validation score",
                 color="navy", lw=lw)
    ax2.fill_between(param_range, test_scores_meanv - test_scores_stdv,
                     test_scores_meanv + test_scores_stdv, alpha=0.2,
                     color="navy", lw=lw)
    # The x axis is the swept hyper-parameter, not the training-set size.
    ax2.set_xlabel(param_name)
    ax2.set_ylabel("Score")
    ax2.set_title("Validation Curve")
    ax2.legend(loc="best")

    # Plot n_samples vs fit_times
    ax3.grid()
    ax3.plot(train_sizes, fit_times_mean, 'o-')
    ax3.fill_between(train_sizes, fit_times_mean - fit_times_std,
                     fit_times_mean + fit_times_std, alpha=0.1)
    ax3.set_xlabel("Training examples")
    ax3.set_ylabel("fit_times")
    ax3.set_title("Scalability of the model")

    # Plot fit_time vs score
    ax4.grid()
    ax4.plot(fit_times_mean, test_scores_mean, 'o-')
    ax4.fill_between(fit_times_mean, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1)
    ax4.set_xlabel("fit_times")
    ax4.set_ylabel("Error")
    ax4.set_title("Performance of the model")

    fig.savefig(fname+".png")

    return plt

In [None]:
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

###### SVM with 5-fold cross-validation

In [None]:
# One-step pipeline; the step name gives the grid keys their 'clf__' prefix.
steps = [('clf', SVC(kernel='rbf'))]
pipeline = Pipeline(steps)

# Candidate values for the RBF-SVM hyper-parameters.
parameters = dict(
    clf__C=[0.001, 0.1, 10, 10e5],
    clf__gamma=[0.1, 0.01, 0.001],
)

In [None]:
cv = 5
# Exhaustive search over `parameters` with cv-fold cross-validation.
grid = GridSearchCV(pipeline, param_grid=parameters, cv=cv).fit(X_train, y_train)
parm = grid.best_params_
# The printed score is the best model's score on the held-out test split.
print("Score for %d fold : = %f" % (cv, grid.score(X_test, y_test)))
print("Parameters : ", parm)

In [None]:
cv = 5

# Time model construction, fitting and test-set prediction together.
start = time.time()
clf = SVC(
    kernel='rbf',
    gamma=parm['clf__gamma'],
    C=parm['clf__C'],
    probability=True,
)
clf.fit(X_train, y_train)
y_pred_test = clf.predict(X_test)
total_time = time.time() - start

# Training-set predictions, used for the training accuracy.
y_pred_train = clf.predict(X_train)

In [None]:
# Test-set MSE and training accuracy are kept for the results table.
mse = mean_squared_error(y_test, y_pred_test)
Train_Accuracy = accuracy_score(y_train, y_pred_train)
for caption, value in (
    ('Train Accuracy', Train_Accuracy),
    ('Test Accuracy', accuracy_score(y_test, y_pred_test)),
    ('Total Time : ', total_time),
    ('MSE: ', mse),
):
    print(caption, value)



In [None]:
# Flatten the 2x2 confusion matrix row by row into its four counts.
TN, FP, FN, TP = confusion_matrix(y_test, y_pred_test).ravel()
print(TN, FP, FN, TP)

# Basic rates.
AC = (TP + TN) / (TP + TN + FP + FN)  # accuracy
SE = TP / (TP + FN)                   # sensitivity / recall / true positive rate
SP = TN / (TN + FP)                   # specificity / true negative rate
PR = TP / (TP + FP)                   # precision / positive predictive value
FNR = 1 - SE                          # miss rate (false negative rate)
FPR = 1 - SP                          # fall-out (false positive rate)

# Balanced and overlap-based scores.
BCR = 1 / 2 * (SE + SP)               # balanced classification rate
BER = 1 - BCR                         # balanced error rate
F1 = 2 * PR * SE / (PR + SE)          # F1 score
TS = TP / (TP + FP + FN)              # threat score / critical success index
DC = 2 * TP / (2 * TP + FP + FN)      # Dice coefficient
print(AC, F1, TS, DC)

In [None]:
fname = "SVM"
title = r"Learning Curves (SVM)"
# Metrics computed above, recorded next to the curves in result.csv.
summary_metrics = dict(TN=TN, FP=FP, TP=TP, FN=FN, mse=mse, AC=AC, F1=F1,
                       SE=SE, SP=SP, PR=PR, BER=BER, TA=Train_Accuracy)
plot_learning_curve(clf, title, X_train, y_train, cv=cv, fname=fname,
                    **summary_metrics)

plt.show()

###### PCA + SVM with 5-fold cross-validation

In [None]:
# PCA followed by an RBF SVM; grid keys are prefixed with the step names.
steps = [('pca', PCA()), ('clf', SVC(kernel='rbf'))]
pipeline = Pipeline(steps)

# Candidate numbers of components and SVM hyper-parameters.
parameters = dict(
    pca__n_components=[2, 3, 4],
    clf__C=[0.001, 0.1, 10, 100, 10e5],
    clf__gamma=[1, 0.1, 0.01, 0.001],
)

In [None]:
cv = 5
# Exhaustive search over `parameters` with cv-fold cross-validation.
grid = GridSearchCV(pipeline, param_grid=parameters, cv=cv).fit(X_train, y_train)
parm = grid.best_params_
# The printed score is the best pipeline's score on the held-out test split.
print("Score for %d fold : = %f" % (cv, grid.score(X_test, y_test)))
print("Parameters : ", parm)

In [None]:
# Project both splits onto the number of components chosen by the grid search.
# NOTE: X_train / X_test are overwritten with the projection here, so every
# later cell that reads them sees the reduced features.
pca = PCA(n_components=parm['pca__n_components'])
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Time model construction, fitting and test-set prediction together.
start = time.time()
clf1 = SVC(
    kernel="rbf",
    gamma=parm['clf__gamma'],
    C=parm['clf__C'],
    probability=True,
)
clf1.fit(X_train, y_train)
y_pred_test = clf1.predict(X_test)
total_time = time.time() - start

# Training-set predictions, used for the training accuracy.
y_pred_train = clf1.predict(X_train)

In [None]:
# Test-set MSE and training accuracy are kept for the results table.
mse = mean_squared_error(y_test, y_pred_test)
Train_Accuracy = accuracy_score(y_train, y_pred_train)
for caption, value in (
    ('Train Accuracy', Train_Accuracy),
    ('Test Accuracy', accuracy_score(y_test, y_pred_test)),
    ('Total Time : ', total_time),
    ('MSE: ', mse),
):
    print(caption, value)



In [None]:
# Flatten the 2x2 confusion matrix row by row into its four counts.
TN, FP, FN, TP = confusion_matrix(y_test, y_pred_test).ravel()
print(TN, FP, FN, TP)

# Basic rates.
AC = (TP + TN) / (TP + TN + FP + FN)  # accuracy
SE = TP / (TP + FN)                   # sensitivity / recall / true positive rate
SP = TN / (TN + FP)                   # specificity / true negative rate
PR = TP / (TP + FP)                   # precision / positive predictive value
FNR = 1 - SE                          # miss rate (false negative rate)
FPR = 1 - SP                          # fall-out (false positive rate)

# Balanced and overlap-based scores.
BCR = 1 / 2 * (SE + SP)               # balanced classification rate
BER = 1 - BCR                         # balanced error rate
F1 = 2 * PR * SE / (PR + SE)          # F1 score
TS = TP / (TP + FP + FN)              # threat score / critical success index
DC = 2 * TP / (2 * TP + FP + FN)      # Dice coefficient
print(AC, F1, TS, DC)

In [None]:
fname = "SVM with PCA"
title = r"Learning Curves (SVM with PCA)"
# Metrics computed above, recorded next to the curves in result.csv.
summary_metrics = dict(TN=TN, FP=FP, TP=TP, FN=FN, mse=mse, AC=AC, F1=F1,
                       SE=SE, SP=SP, PR=PR, BER=BER, TA=Train_Accuracy)
plot_learning_curve(clf1, title, X_train, y_train, ylim=(0.7, 1.01), cv=5,
                    fname=fname, **summary_metrics)

plt.show()

#### Gradient Boosting

In [None]:
# One-step pipeline; the step name gives the grid keys their 'clf2__' prefix.
steps = [('clf2', GradientBoostingClassifier())]
pipeline = Pipeline(steps)

# Candidate gradient-boosting hyper-parameters.
param_grid = dict(
    clf2__n_estimators=[10, 100, 1000],
    clf2__learning_rate=[0.1, 1, 0.001, 0.01],
    clf2__max_depth=[3, 4, 5, 6],
    clf2__min_samples_leaf=[3, 5],
    clf2__max_features=[1.0],
)
n_jobs = 4

In [None]:
cv = 5

# The SVM + PCA section overwrote X_train / X_test with PCA projections.
# This section is gradient boosting on the full feature set, so rebuild the
# scaled splits from X / y with the same split settings used originally
# (same test_size and random_state, hence the same rows and labels).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Run the search in parallel with the worker count configured alongside the
# grid (n_jobs was defined there but never passed on).
grid = GridSearchCV(pipeline, param_grid=param_grid, cv=cv, n_jobs=n_jobs)
grid.fit(X_train, y_train)
parm = grid.best_params_
# The printed score is the best model's score on the held-out test split.
print("Score for %d fold : = %f" % (cv, grid.score(X_test, y_test)))
print("Parameters : ", parm)

In [None]:
# Time model construction, fitting and test-set prediction together.
start = time.time()
clf2 = GradientBoostingClassifier(
    learning_rate=parm['clf2__learning_rate'],
    max_depth=parm['clf2__max_depth'],
    max_features=parm['clf2__max_features'],
    min_samples_leaf=parm['clf2__min_samples_leaf'],
    n_estimators=parm['clf2__n_estimators'],
    random_state=0,
)
clf2.fit(X_train, y_train)
y_pred_test = clf2.predict(X_test)
total_time = time.time() - start

# Training-set predictions, used for the training accuracy.
y_pred_train = clf2.predict(X_train)

# Test-set MSE and training accuracy are kept for the results table.
mse = mean_squared_error(y_test, y_pred_test)
Train_Accuracy = accuracy_score(y_train, y_pred_train)
for caption, value in (
    ('Train Accuracy', Train_Accuracy),
    ('Test Accuracy', accuracy_score(y_test, y_pred_test)),
    ('Total Time : ', total_time),
    ('MSE: ', mse),
):
    print(caption, value)



In [None]:
# Flatten the 2x2 confusion matrix row by row into its four counts.
TN, FP, FN, TP = confusion_matrix(y_test, y_pred_test).ravel()
print(TN, FP, FN, TP)

# Basic rates.
AC = (TP + TN) / (TP + TN + FP + FN)  # accuracy
SE = TP / (TP + FN)                   # sensitivity / recall / true positive rate
SP = TN / (TN + FP)                   # specificity / true negative rate
PR = TP / (TP + FP)                   # precision / positive predictive value
FNR = 1 - SE                          # miss rate (false negative rate)
FPR = 1 - SP                          # fall-out (false positive rate)

# Balanced and overlap-based scores.
BCR = 1 / 2 * (SE + SP)               # balanced classification rate
BER = 1 - BCR                         # balanced error rate
F1 = 2 * PR * SE / (PR + SE)          # F1 score
TS = TP / (TP + FP + FN)              # threat score / critical success index
DC = 2 * TP / (2 * TP + FP + FN)      # Dice coefficient
print(AC, F1, TS, DC)

In [None]:
fname = "Gradient Boosting"
title = r"Learning Curves (Gradient Boosting)"
# Metrics computed above, recorded next to the curves in result.csv.
summary_metrics = dict(TN=TN, FP=FP, TP=TP, FN=FN, mse=mse, AC=AC, F1=F1,
                       SE=SE, SP=SP, PR=PR, BER=BER, TA=Train_Accuracy)
plot_learning_curve(clf2, title, X_train, y_train, ylim=(0.7, 1.01), cv=5,
                    fname=fname, **summary_metrics)

plt.show()

#### Gradient Boosting with PCA


In [None]:
# PCA followed by gradient boosting; grid keys are prefixed with the step names.
steps = [('pca', PCA()), ('clf3', GradientBoostingClassifier())]
pipeline = Pipeline(steps)

# Candidate numbers of components and gradient-boosting hyper-parameters.
param_grid = dict(
    pca__n_components=[2, 3, 4],
    clf3__n_estimators=[10, 100, 1000],
    clf3__learning_rate=[0.1, 1, 0.001, 0.01],
    clf3__max_depth=[3, 4, 5, 6],
    clf3__min_samples_leaf=[3, 5],
    clf3__max_features=[1.0],
)
n_jobs = 4

In [None]:
cv = 5

# Start this section from the full, scaled feature set. X_train / X_test may
# still hold a PCA projection written by an earlier section, and feeding that
# into the PCA step of this pipeline would reduce already-reduced data (and
# can ask for more components than there are columns). Same split settings
# as the original split, hence the same rows and labels.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Run the search in parallel with the worker count configured alongside the
# grid (n_jobs was defined there but never passed on).
grid = GridSearchCV(pipeline, param_grid=param_grid, cv=cv, n_jobs=n_jobs)
grid.fit(X_train, y_train)
parm = grid.best_params_
# The printed score is the best pipeline's score on the held-out test split.
print("Score for %d fold : = %f" % (cv, grid.score(X_test, y_test)))
print("Parameters : ", parm)

In [None]:
# Project both splits onto the number of components chosen by the grid search.
# NOTE: X_train / X_test are overwritten with the projection here.
pca = PCA(n_components=parm['pca__n_components'])
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Time model construction, fitting and test-set prediction together.
start = time.time()
clf3 = GradientBoostingClassifier(
    learning_rate=parm['clf3__learning_rate'],
    max_depth=parm['clf3__max_depth'],
    max_features=parm['clf3__max_features'],
    min_samples_leaf=parm['clf3__min_samples_leaf'],
    n_estimators=parm['clf3__n_estimators'],
    random_state=0,
)
clf3.fit(X_train, y_train)
y_pred_test = clf3.predict(X_test)
total_time = time.time() - start

# Training-set predictions, used for the training accuracy.
y_pred_train = clf3.predict(X_train)

In [None]:
# Test-set MSE and training accuracy are kept for the results table.
mse = mean_squared_error(y_test, y_pred_test)
Train_Accuracy = accuracy_score(y_train, y_pred_train)
for caption, value in (
    ('Train Accuracy', Train_Accuracy),
    ('Test Accuracy', accuracy_score(y_test, y_pred_test)),
    ('Total Time : ', total_time),
    ('MSE: ', mse),
):
    print(caption, value)


In [None]:
# Flatten the 2x2 confusion matrix row by row into its four counts.
TN, FP, FN, TP = confusion_matrix(y_test, y_pred_test).ravel()
print(TN, FP, FN, TP)

# Basic rates.
AC = (TP + TN) / (TP + TN + FP + FN)  # accuracy
SE = TP / (TP + FN)                   # sensitivity / recall / true positive rate
SP = TN / (TN + FP)                   # specificity / true negative rate
PR = TP / (TP + FP)                   # precision / positive predictive value
FNR = 1 - SE                          # miss rate (false negative rate)
FPR = 1 - SP                          # fall-out (false positive rate)

# Balanced and overlap-based scores.
BCR = 1 / 2 * (SE + SP)               # balanced classification rate
BER = 1 - BCR                         # balanced error rate
F1 = 2 * PR * SE / (PR + SE)          # F1 score
TS = TP / (TP + FP + FN)              # threat score / critical success index
DC = 2 * TP / (2 * TP + FP + FN)      # Dice coefficient
print(AC, F1, TS, DC)

In [None]:
fname = "Gradient Boosting with PCA"
title = r"Learning Curves (Gradient Boosting with PCA)"
# Metrics computed above, recorded next to the curves in result.csv.
summary_metrics = dict(TN=TN, FP=FP, TP=TP, FN=FN, mse=mse, AC=AC, F1=F1,
                       SE=SE, SP=SP, PR=PR, BER=BER, TA=Train_Accuracy)
plot_learning_curve(clf3, title, X_train, y_train, ylim=(0.7, 1.01), cv=5,
                    fname=fname, **summary_metrics)

plt.show()

#### Twin SVM

In [None]:
# Step 1: Load the dataset.
# Read the final_data.csv that this notebook writes to the working directory,
# resolved to an absolute path, instead of a user-specific hard-coded location
# that does not exist on other machines.
data_path = os.path.abspath('final_data.csv')
sep_char = ','  # separator character of the CSV file
header = True   # whether the dataset has header names

dataset = DataReader(data_path, sep_char, header)
shuffle_data = True
normalize_data = False  # scaling is done below with StandardScaler instead

dataset.load_data(shuffle_data, normalize_data)
Xts, yts, file_name = dataset.get_data()

# Same 70/30 split settings as the scikit-learn models, then standardise
# using statistics learned from the training rows only.
X_traints, X_testts, y_traints, y_testts = train_test_split(Xts, yts, test_size=0.3, random_state=42)
sc = StandardScaler()
X_traints = sc.fit_transform(X_traints)
X_testts = sc.transform(X_testts)

In [None]:
tsvm_clf = TSVM(kernel='linear')

# Evaluate candidates with 5-fold cross-validation on the training split.
val = Validator(X_traints, y_traints, ('CV', 5), tsvm_clf)
eval_method = val.choose_validator()

# Step 4: (lower, upper) search bounds for each hyper-parameter of the
# TSVM-based estimator.
params = dict(C1=(-2, 2), C2=(-2, 2), gamma=(-10, 2))

best_acc, best_acc_std, opt_params, clf_results = grid_search(eval_method, params)

print("Best accuracy: %.2f+-%.2f | Optimal parameters: %s"
      % (best_acc, best_acc_std, str(opt_params)))

In [None]:
# Hyper-parameters selected by the grid search.
c1, c2, gamma = opt_params['C1'], opt_params['C2'], opt_params['gamma']

# Time model construction, fitting and test-set prediction together.
start = time.time()
tsvm_clf = TSVM(kernel='linear', C1=c1, C2=c2, gamma=gamma)
tsvm_clf.fit(X_traints, y_traints)
y_pred_test = tsvm_clf.predict(X_testts)
total_time = time.time() - start

y_pred_train = tsvm_clf.predict(X_traints)

# Test-set MSE and training accuracy are kept for the results table.
mse = mean_squared_error(y_testts, y_pred_test)
Train_Accuracy = accuracy_score(y_traints, y_pred_train)
for caption, value in (
    ('Train Accuracy', Train_Accuracy),
    ('Test Accuracy', accuracy_score(y_testts, y_pred_test)),
    ('Total Time : ', total_time),
    ('MSE: ', mse),
):
    print(caption, value)

In [None]:
# Flatten the 2x2 confusion matrix row by row into its four counts.
TN, FP, FN, TP = confusion_matrix(y_testts, y_pred_test).ravel()
print(TN, FP, FN, TP)

# Basic rates.
AC = (TP + TN) / (TP + TN + FP + FN)  # accuracy
SE = TP / (TP + FN)                   # sensitivity / recall / true positive rate
SP = TN / (TN + FP)                   # specificity / true negative rate
PR = TP / (TP + FP)                   # precision / positive predictive value
FNR = 1 - SE                          # miss rate (false negative rate)
FPR = 1 - SP                          # fall-out (false positive rate)

# Balanced and overlap-based scores.
BCR = 1 / 2 * (SE + SP)               # balanced classification rate
BER = 1 - BCR                         # balanced error rate
F1 = 2 * PR * SE / (PR + SE)          # F1 score
TS = TP / (TP + FP + FN)              # threat score / critical success index
DC = 2 * TP / (2 * TP + FP + FN)      # Dice coefficient
print(AC, F1, TS, DC)

In [None]:
cv = 5
fname = "Twin SVM"
title = r"Learning Curves (Twin SVM)"
# Metrics computed above, recorded next to the curves in result.csv.
summary_metrics = dict(TN=TN, FP=FP, TP=TP, FN=FN, mse=mse, AC=AC, F1=F1,
                       SE=SE, SP=SP, PR=PR, BER=BER, TA=Train_Accuracy)
plot_learning_curve(tsvm_clf, title, X_traints, y_traints, cv=cv, fname=fname,
                    **summary_metrics)

plt.show()

#### Twin SVM + PCA

In [None]:
# Reduce the Twin SVM splits to four principal components; the projection is
# learned on the training rows and reused for the test rows.
# NOTE: X_traints / X_testts are overwritten with the projection here.
pca = PCA(n_components=4)
X_traints = pca.fit_transform(X_traints)
X_testts = pca.transform(X_testts)

In [None]:
tsvm_clf1 = TSVM(kernel='linear')

# 5-fold cross-validation on the PCA-reduced training split. Hand the
# validator the estimator created for this section (tsvm_clf1); the original
# passed tsvm_clf, the already-fitted Twin SVM from the section without PCA,
# and left tsvm_clf1 unused.
val = Validator(X_traints, y_traints, ('CV', 5), tsvm_clf1)
eval_method = val.choose_validator()

# Step 4: Specify range of each hyper-parameter for a TSVM-based estimator.
params = {'C1': (-2, 2), 'C2': (-2, 2), 'gamma': (-10, 2)}

best_acc, best_acc_std, opt_params, clf_results = grid_search(eval_method, params)

print("Best accuracy: %.2f+-%.2f | Optimal parameters: %s" % (best_acc, best_acc_std,
                                                                                  str(opt_params)))

In [None]:
# Hyper-parameters selected by the grid search.
c1, c2, gamma = opt_params['C1'], opt_params['C2'], opt_params['gamma']

# Time model construction, fitting and test-set prediction together.
start = time.time()
tsvm_clf1 = TSVM(kernel='linear', C1=c1, C2=c2, gamma=gamma)
a = tsvm_clf1.fit(X_traints, y_traints)
y_pred_test = tsvm_clf1.predict(X_testts)
total_time = time.time() - start

y_pred_train = tsvm_clf1.predict(X_traints)

# Test-set MSE and training accuracy are kept for the results table.
mse = mean_squared_error(y_testts, y_pred_test)
Train_Accuracy = accuracy_score(y_traints, y_pred_train)
for caption, value in (
    ('Train Accuracy', Train_Accuracy),
    ('Test Accuracy', accuracy_score(y_testts, y_pred_test)),
    ('Total Time : ', total_time),
    ('MSE: ', mse),
):
    print(caption, value)

In [None]:
# Flatten the 2x2 confusion matrix row by row into its four counts.
TN, FP, FN, TP = confusion_matrix(y_testts, y_pred_test).ravel()
print(TN, FP, FN, TP)

# Basic rates.
AC = (TP + TN) / (TP + TN + FP + FN)  # accuracy
SE = TP / (TP + FN)                   # sensitivity / recall / true positive rate
SP = TN / (TN + FP)                   # specificity / true negative rate
PR = TP / (TP + FP)                   # precision / positive predictive value
FNR = 1 - SE                          # miss rate (false negative rate)
FPR = 1 - SP                          # fall-out (false positive rate)

# Balanced and overlap-based scores.
BCR = 1 / 2 * (SE + SP)               # balanced classification rate
BER = 1 - BCR                         # balanced error rate
F1 = 2 * PR * SE / (PR + SE)          # F1 score
TS = TP / (TP + FP + FN)              # threat score / critical success index
DC = 2 * TP / (2 * TP + FP + FN)      # Dice coefficient
print(AC, F1, TS, DC)

In [None]:
cv = 5
fname = "Twin SVM with PCA"
title = r"Learning Curves (Twin SVM with PCA)"
# Metrics computed above, recorded next to the curves in result.csv.
summary_metrics = dict(TN=TN, FP=FP, TP=TP, FN=FN, mse=mse, AC=AC, F1=F1,
                       SE=SE, SP=SP, PR=PR, BER=BER, TA=Train_Accuracy)
plot_learning_curve(tsvm_clf1, title, X_traints, y_traints, cv=cv, fname=fname,
                    **summary_metrics)

plt.show()

ROC Plot

In [None]:
plt.figure()

# Every model is refit below, so each needs the feature space it belongs to.
# By this point X_train / X_test and X_traints / X_testts have been
# overwritten with PCA projections by earlier cells, so rebuild the scaled
# splits from X / y and Xts / yts with the split settings used originally.
roc_X_train, roc_X_test, roc_y_train, roc_y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)
roc_scaler = StandardScaler()
roc_X_train = roc_scaler.fit_transform(roc_X_train)
roc_X_test = roc_scaler.transform(roc_X_test)

roc_Xts_train, roc_Xts_test, roc_yts_train, roc_yts_test = train_test_split(
    Xts, yts, test_size=0.3, random_state=42)
roc_ts_scaler = StandardScaler()
roc_Xts_train = roc_ts_scaler.fit_transform(roc_Xts_train)
roc_Xts_test = roc_ts_scaler.transform(roc_Xts_test)


def _fitted_width(model):
    """Return how many input columns an already-fitted model was trained on."""
    for attr in ('n_features_in_', 'n_features_'):
        if hasattr(model, attr):
            return getattr(model, attr)
    # SVC on scikit-learn releases that predate n_features_in_.
    return model.shape_fit_[1]


def _pca_project(n_components, fit_on, apply_to):
    """Fit PCA on ``fit_on`` and return the projections of both matrices."""
    reducer = PCA(n_components=n_components)
    return reducer.fit_transform(fit_on), reducer.transform(apply_to)


sk_targets = (roc_y_train, roc_y_test)
ts_targets = (roc_yts_train, roc_yts_test)

# One entry per curve: the estimator, its (train, test) features and targets,
# and whether it is a Twin SVM (scored via decision_function, not
# predict_proba). The PCA variants use as many components as the fitted
# model was trained on; Twin SVM + PCA uses the 4 components of its section.
models = [
    {'label': 'SVM', 'model': clf,
     'X': (roc_X_train, roc_X_test), 'y': sk_targets, 'tsvm': False},
    {'label': 'SVM with PCA', 'model': clf1,
     'X': _pca_project(_fitted_width(clf1), roc_X_train, roc_X_test),
     'y': sk_targets, 'tsvm': False},
    {'label': 'Gradient Boosting', 'model': clf2,
     'X': (roc_X_train, roc_X_test), 'y': sk_targets, 'tsvm': False},
    {'label': 'Gradient Boosting with PCA', 'model': clf3,
     'X': _pca_project(_fitted_width(clf3), roc_X_train, roc_X_test),
     'y': sk_targets, 'tsvm': False},
    {'label': 'Twin SVM', 'model': tsvm_clf,
     'X': (roc_Xts_train, roc_Xts_test), 'y': ts_targets, 'tsvm': True},
    {'label': 'Twin SVM with PCA', 'model': tsvm_clf1,
     'X': _pca_project(4, roc_Xts_train, roc_Xts_test),
     'y': ts_targets, 'tsvm': True},
]

for m in models:
    model = m['model']
    X_fit, X_eval = m['X']
    y_fit, y_eval = m['y']

    model.fit(X_fit, y_fit)
    if m['tsvm']:
        # NOTE(review): column 0 of the TSVM decision function is used as the
        # ranking score, as in the original cell; confirm against libtsvm that
        # larger values mean "more likely the positive class".
        raw_scores = model.decision_function(X_eval)[:, 0]
        scores = normalize([raw_scores])[0]
    else:
        # Probability of the second class in model.classes_.
        scores = model.predict_proba(X_eval)[:, 1]

    fpr, tpr, thresholds = metrics.roc_curve(y_eval, scores)
    # Area under the curve that is actually drawn. The original computed the
    # AUC from hard class predictions, which does not match the plotted ROC.
    auc = metrics.auc(fpr, tpr)
    plt.plot(fpr, tpr, label='%s ROC (area = %0.6f)' % (m['label'], auc))

plt.xlim([-0.05, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('1-Specificity(False Positive Rate)')
plt.ylabel('Sensitivity(True Positive Rate)')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.savefig("ROC.png")
plt.show()   # Display

Bar Plot


In [None]:
# Load the per-model metrics table that plot_learning_curve appends to.
result = pd.read_csv("result.csv")


In [None]:
# Display the collected metrics (one row per plot_learning_curve call).
result

In [None]:
sns.set(rc={'figure.figsize': (20, 8)})
# Horizontal bars: one per model, length = accuracy.
ax = sns.barplot(data=result, x='Accuracy', y='Model')
# Label each bar with its value as a percentage, right-aligned at the bar end.
for position, record in result.iterrows():
    label = str(round(record['Accuracy'] * 100, 2)) + '%'
    ax.text(record['Accuracy'], position, label, color='black', ha="right")
ax.figure.savefig("Accuracy Bar Plot.png")

In [None]:
# Horizontal bars: one per model, length = sensitivity.
ax = sns.barplot(data=result, x='Sensitivity', y='Model')
# Label each bar with its value as a percentage, right-aligned at the bar end.
for position, record in result.iterrows():
    label = str(round(record['Sensitivity'] * 100, 2)) + '%'
    ax.text(record['Sensitivity'], position, label, color='black', ha="right")
ax.figure.savefig("Sensitivity Bar Plot.png")

In [None]:
# Horizontal bars: one per model, length = specificity.
ax = sns.barplot(data=result, x='Specificity', y='Model')
# Label each bar with its value as a percentage, right-aligned at the bar end.
for position, record in result.iterrows():
    label = str(round(record['Specificity'] * 100, 2)) + '%'
    ax.text(record['Specificity'], position, label, color='black', ha="right")
ax.figure.savefig("Specificity Bar Plot.png")

In [None]:
# Horizontal bars: one per model, length = precision.
ax = sns.barplot(data=result, x='Precision', y='Model')
# Label each bar with its value as a percentage, right-aligned at the bar end.
for position, record in result.iterrows():
    label = str(round(record['Precision'] * 100, 2)) + '%'
    ax.text(record['Precision'], position, label, color='black', ha="right")
ax.figure.savefig("Precision Bar Plot.png")

In [None]:
# Horizontal bars: one per model, length = F1 score.
ax = sns.barplot(data=result, x='F1_Score', y='Model')
# Label each bar with its value as a percentage, right-aligned at the bar end.
for position, record in result.iterrows():
    label = str(round(record['F1_Score'] * 100, 2)) + '%'
    ax.text(record['F1_Score'], position, label, color='black', ha="right")
ax.figure.savefig("F1-Score Bar Plot.png")