### Dataset 2

In [12]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn import linear_model
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.cross_validation import train_test_split, cross_val_score, StratifiedShuffleSplit 
from sklearn import preprocessing, metrics
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
import timeit
import matplotlib.pyplot as plt
import os
get_ipython().magic('matplotlib inline')

In [2]:
## Reading the file
def read_file(trainF,testF, Directory, Target_col,transform,drop_cols=None,categ_transform=None):
    train = pd.read_csv(Directory + trainF)
    test =  pd.read_csv(Directory + testF)
    if transform:
        lbl_enc = preprocessing.LabelEncoder()
        labels = train[Target_col].values
        labels = lbl_enc.fit_transform(labels)
        labels_test = test[Target_col].values
        labels_test = lbl_enc.fit_transform(labels_test)
        train.drop([Target_col],axis=1)
        test.drop([Target_col],axis=1)
        train[Target_col] = labels
        test[Target_col] = labels_test
    if drop_cols is not None:
        for i in drop_cols:
            train.drop([i],axis=1,inplace=True)
            test.drop([i],axis=1,inplace=True)
    if categ_transform is not None:
        for j in categ_transform:
            lbl_enc = preprocessing.LabelEncoder()
            labels = train[j].values
            labels = lbl_enc.fit_transform(labels)
            labels_test = test[j].values
            labels_test = lbl_enc.fit_transform(labels_test)
            train.drop([j],axis=1)
            test.drop([j],axis=1)
            train[j] = labels
            test[j] = labels_test
            
    return train, test

In [33]:
## SVM classifier
def svm_classifier(train, test, accuracy, roc_auc, Target_col):
    start_time = timeit.default_timer()
    y = train[Target_col]
    X = train.drop([Target_col],axis=1)
    test_labels = test[Target_col]
    test_X = test.drop([Target_col],axis=1)
    random_state = np.random.RandomState(0)
    # Binarize the output
    y = label_binarize(y, classes=np.unique(y))
    test_labels = label_binarize(test_labels, classes=np.unique(test_labels))
    n_classes = y.shape[1]
    classifier = OneVsRestClassifier(svm.SVC(probability=True))
    y_score = classifier.fit(X, y).decision_function(test_X)
    y_pred = classifier.predict(test_X)

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc_dict = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], y_score[:, i])
        roc_auc_dict[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(test_labels.ravel(), y_score.ravel())
    roc_auc_dict["micro"] = auc(fpr["micro"], tpr["micro"])
    roc_auc.append(roc_auc_dict["micro"])
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    elapsed = (timeit.default_timer() - start_time)/60
    return accuracy, roc_auc, elapsed


    """
    clf = svm.SVC(probability=True)
    clf.fit(X,y)
    y_pred = clf.predict(test_X)
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    probas_ = clf.predict_proba(test_X)
    # Compute ROC curve and area the curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(test_labels, probas_[:, 1])
    roc_auc.append(auc(false_positive_rate, true_positive_rate))
    return accuracy, roc_auc
    """

In [4]:
## RF classifier
def RF_classifier(train, test, accuracy, roc_auc, Target_col):
    start_time = timeit.default_timer()
    y = train[Target_col]
    X = train.drop([Target_col],axis=1)
    test_labels = test[Target_col]
    test_X = test.drop([Target_col],axis=1)
    
    ##Binarize the output
    y = label_binarize(y, classes=np.unique(y))
    test_labels = label_binarize(test_labels, classes=np.unique(test_labels))
    n_classes = y.shape[1]
    RF = OneVsRestClassifier(RandomForestClassifier(n_estimators = 200))
    #RF = RandomForestClassifier(n_estimators = 200, random_state = 0, 
    #            oob_score = True)
    
    RF.fit(X,y)
    y_score = RF.fit(X, y).predict_proba(test_X)
    y_pred = RF.predict(test_X)
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc_dict = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], y_score[:, i])
        roc_auc_dict[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(test_labels.ravel(), y_score.ravel())
    roc_auc_dict["micro"] = auc(fpr["micro"], tpr["micro"])
    roc_auc.append(roc_auc_dict["micro"])
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    elapsed = (timeit.default_timer() - start_time)/60
    return accuracy, roc_auc, elapsed
    """
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    elapsed = (timeit.default_timer() - start_time)/60
    #return accuracy, roc_auc, elapsed
    #y_score = RF.predict_proba(test_X)
    
    probas_ = RF.predict_proba(test_X)
    
    # Compute ROC curve and area the curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(test_labels, probas_)[:, 1]
    roc_auc.append(auc(false_positive_rate, true_positive_rate))
    return accuracy, roc_auc, elapsed
    """
    

In [5]:
## Logistic Regression
def log_classifier(train, test, accuracy, roc_auc, Target_col):
    start_time = timeit.default_timer()
    y = train[Target_col]
    X = train.drop([Target_col],axis=1)
    test_labels = test[Target_col]
    test_X = test.drop([Target_col],axis=1)
    ##Binarize the output
    y = label_binarize(y, classes=np.unique(y))
    test_labels = label_binarize(test_labels, classes=np.unique(test_labels))
    n_classes = y.shape[1]
    #logreg = linear_model.LogisticRegression()
    logreg = OneVsRestClassifier(linear_model.LogisticRegression())
    logreg.fit(X,y)
    y_score = logreg.fit(X, y).predict_proba(test_X)
    y_pred = logreg.predict(test_X)
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc_dict = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], y_score[:, i])
        roc_auc_dict[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(test_labels.ravel(), y_score.ravel())
    roc_auc_dict["micro"] = auc(fpr["micro"], tpr["micro"])
    roc_auc.append(roc_auc_dict["micro"])
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    elapsed = (timeit.default_timer() - start_time)/60
    return accuracy, roc_auc, elapsed
    """
    logreg.fit(X,y)
    y_pred = logreg.predict(test_X)
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))

    probas_ = logreg.predict_proba(test_X)
    # Compute ROC curve and area the curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(test_labels, probas_[:, 1])
    roc_auc.append(auc(false_positive_rate, true_positive_rate))

    elapsed = (timeit.default_timer() - start_time)/60
    return accuracy, roc_auc, elapsed
    """

In [6]:
## Decision Tree
def tree_classifier(train, test, accuracy, roc_auc, Target_col):
    start_time = timeit.default_timer()
    y = train[Target_col]
    X = train.drop([Target_col],axis=1)
    test_labels = test[Target_col]
    test_X = test.drop([Target_col],axis=1)
    ##Binarize the output
    y = label_binarize(y, classes=np.unique(y))
    test_labels = label_binarize(test_labels, classes=np.unique(test_labels))
    n_classes = y.shape[1]
    tree = OneVsRestClassifier(DecisionTreeClassifier(random_state=0))
    tree.fit(X,y)
    y_score = tree.fit(X, y).predict_proba(test_X)
    y_pred = tree.predict(test_X)
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc_dict = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], y_score[:, i])
        roc_auc_dict[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(test_labels.ravel(), y_score.ravel())
    roc_auc_dict["micro"] = auc(fpr["micro"], tpr["micro"])
    roc_auc.append(roc_auc_dict["micro"])
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    elapsed = (timeit.default_timer() - start_time)/60
    return accuracy, roc_auc, elapsed
    """
    #tree = DecisionTreeClassifier(random_state=0)
    tree.fit(X,y)
    y_pred = tree.predict(test_X)
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    
    probas_ = tree.predict_proba(test_X)
    # Compute ROC curve and area the curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(test_labels, probas_)[:, 1]
    roc_auc.append(auc(false_positive_rate, true_positive_rate))
    
    elapsed = (timeit.default_timer() - start_time)/60
    return accuracy, roc_auc, elapsed
    """

In [56]:
def model_build(filenum,Target_column, df_train, df_test, Directory,drop_cols=None,categ_transform=None):
    accuracy_svm = []; roc_auc_svm = []
    accuracy_RF = []; roc_auc_RF = []
    accuracy_log = []; roc_auc_log = []
    accuracy_tree = []; roc_auc_tree = []
    elapsed_time_svm = [];elapsed_time_RF = []; elapsed_time_log = []; elapsed_time_tree = []
    Target_col = Target_column
    for i in range(6,11):
        trainF = df_train+ str(i) + '.csv'
        testF = df_test + str(i) + '.csv'
        print("Executing this iteration:",i)
        train, test = read_file(trainF,testF,Directory, Target_col,transform=True,drop_cols=drop_cols,categ_transform=categ_transform)
        accuracy_svm, roc_auc_svm, elapsed = svm_classifier(train, test, accuracy_svm, roc_auc_svm, Target_col)
        elapsed_time_svm.append(elapsed)
        accuracy_RF, roc_auc_RF, elapsed = RF_classifier(train, test, accuracy_RF, roc_auc_RF, Target_col)
        elapsed_time_RF.append(elapsed)
        accuracy_log, roc_auc_log, elapsed = log_classifier(train, test, accuracy_log, roc_auc_log, Target_col)
        elapsed_time_log.append(elapsed)
        accuracy_tree, roc_auc_tree, elapsed = tree_classifier(train, test, accuracy_tree, roc_auc_tree, Target_col)
        elapsed_time_tree.append(elapsed)
    
    print('Data set# ' + str(filenum))
    print('********** SVM classifier ***********')
    print('Individual file accuracy for SVM')
    print(np.array(accuracy_svm))
    print('Individual time taken for SVM')
    print(np.array(elapsed_time_svm))
    print('Accuracy mean   ' + 'Accuracy Stdev  ')
    print(np.array(accuracy_svm).mean(), np.array(accuracy_svm).std())
    print('Individual file AUC for SVM')
    print(np.array(roc_auc_svm))
    print('AUC mean        ' + 'AUC      Stdev  ')
    print(np.array(roc_auc_svm).mean(), np.array(roc_auc_svm).std())
    print()
    print('********** RF classifier ************')
    print('Individual file accuracy for RF')
    print(np.array(accuracy_RF))
    print('Individual time taken for RF')
    print(np.array(elapsed_time_RF))
    print('Accuracy mean   ' + 'Accuracy Stdev  ')
    print(np.array(accuracy_RF).mean(), np.array(accuracy_RF).std())
    print('Individual file AUC for RF')
    print(np.array(roc_auc_RF))
    print('AUC mean        ' + 'AUC      Stdev  ')
    print(np.array(roc_auc_RF).mean(), np.array(roc_auc_RF).std())
    print()
    
    print('********** Logistic regression ******')
    print('Individual file accuracy for log')
    print(np.array(accuracy_log))
    print('Individual time taken for log')
    print(np.array(elapsed_time_log))
    print('Accuracy mean   ' + 'Accuracy Stdev  ')
    print(np.array(accuracy_log).mean(), np.array(accuracy_log).std())
    print('Individual file AUC for log')
    print(np.array(roc_auc_log))
    print('AUC mean        ' + 'AUC      Stdev  ')
    print(np.array(roc_auc_log).mean(), np.array(roc_auc_log).std())
    print()
    print('****** Decision Tree classifier *****')
    print('Individual file accuracy for Tree')
    print(np.array(accuracy_tree))
    print('Individual time taken for tree')
    print(np.array(elapsed_time_tree))
    print('Accuracy mean   ' + 'Accuracy Stdev  ')
    print(np.array(accuracy_tree).mean(), np.array(accuracy_tree).std())
    print('Individual file AUC for tree')
    print(np.array(roc_auc_tree))
    print('AUC mean        ' + 'AUC      Stdev  ')
    print(np.array(roc_auc_tree).mean(), np.array(roc_auc_tree).std())
    print()
    


In [8]:
model_build(filenum=2,Target_column='letter', df_train='data2_train', df_test='data2_test', Directory = "./Data Set 2/splits/")

Data set# 2
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.33287848  0.34013554  0.21624281  0.26099886  0.32236044  0.21955273
  0.29911934  0.32772472  0.30477486  0.30132951]
Individual time taken for SVM
[ 0.4629503   0.45574505  0.56967387  0.44674427  0.52146024  0.51562659
  0.5337757   0.52704446  0.51352353  0.49498482]
Accuracy mean   Accuracy Stdev  
0.292511728746 0.0429339311175
Individual file AUC for SVM
[ 0.93227143  0.92726223  0.91656861  0.92058832  0.93602424  0.92452642
  0.93639454  0.92503512  0.92754205  0.92123119]
AUC mean        AUC      Stdev  
0.926744414351 0.00624798905986

********** RF classifier ************
Individual file accuracy for RF
[ 0.78523236  0.78260542  0.78935816  0.78848647  0.7890625   0.78747356
  0.79456423  0.78566015  0.78739801  0.78199124]
Individual time taken for RF
[ 3.22356922  3.41561738  2.9248002   3.94464878  3.45899524  3.43185834
  3.43087167  3.43615795  3.4295094   3.36366764]
Accuracy mean  

In [38]:
model_build(filenum=4,Target_column='Activity', df_train='data4_train', df_test='data4_test', Directory = "./Data Set 4/splits/",drop_cols=['Tag_Identificator'],categ_transform=['Sequence_Name'])

Executing this iteration: 1
Executing this iteration: 2
Data set# 4
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.25835542  0.25887455]
Individual time taken for SVM
[  76.79176815  105.20483511]
Accuracy mean   Accuracy Stdev  
0.258614982163 0.000259564024496
Individual file AUC for SVM
[ 0.83057256  0.83064724]
AUC mean        AUC      Stdev  
0.830609898842 3.734322768e-05

********** RF classifier ************
Individual file accuracy for RF
[ 0.59912397  0.60267153]
Individual time taken for RF
[ 6.77095594  7.63278542]
Accuracy mean   Accuracy Stdev  
0.600897750576 0.00177378120014
Individual file AUC for RF
[ 0.95799414  0.95834771]
AUC mean        AUC      Stdev  
0.958170923449 0.000176783803983

********** Logistic regression ******
Individual file accuracy for log
[ 0.05764134  0.05546591]
Individual time taken for log
[ 0.07259994  0.0796795 ]
Accuracy mean   Accuracy Stdev  
0.0565536282155 0.00108771454066
Individual file AUC for log
[ 0.842

In [39]:
model_build(filenum=6,Target_column='Class', df_train='d6_train', df_test='d6_test', Directory  = "./Data Set 6/splits/",drop_cols=None,categ_transform=None)

Executing this iteration: 1
Executing this iteration: 2
Data set# 6
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.95802327  0.95892567]
Individual time taken for SVM
[ 26.11681806  23.4313102 ]
Accuracy mean   Accuracy Stdev  
0.958474467762 0.000451199761109
Individual file AUC for SVM
[ 0.99898143  0.99896341]
AUC mean        AUC      Stdev  
0.998972420248 9.01468319242e-06

********** RF classifier ************
Individual file accuracy for RF
[ 0.99910911  0.99921314]
Individual time taken for RF
[ 0.45365645  0.4089185 ]
Accuracy mean   Accuracy Stdev  
0.999161121727 5.20136666091e-05
Individual file AUC for RF
[ 0.99999972  0.99999968]
AUC mean        AUC      Stdev  
0.999999704341 2.03970560997e-08

********** Logistic regression ******
Individual file accuracy for log
[ 0.80421339  0.80509364]
Individual time taken for log
[ 0.05933265  0.05100563]
Accuracy mean   Accuracy Stdev  
0.804653515855 0.000440121032934
Individual file AUC for log
[ 0.99

In [41]:
model_build(filenum=7,Target_column='Class', df_train='d7_train', df_test='d7_test', Directory = "./Data Set 7/splits/",drop_cols=None,categ_transform=None)

Executing this iteration: 1
Executing this iteration: 2
Executing this iteration: 3
Executing this iteration: 4
Executing this iteration: 5
Executing this iteration: 6
Executing this iteration: 7
Executing this iteration: 8
Executing this iteration: 9
Executing this iteration: 10
Data set# 7
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
Individual time taken for SVM
[ 3.6660919   3.6196811   3.71757022  3.54940779  3.68547919  3.71381769
  3.57980511  3.82933183  3.82449794  3.34536824]
Accuracy mean   Accuracy Stdev  
0.0 0.0
Individual file AUC for SVM
[ 0.53402186  0.53967774  0.53528371  0.54013237  0.53685979  0.53474391
  0.53211638  0.5353484   0.54112735  0.53704218]
AUC mean        AUC      Stdev  
0.536635369139 0.00276027966934

********** RF classifier ************
Individual file accuracy for RF
[ 0.96711013  0.96737949  0.96685589  0.96596053  0.96787076  0.96813996
  0.97011453  0.96832332  0.96779476  0.

### Dataset#12
<lm> Issues were found with 6th split file for dataset# 12 which was dimensionality issues that SVM could not handle. 
<lm> 6th split file for train and test were ignored to process other files.

In [44]:
model_build(filenum=12,Target_column='TARGET', df_train='d12_original_train', df_test='d12_original_test', Directory = "./Original Dataset_12_Brain/",drop_cols=None,categ_transform=None)

Executing this iteration: 1
Executing this iteration: 2
Executing this iteration: 3
Executing this iteration: 4
Executing this iteration: 5
Data set# 12
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.25        0.2         0.4         0.1875      0.38461538]
Individual time taken for SVM
[ 0.00340304  0.0030392   0.00300438  0.00293143  0.00319624]
Accuracy mean   Accuracy Stdev  
0.284423076923 0.0906673156665
Individual file AUC for SVM
[ 0.984375    0.94        0.91444444  0.98535156  0.97781065]
AUC mean        AUC      Stdev  
0.960396331566 0.0283851541467

********** RF classifier ************
Individual file accuracy for RF
[ 0.41666667  0.13333333  0.4         0.4375      0.38461538]
Individual time taken for RF
[ 0.03655832  0.03605938  0.03475491  0.03435504  0.03586661]
Accuracy mean   Accuracy Stdev  
0.354423076923 0.1119335032
Individual file AUC for RF
[ 0.98003472  0.85888889  0.84833333  0.95458984  0.94970414]
AUC mean        AUC      Stdev

In [53]:
model_build(filenum=12,Target_column='TARGET', df_train='d12_original_train', df_test='d12_original_test', Directory = "./Original Dataset_12_Brain/",drop_cols=None,categ_transform=None)

Executing this iteration: 7
Executing this iteration: 8
Executing this iteration: 9
Executing this iteration: 10
Data set# 12
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.21428571  0.33333333  0.23529412  0.15      ]
Individual time taken for SVM
[ 0.00317301  0.00336861  0.00270748  0.0023528 ]
Accuracy mean   Accuracy Stdev  
0.233228291317 0.0657857446102
Individual file AUC for SVM
[ 0.96938776  0.99479167  0.92474048  0.93125   ]
AUC mean        AUC      Stdev  
0.955042476549 0.0285922970332

********** RF classifier ************
Individual file accuracy for RF
[ 0.35714286  0.41666667  0.23529412  0.15      ]
Individual time taken for RF
[ 0.03526442  0.04156801  0.03399893  0.03287343]
Accuracy mean   Accuracy Stdev  
0.289775910364 0.10385704166
Individual file AUC for RF
[ 0.93877551  0.93663194  0.8200692   0.8596875 ]
AUC mean        AUC      Stdev  
0.8887910397 0.0508844430117

********** Logistic regression ******
Individual file accuracy fo

### Dataset#14
<lm> Issues were found with 5th split file for dataset# 14 which was dimensionality issues that SVM could not handle. 
<lm> 5th split file for train and test were ignored to process other files.

In [47]:
model_build(filenum=14,Target_column='C2309', df_train='srbct_train', df_test='srbct_test', Directory = "./data14_srbct/",drop_cols=None,categ_transform=None)

Executing this iteration: 1
Executing this iteration: 2
Executing this iteration: 3
Executing this iteration: 4
Data set# 14
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.57894737  0.57894737  0.69565217  0.73684211]
Individual time taken for SVM
[ 0.00232053  0.00132176  0.00194394  0.00221679]
Accuracy mean   Accuracy Stdev  
0.647597254005 0.0701775113281
Individual file AUC for SVM
[ 1.          0.98891967  0.99873976  0.98522622]
AUC mean        AUC      Stdev  
0.993221412899 0.00630137824027

********** RF classifier ************
Individual file accuracy for RF
[ 0.78947368  0.78947368  0.73913043  0.68421053]
Individual time taken for RF
[ 0.02892325  0.02097511  0.02818616  0.02908609]
Accuracy mean   Accuracy Stdev  
0.75057208238 0.0434782608696
Individual file AUC for RF
[ 0.99630656  0.99307479  0.98960302  0.9787627 ]
AUC mean        AUC      Stdev  
0.989436767224 0.00660289056819

********** Logistic regression ******
Individual file accurac

In [57]:
model_build(filenum=14,Target_column='C2309', df_train='srbct_train', df_test='srbct_test', Directory = "./data14_srbct/",drop_cols=None,categ_transform=None)

Executing this iteration: 6
Executing this iteration: 7
Executing this iteration: 8
Executing this iteration: 9
Executing this iteration: 10
Data set# 14
********** SVM classifier ***********
Individual file accuracy for SVM
[ 0.52173913  0.55172414  0.66666667  0.60869565  0.68421053]
Individual time taken for SVM
[ 0.00195976  0.00172125  0.00228728  0.00201164  0.00214665]
Accuracy mean   Accuracy Stdev  
0.606607222704 0.0630055825429
Individual file AUC for SVM
[ 0.99558916  0.99088387  0.99897119  1.          0.98799631]
AUC mean        AUC      Stdev  
0.994688106065 0.00461790777133

********** RF classifier ************
Individual file accuracy for RF
[ 0.52173913  0.72413793  0.88888889  0.7826087   0.63157895]
Individual time taken for RF
[ 0.02998436  0.02660951  0.03017603  0.02848587  0.02873571]
Accuracy mean   Accuracy Stdev  
0.709790718676 0.125749700483
Individual file AUC for RF
[ 0.97731569  0.98236227  1.          0.99873976  0.98384118]
AUC mean        AUC      S

In [49]:
model_build(filenum=15,Target_column='TARGET', df_train='d15_original_train', df_test='d15_original_test', Directory = "./Original Dataset_15_Lymphoma/",drop_cols=None,categ_transform=None)

Executing this iteration: 1
Executing this iteration: 2
Executing this iteration: 3
Executing this iteration: 4
Executing this iteration: 5
Executing this iteration: 6
Executing this iteration: 7
Executing this iteration: 8
Executing this iteration: 9
Executing this iteration: 10
Data set# 15
********** SVM classifier ***********
Individual file accuracy for SVM
[ 1.          1.          0.84210526  1.          1.          1.          1.
  1.          1.          0.94444444]
Individual time taken for SVM
[ 0.0031176   0.0029898   0.00304559  0.00280167  0.00276098  0.00317125
  0.00270933  0.00299261  0.00256679  0.00316712]
Accuracy mean   Accuracy Stdev  
0.97865497076 0.0484366234817
Individual file AUC for SVM
[ 1.          1.          0.99722992  1.          1.          1.          1.
  1.          1.          1.        ]
AUC mean        AUC      Stdev  
0.99972299169 0.000831024930748

********** RF classifier ************
Individual file accuracy for RF
[ 0.88888889  0.95       