In [8]:
# Basic models
import traceback
import numpy as np
import pandas as pd

# Data operation models
from sklearn.model_selection import StratifiedKFold
from sklearn import cross_validation   #Additional scklearn functions
from sklearn.grid_search import GridSearchCV    #Performing grid search

# Classifier models
from sklearn.svm import SVC

# Evaluation models
import sklearn.metrics as sm

In [9]:
# Configurations for the model
def run_Config():
    """Return the experiment settings.

    Returns:
        tuple: ``(data_name, k_holdout, k_cv, pars)`` where ``data_name`` is
        the spreadsheet file name, ``k_holdout`` the number of outer hold-out
        folds, ``k_cv`` the number of inner cross-validation folds and
        ``pars`` the hyper-parameter grid searched for the SVM.
    """
    # Candidate (class 1, class 2) weight pairs tried by the grid search.
    weight_pairs = [(0.1, 0.9), (0.2, 0.8), (0.25, 0.75), (0.4, 0.6)]
    grid = {
        'kernel': ['rbf'],
        'C': [0.1, 1, 10, 100],
        'gamma': [2, 3],
        'class_weight': [{1: w_first, 2: w_second} for w_first, w_second in weight_pairs],
    }
    spreadsheet = 'Thoracic_Surgery_onehot.xlsx'
    outer_folds = 10
    inner_folds = 5
    return spreadsheet, outer_folds, inner_folds, grid

In [10]:
# Load data and run the holdout cross validation
def run_load_data(data_name):
    """Read the spreadsheet and split it into features and labels.

    Args:
        data_name: Path of the Excel file handed to ``pd.read_excel``.

    Returns:
        tuple: ``(all_fea, all_label)`` -- every column except the last as the
        feature matrix, and the last column passed through
        ``get_normal_label``.
    """
    table = pd.read_excel(data_name).values
    # The last column holds the class label; everything before it is a feature.
    features = table[:, :-1]
    labels = get_normal_label(table[:, -1])
    return features, labels

def get_normal_label(y):
    """Map arbitrary class labels onto consecutive integer codes 1..K.

    The distinct values of ``y`` are sorted (as ``np.unique`` does) and the
    i-th smallest value is encoded as ``i + 1``.

    The mapping is computed in a single pass from the original values.
    Relabelling sequentially in place is incorrect whenever an original label
    equals a code that was already assigned: with labels ``[0, 1]`` the zeros
    first become 1 and are then rewritten to 2 together with the real ones,
    collapsing both classes into one.

    Args:
        y: One-dimensional array-like of class labels. It is not modified.

    Returns:
        numpy.ndarray: Integer array with the same shape as ``y`` holding the
        codes ``1..K``.
    """
    labels = np.asarray(y)
    # `inverse` holds, for every element, the index of its value in the sorted
    # unique list, i.e. exactly the zero-based class code.
    _, inverse = np.unique(labels, return_inverse=True)
    return inverse.reshape(labels.shape) + 1

def run_holdout_validation(all_fea, all_label, k_holdout, k_cv, pars, random_state=None):
    """Run a stratified, shuffled hold-out validation of the SVM pipeline.

    The data is split into ``k_holdout`` stratified folds. For each fold the
    model is tuned and fitted on the remaining folds via ``run_svm`` and the
    predictions on the held-out fold are scored with ``run_evaluation``.

    Args:
        all_fea: Feature matrix, indexable by an array of row indices.
        all_label: Label array aligned with ``all_fea``.
        k_holdout: Number of outer (hold-out) folds.
        k_cv: Number of inner cross-validation folds, forwarded to ``run_svm``.
        pars: Hyper-parameter grid, forwarded to ``run_svm``.
        random_state: Seed forwarded to ``StratifiedKFold`` so the shuffled
            split can be reproduced. The default ``None`` keeps the previous
            unseeded behaviour.

    Returns:
        tuple: ``(res_list, opt_pars_list)`` -- one evaluation result and one
        set of selected hyper-parameters per hold-out fold, in fold order.
    """
    res_list = []
    opt_pars_list = []
    skf = StratifiedKFold(n_splits=k_holdout, shuffle=True, random_state=random_state)
    for i_train, i_test in skf.split(all_fea, all_label):
        # Slice out the training and held-out portions for this fold.
        train_fea, test_fea = all_fea[i_train], all_fea[i_test]
        train_label, test_label = all_label[i_train], all_label[i_test]
        # Tune and fit on the training part, predict the held-out part.
        pre_label_now, opt_pars_now = run_svm(pars, k_cv, train_fea, train_label, test_fea)
        # Score the predictions and keep both the scores and chosen parameters.
        res_list.append(run_evaluation(pre_label_now, test_label))
        opt_pars_list.append(opt_pars_now)
    return res_list, opt_pars_list
    

In [11]:
# Learn the SVM model (hyper-parameters chosen by grid search)
def run_svm(pars, k_cv, train_fea, train_label, test_fea):
    """Grid-search an SVC on the training data and predict the test data.

    Args:
        pars: Hyper-parameter grid handed to ``GridSearchCV``.
        k_cv: Value passed as the ``cv`` argument of ``GridSearchCV``.
        train_fea: Training feature matrix.
        train_label: Training labels.
        test_fea: Feature matrix to predict.

    Returns:
        tuple: ``(pre_label, best_params)`` -- the predictions for
        ``test_fea`` and the ``best_params_`` found by the search.
    """
    search = GridSearchCV(SVC(), pars, cv=k_cv)
    search.fit(train_fea, train_label)
    predictions = search.predict(test_fea)
    best_params = search.best_params_
    return predictions, best_params
    

In [12]:
# Evaluate the performance
def run_evaluation(p, y):
    """Score predictions against the true labels.

    The largest label in ``y`` is treated as the positive class and the
    smallest as the negative class. All scores are expressed in percent.

    Args:
        p: Predicted labels (array indexable by the positions of ``y``).
        y: True labels.

    Returns:
        dict: Scores keyed by ``'TPR'``, ``'TNR'``, ``'MAcc'``, ``'GM'``,
        ``'F1(Macro)'``, ``'F1(Micro)'`` and ``'Acc'``.
    """
    positive_idx = np.nonzero(y == max(y))
    negative_idx = np.nonzero(y == min(y))

    # Per-class hit rate: 1 - hamming loss restricted to that class' samples.
    tpr = (1 - sm.hamming_loss(y[positive_idx], p[positive_idx])) * 100
    tnr = (1 - sm.hamming_loss(y[negative_idx], p[negative_idx])) * 100

    scores = {}
    scores['TPR'] = tpr
    scores['TNR'] = tnr
    scores['MAcc'] = np.mean([tpr, tnr])
    scores['GM'] = np.sqrt(tpr * tnr)
    for key, averaging in (('F1(Macro)', 'macro'), ('F1(Micro)', 'micro')):
        scores[key] = sm.f1_score(y, p, average=averaging) * 100
    scores['Acc'] = sm.accuracy_score(y, p) * 100
    return scores

In [13]:
def get_dict(d):
    """Flatten a mapping into a list of ``'<key>_<value>'`` strings.

    Args:
        d: Mapping with string keys; values are converted with ``str``.

    Returns:
        list: One ``key + '_' + str(value)`` entry per key, in the mapping's
        iteration order.
    """
    return [key + '_' + str(d[key]) for key in d]

In [None]:
if __name__ == '__main__':
    # Load the configuration and data, then run the hold-out validation.
    data_name, k_holdout, k_cv, pars = run_Config()
    all_fea, all_label = run_load_data(data_name)
    res_list, opt_pars_list = run_holdout_validation(all_fea, all_label, k_holdout, k_cv, pars)
    # Report the scores and the selected hyper-parameters of every fold.
    # Each print call receives one pre-built string so the output is the same
    # whether `print` is a statement (Python 2) or a function (Python 3).
    for fold_result, fold_params in zip(res_list, opt_pars_list):
        print(str(get_dict(fold_result)) + ' with hyper-parameters:')
        print(str(get_dict(fold_params)))
        print('-----------------------')
        

  'precision', 'predicted', average, warn_for)
