# Train all SVM predictors

In [1]:
from collections import Counter
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import warnings
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit, StratifiedKFold
from IPython.display import display
from scripts.random_profiles import RandomProfileGenerator
from scripts.profile_reader2 import ProfileReader
from scripts.svm_tools import plot_learning_curve, grid_search_svm, RS
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.externals import joblib

pd.options.display.max_rows = 30
pd.options.display.max_columns = 300
import matplotlib.pyplot as plt
%matplotlib inline

# Input workbooks. Forward slashes are used so the relative paths resolve on
# Windows as well as on Linux/macOS (a backslash is a separator on Windows only).
pr = ProfileReader(data_file='data/Final_Berg JBS 2013 Supplemental Table 3_For SVM14Dec2017.xlsx',
                   mechanism_file='data/Final_Berg JBS 2013 Supplemental Table 3_For SVM14Dec2017 - Mechanisms.xlsx')

# Read in the profile data, using 'mech' as the index.
data = pr.get_profile(index=['mech'])

# Result of get_mechanism_count(), fetched once and reused; `mechs` is its
# 'Mechanism' column and is what the training cell iterates over.
mc = pr.get_mechanism_count()
mechs = mc['Mechanism']

In [2]:
# One fitted classifier per mechanism, keyed by the entries of `mechs`.
clfs = {}

# Ensure reproducible results.
np.random.seed(442)
for mech in mechs:

    # Get the training data for this mechanism.
    # NOTE(review): the earlier comment here said "100 negative class" while the
    # call passes prof_num=30 -- confirm which one is intended.
    X, y = pr.get_x_y(mech=mech, impute='group_mean', normalize='l2', prof_num=30)

    # SMOTE oversampling of the positive class is not applied; the grid search
    # is run on the data exactly as returned by get_x_y.
    best_clf = grid_search_svm(X, y, scorer=precision_score, parameters={'C': [500]})

    clfs[mech] = best_clf

# Persist all classifiers in a single file; the list of written file names
# returned by joblib.dump is displayed as the cell output.
joblib.dump(clfs, 'svm_classifiers.pkl')

['svm_classifiers.pkl']

In [3]:
# Reload the classifier dictionary written by joblib.dump in the training cell.
# NOTE: joblib.load unpickles arbitrary objects -- only load files from a trusted source.
clfs = joblib.load('svm_classifiers.pkl') 

In [None]:
# Display the mechanisms for which a classifier is trained.
mechs