# Tuning and training the models for standard-pHLA-score

Input are the per-peptide-position ref2015 features for complex, already generated in ../Featurization/rosettaPPPEnergies.csv

We tune the parameters in the 5-fold-crossvalidation setting.

In [18]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
import time
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterSampler, cross_val_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from IPython.display import display
from scipy import stats
import _pickle as cPickle
import statistics
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## Load the standard ref2015 features

### Full dataset

In [19]:
## 1 - load the energies
def pppene_to_array(tmp):
    tmp = tmp.replace("(", "")
    tmp = tmp.replace(")", "")
    tmp = tmp.strip("[]")
    tmp = tmp.replace(" ", "")
    tmp = tmp.replace("\n", ",")
    return np.fromstring(tmp, dtype=float, sep=", ").reshape(9,20)

ppp_ene = pd.read_csv("../Featurization/rosettaPPPEnergies.csv")
ppp_ene = ppp_ene[["allele", "peptide", "binder", "ba", "energies", "total_energy"]]
ppp_ene["energies"] = ppp_ene["energies"].apply(pppene_to_array)
ppp_ene

Unnamed: 0,allele,peptide,binder,ba,energies,total_energy
0,A0101,YLEQLHQLY,1,0.574375,"[[-9.37669305, 4.67802037, 8.54111444, 10.3367...",112.623867
1,A0101,HSERHVLLY,1,0.574375,"[[-7.89190954, 0.93707113, 10.06233605, 2.7158...",91.902185
2,A0101,MTDPEMVEV,1,0.574375,"[[-8.25236275, 10.56587939, 7.5793572, 1.14710...",146.451590
3,A0101,LTDFIREEY,1,0.574375,"[[-8.43720197, 10.21830335, 7.113905, 36.51672...",138.735082
4,A0101,LLDQRPAWY,1,0.574375,"[[-8.18944861, 12.37002534, 6.69837214, 8.0238...",142.756344
...,...,...,...,...,...,...
77576,C1601,QQTTTSFQN,0,0.000000,"[[-8.23468271, 4.14167228, 9.40648541, 38.0082...",128.349704
77577,C1601,QQVEQMEIP,0,0.000000,"[[-9.20362039, 22.89414572, 10.40234849, 35.70...",159.872992
77578,C1601,QQWQVFSAE,0,0.000000,"[[-8.46025926, 3.82365938, 10.25920191, 61.414...",97.152888
77579,C1601,QRCVVLRFL,0,0.000000,"[[-7.11160825, 1.35151526, 9.62650699, 20.2230...",116.714004


### Map the full dataset to the training set

In [20]:
#Load split
train_set = pd.read_csv("../Datasets/train_set.csv")
train_set = train_set[["allele", "peptide", "fileloc", "allele_type", "fold_num"]]

#Merge to form the training set
train_dataset = pd.merge(ppp_ene, train_set, on=["allele", "peptide"], suffixes=["", "_y"], how="inner")
train_dataset

Unnamed: 0,allele,peptide,binder,ba,energies,total_energy,fileloc,allele_type,fold_num
0,A0101,YLEQLHQLY,1,0.574375,"[[-9.37669305, 4.67802037, 8.54111444, 10.3367...",112.623867,/home/anja/Documents/jayvee_data/singleconf/al...,HLA-A,2.0
1,A0101,HSERHVLLY,1,0.574375,"[[-7.89190954, 0.93707113, 10.06233605, 2.7158...",91.902185,/home/anja/Documents/jayvee_data/singleconf/al...,HLA-A,0.0
2,A0101,MTDPEMVEV,1,0.574375,"[[-8.25236275, 10.56587939, 7.5793572, 1.14710...",146.451590,/home/anja/Documents/jayvee_data/singleconf/al...,HLA-A,0.0
3,A0101,LTDFIREEY,1,0.574375,"[[-8.43720197, 10.21830335, 7.113905, 36.51672...",138.735082,/home/anja/Documents/jayvee_data/singleconf/al...,HLA-A,1.0
4,A0101,LLDQRPAWY,1,0.574375,"[[-8.18944861, 12.37002534, 6.69837214, 8.0238...",142.756344,/home/anja/Documents/jayvee_data/singleconf/al...,HLA-A,1.0
...,...,...,...,...,...,...,...,...,...
69793,C1601,QQTTTSFQN,0,0.000000,"[[-8.23468271, 4.14167228, 9.40648541, 38.0082...",128.349704,/home/anja/Documents/COMP590P/C_decoys/confs/C...,HLA-C,4.0
69794,C1601,QQVEQMEIP,0,0.000000,"[[-9.20362039, 22.89414572, 10.40234849, 35.70...",159.872992,/home/anja/Documents/COMP590P/C_decoys/confs/C...,HLA-C,4.0
69795,C1601,QQWQVFSAE,0,0.000000,"[[-8.46025926, 3.82365938, 10.25920191, 61.414...",97.152888,/home/anja/Documents/COMP590P/C_decoys/confs/C...,HLA-C,1.0
69796,C1601,QRCVVLRFL,0,0.000000,"[[-7.11160825, 1.35151526, 9.62650699, 20.2230...",116.714004,/home/anja/Documents/COMP590P/C_decoys/confs/C...,HLA-C,2.0


In [21]:
# extracting features in training format
# and get the cross-validation iterator
def get_energies(X):
    ene = np.roll(X, 4, axis = 0)[:9,:19]
    ene = np.roll(ene, -4, axis = 0)
    ene = ene.reshape(9*19)
    return ene

def extract_features_Xy_cv(merged_df, allele):
    allele_data = merged_df[merged_df["allele"]==allele]
    allele_data["enefeat"] = allele_data["energies"].apply(get_energies)
    allele_data = allele_data.reset_index(drop=True)
    flag = 0
    for index, row in allele_data.iterrows():
        if flag == 0:
            X = np.array(row['enefeat'])
            flag = 1
        else: 
            X = np.vstack((X, row['enefeat']))
    #extract binding energies        
    y = np.array(list(allele_data["ba"]))
    y_l = np.array(list(allele_data["binder"]))
    
    cv_iter = []
    for split in range(5):
        test_ind = allele_data.index[(allele_data['fold_num'] == split)].tolist()
        train_ind = allele_data.index[~(allele_data['fold_num'] == split)].tolist()
        cv_iter.append((train_ind, test_ind))
        
    return (X, y, y_l, cv_iter)
    


In [22]:
def param_tune_allele(allele, train_dataset):
    
    allele_td = train_dataset[train_dataset["allele"]==allele]
    (X_train, y_train, y_l, cv) = extract_features_Xy_cv(allele_td, allele)

    grid_params = {'C': [1e-2, 1, 10], 
            'kernel': ["linear", "rbf"], 
            'degree': [3, 5, 7],
                   
            }
    
    regr_results = {}
    best_cv_mscore = 0
    best_cv_scores = None
    best_cv_params = None
    regr_svr = SVR()
    
    for i, g in enumerate(ParameterSampler(grid_params, n_iter=60)):
        print("CV")
        print(g)
        #cross-validation
        regr_svr.set_params(**g)
        regr = make_pipeline(StandardScaler(), regr_svr)
        cv_scores = cross_val_score(regr, X_train, y_train, cv=cv, n_jobs = -1)
        cv_mscore = statistics.mean(cv_scores)
        print(cv_scores)
        print(cv_mscore)
        if cv_mscore > best_cv_mscore:
            best_cv_params = g
            best_cv_scores = cv_scores
  
    print("Best CV "+str(best_cv_mscore))
    print(best_cv_params)
    return (None, None, best_cv_params, best_cv_scores)
            
    
'''
    regr_oob = RandomForestRegressor(n_jobs=-1)
    regr_cv = RandomForestRegressor(n_jobs=-1)

    best_oob_score = 0 
    best_oob_params = None
    best_cv_mscore = 0
    best_cv_scores = None
    best_cv_params = None
    for i, g in enumerate(ParameterSampler(grid_params, n_iter=100)):
        print("Parameter iteration: "+str(i))
        print("OOB")
        #out of bag
        print(g)
        regr_oob.set_params(**g)
        regr_oob.fit(X_train_s,y_train_s)
        print(regr_oob.oob_score_)
        if regr_oob.oob_score_ > best_oob_score:
            best_oob_params = g
            best_oob_score = regr_oob.oob_score_
        
        print("CV")
        #cross-validation
        regr_cv.set_params(**g)
        cv_scores = cross_val_score(regr_cv, X_train, y_train, cv=cv, n_jobs = -1)
        cv_mscore = statistics.mean(cv_scores)
        print(cv_scores)
        print(cv_mscore)
        if cv_mscore > best_cv_mscore:
            best_cv_params = g
            best_cv_scores = cv_scores
            best_cv_mscore = cv_mscore
            
    print("Best OOB "+str(best_oob_score))
    print(best_oob_params)
    print("Best CV "+str(best_cv_mscore))
    print(best_cv_params)
    return (best_oob_params, best_oob_score, best_cv_params, best_cv_scores)

'''

'\n    regr_oob = RandomForestRegressor(n_jobs=-1)\n    regr_cv = RandomForestRegressor(n_jobs=-1)\n\n    best_oob_score = 0 \n    best_oob_params = None\n    best_cv_mscore = 0\n    best_cv_scores = None\n    best_cv_params = None\n    for i, g in enumerate(ParameterSampler(grid_params, n_iter=100)):\n        print("Parameter iteration: "+str(i))\n        print("OOB")\n        #out of bag\n        print(g)\n        regr_oob.set_params(**g)\n        regr_oob.fit(X_train_s,y_train_s)\n        print(regr_oob.oob_score_)\n        if regr_oob.oob_score_ > best_oob_score:\n            best_oob_params = g\n            best_oob_score = regr_oob.oob_score_\n        \n        print("CV")\n        #cross-validation\n        regr_cv.set_params(**g)\n        cv_scores = cross_val_score(regr_cv, X_train, y_train, cv=cv, n_jobs = -1)\n        cv_mscore = statistics.mean(cv_scores)\n        print(cv_scores)\n        print(cv_mscore)\n        if cv_mscore > best_cv_mscore:\n            best_cv_params 

## Crossvalidation

In [23]:
alleles = train_dataset["allele"].unique()
results = {"allele":[], "best_oob_param":[], "best_oob_score":[], "best_cv_param":[], "best_cv_scores":[]}

for allele in alleles:
    print("------------------------------------------------------------------------")
    print("ALLELE")
    print(allele)
    res = param_tune_allele(allele, train_dataset)
    results["allele"].append(allele)
    results["best_oob_param"].append(res[0])
    results["best_oob_score"].append(res[1])
    results["best_cv_param"].append(res[2])
    results["best_cv_scores"].append(res[3])
    train_best_model(allele, res[2], "ppp")   

------------------------------------------------------------------------
ALLELE
A0101
CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}




[0.67209626 0.75413053 0.69391555 0.69255731 0.65171135]
0.6928821997753976
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.55903089 0.62856057 0.57230845 0.58664457 0.59852334]
0.5890135639251914
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.67209626 0.75413053 0.69391555 0.69255731 0.65171135]
0.6928821997753976
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.55903089 0.62856057 0.57230845 0.58664457 0.59852334]
0.5890135639251914
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.67209626 0.75413053 0.69391555 0.69255731 0.65171135]
0.6928821997753976
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.55903089 0.62856057 0.57230845 0.58664457 0.59852334]
0.5890135639251914
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.66757245 0.75311478 0.69343655 0.69298251 0.65351409]
0.6921240747899696
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.67891497 0.74333672 0.70935813 0.72377784 0.74174562]
0.7194266561878558
CV
{'kernel': 'linear', 'degree': 5, 'C': 1}
[0.66757245 0.75311478 0.6934



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.66504941 0.64615058 0.62535164 0.65931394 0.64448001]
0.6480691186472444
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.55729867 0.54429164 0.53892199 0.5540556  0.54087458]
0.5470884983750306
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.66504941 0.64615058 0.62535164 0.65931394 0.64448001]
0.6480691186472444
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.55729867 0.54429164 0.53892199 0.5540556  0.54087458]
0.5470884983750306
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.66504941 0.64615058 0.62535164 0.65931394 0.64448001]
0.6480691186472444
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.55729867 0.54429164 0.53892199 0.5540556  0.54087458]
0.5470884983750306
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.6659006  0.64604409 0.6252211  0.65893109 0.64511754]
0.6482428805279048
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.66129375 0.64665134 0.64018664 0.65961897 0.64660568]
0.6508712745175759
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.63751875 0.69820724 0.66207615 0.67395198 0.71026873]
0.6764045700294129
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.51445616 0.5454747  0.50900397 0.54561176 0.52928785]
0.5287668902669216
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.63751875 0.69820724 0.66207615 0.67395198 0.71026873]
0.6764045700294129
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.51445616 0.5454747  0.50900397 0.54561176 0.52928785]
0.5287668902669216
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.63751875 0.69820724 0.66207615 0.67395198 0.71026873]
0.6764045700294129
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.51445616 0.5454747  0.50900397 0.54561176 0.52928785]
0.5287668902669216
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.63989945 0.7007799  0.6578316  0.67316784 0.71307927]
0.6769516111821059
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.66643825 0.70666776 0.66694376 0.70512674 0.70446685]
0.6899286714682266
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.64616782 0.64747742 0.64042754 0.61187892 0.69062091]
0.6473145231546944
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.34076421 0.33029055 0.3110243  0.32116496 0.35997173]
0.33264315074315337
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.64616782 0.64747742 0.64042754 0.61187892 0.69062091]
0.6473145231546944
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.34076421 0.33029055 0.3110243  0.32116496 0.35997173]
0.33264315074315337
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.64616782 0.64747742 0.64042754 0.61187892 0.69062091]
0.6473145231546944
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.34076421 0.33029055 0.3110243  0.32116496 0.35997173]
0.33264315074315337
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.63523407 0.65201583 0.63888453 0.60962272 0.67172711]
0.641496853181031
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.65126782 0.67317919 0.62462361 0.59171342 0.69634607]
0.6474260210168511
CV
{'kernel': 'linear', 



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.6540055  0.68890848 0.63848931 0.65096957 0.67974393]
0.6624233562678612
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.48800869 0.50432136 0.47876951 0.49947133 0.50363006]
0.49484018883011416
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.6540055  0.68890848 0.63848931 0.65096957 0.67974393]
0.6624233562678612
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.48800869 0.50432136 0.47876951 0.49947133 0.50363006]
0.49484018883011416
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.6540055  0.68890848 0.63848931 0.65096957 0.67974393]
0.6624233562678612
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.48800869 0.50432136 0.47876951 0.49947133 0.50363006]
0.49484018883011416
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.65485475 0.68845494 0.64012528 0.65291339 0.68002985]
0.663275641617478
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.64075104 0.65395318 0.63439459 0.64791732 0.645443  ]
0.6444918242634265
CV
{'kernel': 'linear', 



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.60420057 0.77658632 0.78179558 0.78609153 0.75368626]
0.7404720514581442
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.57140391 0.55453497 0.55227821 0.54353702 0.54785855]
0.5539225314036602
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.60420057 0.77658632 0.78179558 0.78609153 0.75368626]
0.7404720514581442
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.57140391 0.55453497 0.55227821 0.54353702 0.54785855]
0.5539225314036602
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.60420057 0.77658632 0.78179558 0.78609153 0.75368626]
0.7404720514581442
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.57140391 0.55453497 0.55227821 0.54353702 0.54785855]
0.5539225314036602
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.61056913 0.77489927 0.78036875 0.78577561 0.75165818]
0.740654186103075
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.74963767 0.72216638 0.71695377 0.71753339 0.71316529]
0.7238913006931988
CV
{'kernel': 'linear', 'de



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.69783492 0.66695408 0.66631305 0.72384488 0.74466357]
0.6999220981457303
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.32145514 0.37226293 0.33413913 0.37560952 0.35195104]
0.35108355325428026
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.69783492 0.66695408 0.66631305 0.72384488 0.74466357]
0.6999220981457303
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.32145514 0.37226293 0.33413913 0.37560952 0.35195104]
0.35108355325428026
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.69783492 0.66695408 0.66631305 0.72384488 0.74466357]
0.6999220981457303
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.32145514 0.37226293 0.33413913 0.37560952 0.35195104]
0.35108355325428026
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.6862832  0.66477205 0.66338889 0.72052329 0.74414203]
0.6958218924915234
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.62399491 0.65292998 0.61545758 0.65028877 0.64558314]
0.6376508751883386
CV
{'kernel': 'linear',



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.61651352 0.57111563 0.51697703 0.60483076 0.58121174]
0.5781297364317224
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.13317977 0.12664247 0.12994486 0.13329376 0.12677475]
0.12996712377326985
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.61651352 0.57111563 0.51697703 0.60483076 0.58121174]
0.5781297364317224
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.13317977 0.12664247 0.12994486 0.13329376 0.12677475]
0.12996712377326985
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.61651352 0.57111563 0.51697703 0.60483076 0.58121174]
0.5781297364317224
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.13317977 0.12664247 0.12994486 0.13329376 0.12677475]
0.12996712377326985
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.61250081 0.57404771 0.50824589 0.59909436 0.5787325 ]
0.574524254399122
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.54976676 0.52593082 0.50169767 0.52633912 0.50543298]
0.5218334713756612
CV
{'kernel': 'linear', 



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.59858541 0.62278201 0.55252526 0.57049843 0.54285915]
0.5774500513894802
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.35698572 0.37281115 0.32048268 0.34531148 0.32174469]
0.34346714462433164
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.59858541 0.62278201 0.55252526 0.57049843 0.54285915]
0.5774500513894802
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.35698572 0.37281115 0.32048268 0.34531148 0.32174469]
0.34346714462433164
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.59858541 0.62278201 0.55252526 0.57049843 0.54285915]
0.5774500513894802
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.35698572 0.37281115 0.32048268 0.34531148 0.32174469]
0.34346714462433164
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.59826562 0.6204209  0.5479704  0.56837754 0.53944526]
0.5748959419263132
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.61509578 0.63154779 0.59804729 0.61216768 0.56445265]
0.604262237891165
CV
{'kernel': 'linear', 



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.63018125 0.66081596 0.44129023 0.64926823 0.60092026]
0.5964951849682775
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.3147378  0.32951658 0.32325986 0.34066304 0.31962716]
0.3255608871543989
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.63018125 0.66081596 0.44129023 0.64926823 0.60092026]
0.5964951849682775
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.3147378  0.32951658 0.32325986 0.34066304 0.31962716]
0.3255608871543989
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.63018125 0.66081596 0.44129023 0.64926823 0.60092026]
0.5964951849682775
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.3147378  0.32951658 0.32325986 0.34066304 0.31962716]
0.3255608871543989
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.62430708 0.66088335 0.40615638 0.63529805 0.58355364]
0.5820396996455424
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.58337275 0.64501381 0.63293697 0.62958955 0.60488821]
0.6191602580438176
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.67781849 0.64568117 0.71760816 0.16609406 0.70485837]
0.5824120488604674
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.45197679 0.43866918 0.4419554  0.42270088 0.47285491]
0.44563143445870096
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.67781849 0.64568117 0.71760816 0.16609406 0.70485837]
0.5824120488604674
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.45197679 0.43866918 0.4419554  0.42270088 0.47285491]
0.44563143445870096
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.67781849 0.64568117 0.71760816 0.16609406 0.70485837]
0.5824120488604674
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.45197679 0.43866918 0.4419554  0.42270088 0.47285491]
0.44563143445870096
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.67213563 0.64030919 0.71309388 0.22305325 0.70032387]
0.5897831633640475
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.68358509 0.65216362 0.66305521 0.62800252 0.65159982]
0.6556812506464188
CV
{'kernel': 'linear',



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.80041027 0.74380207 0.79231962 0.72407017 0.72790235]
0.7577008940694582
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.35158984 0.31460793 0.33714378 0.30462209 0.32315685]
0.3262240990756931
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.80041027 0.74380207 0.79231962 0.72407017 0.72790235]
0.7577008940694582
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.35158984 0.31460793 0.33714378 0.30462209 0.32315685]
0.3262240990756931
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.80041027 0.74380207 0.79231962 0.72407017 0.72790235]
0.7577008940694582
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.35158984 0.31460793 0.33714378 0.30462209 0.32315685]
0.3262240990756931
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.79801252 0.74071486 0.78833513 0.71887921 0.67227184]
0.7436427108323663
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.76578985 0.68257071 0.72593444 0.6836593  0.71843369]
0.715277597191065
CV
{'kernel': 'linear', 'de



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.55890545 0.53503514 0.54638077 0.45262594 0.56879515]
0.5323484908615598
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.40822776 0.36088679 0.36489407 0.32215524 0.37230124]
0.3656930185356846
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.55890545 0.53503514 0.54638077 0.45262594 0.56879515]
0.5323484908615598
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.40822776 0.36088679 0.36489407 0.32215524 0.37230124]
0.3656930185356846
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.55890545 0.53503514 0.54638077 0.45262594 0.56879515]
0.5323484908615598
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.40822776 0.36088679 0.36489407 0.32215524 0.37230124]
0.3656930185356846
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.58465971 0.5211992  0.53607736 0.45165483 0.56986954]
0.5326921297282272
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.61765241 0.55677815 0.5584312  0.47421449 0.53127484]
0.5476702186295895
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.65010653 0.63564748 0.66096004 0.68816803 0.6327041 ]
0.6535172356435623
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.42836205 0.37789684 0.3887418  0.4143841  0.39135228]
0.4001474142118532
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.65010653 0.63564748 0.66096004 0.68816803 0.6327041 ]
0.6535172356435623
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.42836205 0.37789684 0.3887418  0.4143841  0.39135228]
0.4001474142118532
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.65010653 0.63564748 0.66096004 0.68816803 0.6327041 ]
0.6535172356435623
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.42836205 0.37789684 0.3887418  0.4143841  0.39135228]
0.4001474142118532
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.64508084 0.63581328 0.6605019  0.68515239 0.63231174]
0.6517720287567865
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.65358652 0.64513435 0.65512358 0.66517008 0.63358832]
0.6505205708381272
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.57988678 0.66802701 0.63124144 0.68003814 0.65254732]
0.6423481375270189
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.39783345 0.44533106 0.42460654 0.4385579  0.43184392]
0.4276345759266263
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.57988678 0.66802701 0.63124144 0.68003814 0.65254732]
0.6423481375270189
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.39783345 0.44533106 0.42460654 0.4385579  0.43184392]
0.4276345759266263
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.57988678 0.66802701 0.63124144 0.68003814 0.65254732]
0.6423481375270189
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.39783345 0.44533106 0.42460654 0.4385579  0.43184392]
0.4276345759266263
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.58906188 0.66455574 0.62546355 0.67993578 0.65048812]
0.6419010137404061
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.63274081 0.68809665 0.64448751 0.66840285 0.6400919 ]
0.6547639449074673
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.53186156 0.53209484 0.48463667 0.59400428 0.4393424 ]
0.5163879505063309
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.12132274 0.11943662 0.12220082 0.13312278 0.1240251 ]
0.12402161359004496
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.53186156 0.53209484 0.48463667 0.59400428 0.4393424 ]
0.5163879505063309
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.12132274 0.11943662 0.12220082 0.13312278 0.1240251 ]
0.12402161359004496
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.53186156 0.53209484 0.48463667 0.59400428 0.4393424 ]
0.5163879505063309
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.12132274 0.11943662 0.12220082 0.13312278 0.1240251 ]
0.12402161359004496
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.53458184 0.52665787 0.48341038 0.59872906 0.43304105]
0.5152840383061289
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.51593507 0.51336021 0.52530471 0.55632373 0.49761598]
0.5217079392386215
CV
{'kernel': 'linear',



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.69416997 0.72270767 0.70653662 0.72990471 0.69002601]
0.7086689962726905
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.44723705 0.43782122 0.44394719 0.47724164 0.47133674]
0.4555167693249277
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.69416997 0.72270767 0.70653662 0.72990471 0.69002601]
0.7086689962726905
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.44723705 0.43782122 0.44394719 0.47724164 0.47133674]
0.4555167693249277
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.69416997 0.72270767 0.70653662 0.72990471 0.69002601]
0.7086689962726905
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.44723705 0.43782122 0.44394719 0.47724164 0.47133674]
0.4555167693249277
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.69190192 0.72338545 0.7060677  0.73025137 0.68945549]
0.7082123857465171
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.66725984 0.70563286 0.67626435 0.71142022 0.69390199]
0.6908958487499074
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.68844175 0.6562475  0.75602593 0.7017942  0.67560123]
0.6956221229026186
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.43440488 0.42227139 0.46043579 0.43801971 0.4296513 ]
0.43695661481219805
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.68844175 0.6562475  0.75602593 0.7017942  0.67560123]
0.6956221229026186
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.43440488 0.42227139 0.46043579 0.43801971 0.4296513 ]
0.43695661481219805
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.68844175 0.6562475  0.75602593 0.7017942  0.67560123]
0.6956221229026186
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.43440488 0.42227139 0.46043579 0.43801971 0.4296513 ]
0.43695661481219805
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.68382995 0.65565705 0.75625558 0.70157716 0.67426112]
0.6943161714781033
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.65062537 0.63293209 0.683857   0.63939372 0.61388628]
0.6441388920476472
CV
{'kernel': 'linear',



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.56083983 0.54415214 0.57029662 0.52850828 0.59563529]
0.5598864300417737
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.2954591  0.3010811  0.29232517 0.30854821 0.31794249]
0.3030712141042382
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.56083983 0.54415214 0.57029662 0.52850828 0.59563529]
0.5598864300417737
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.2954591  0.3010811  0.29232517 0.30854821 0.31794249]
0.3030712141042382
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.56083983 0.54415214 0.57029662 0.52850828 0.59563529]
0.5598864300417737
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.2954591  0.3010811  0.29232517 0.30854821 0.31794249]
0.3030712141042382
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.55391584 0.53971254 0.5656148  0.53279347 0.59246931]
0.5569011908568056
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.58560272 0.59664117 0.56287483 0.57680514 0.59805385]
0.5839955406182556
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.57860006 0.58575906 0.61456923 0.48746398 0.60881356]
0.575041177293468
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.24204496 0.2365538  0.23013477 0.22634687 0.21958668]
0.2309334178347064
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.57860006 0.58575906 0.61456923 0.48746398 0.60881356]
0.575041177293468
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.24204496 0.2365538  0.23013477 0.22634687 0.21958668]
0.2309334178347064
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.57860006 0.58575906 0.61456923 0.48746398 0.60881356]
0.575041177293468
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.24204496 0.2365538  0.23013477 0.22634687 0.21958668]
0.2309334178347064
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.57355552 0.57279987 0.62714806 0.44063705 0.5980572 ]
0.5624395409593679
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.6348799  0.6025926  0.62244862 0.57067028 0.58803709]
0.6037256964307621
CV
{'kernel': 'linear', 'degr



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.70355266 0.70315303 0.70344159 0.73014547 0.70856742]
0.7097720337228037
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.4193747  0.48789735 0.46886868 0.48647256 0.4848233 ]
0.46948731953317124
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.70355266 0.70315303 0.70344159 0.73014547 0.70856742]
0.7097720337228037
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.4193747  0.48789735 0.46886868 0.48647256 0.4848233 ]
0.46948731953317124
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.70355266 0.70315303 0.70344159 0.73014547 0.70856742]
0.7097720337228037
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.4193747  0.48789735 0.46886868 0.48647256 0.4848233 ]
0.46948731953317124
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.70682124 0.69055737 0.70128603 0.7329531  0.71020463]
0.7083644750035701
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.6377531  0.68414096 0.66825555 0.69894971 0.65034852]
0.6678895682251227
CV
{'kernel': 'linear',



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.4116717  0.51841882 0.56469348 0.60556715 0.60615195]
0.5413006194650353
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.07257009 0.07209154 0.07501957 0.07431981 0.07368686]
0.07353757539209933
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.4116717  0.51841882 0.56469348 0.60556715 0.60615195]
0.5413006194650353
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.07257009 0.07209154 0.07501957 0.07431981 0.07368686]
0.07353757539209933
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.4116717  0.51841882 0.56469348 0.60556715 0.60615195]
0.5413006194650353
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.07257009 0.07209154 0.07501957 0.07431981 0.07368686]
0.07353757539209933
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.48086324 0.49997489 0.53611096 0.59442787 0.57280731]
0.5368368553589172
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.38972934 0.47811203 0.44854351 0.42924618 0.4299026 ]
0.435106734392707
CV
{'kernel': 'linear', 



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.66989889 0.70563899 0.67093445 0.63634586 0.59151362]
0.6548663650408099
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.14665629 0.1484356  0.14905562 0.13111062 0.14551221]
0.14415406665641847
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.66989889 0.70563899 0.67093445 0.63634586 0.59151362]
0.6548663650408099
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.14665629 0.1484356  0.14905562 0.13111062 0.14551221]
0.14415406665641847
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.66989889 0.70563899 0.67093445 0.63634586 0.59151362]
0.6548663650408099
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.14665629 0.1484356  0.14905562 0.13111062 0.14551221]
0.14415406665641847
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.66896037 0.70797383 0.67786783 0.63733526 0.59214493]
0.6568564430928375
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.61774349 0.60165412 0.6133139  0.54194967 0.57104485]
0.5891412058097575
CV
{'kernel': 'linear',



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.59526278 0.55512268 0.46809543 0.57499167 0.59063525]
0.5568215591465419
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.39197579 0.35465668 0.37742165 0.38471789 0.3508015 ]
0.37191470030603435
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.59526278 0.55512268 0.46809543 0.57499167 0.59063525]
0.5568215591465419
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.39197579 0.35465668 0.37742165 0.38471789 0.3508015 ]
0.37191470030603435
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.59526278 0.55512268 0.46809543 0.57499167 0.59063525]
0.5568215591465419
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.39197579 0.35465668 0.37742165 0.38471789 0.3508015 ]
0.37191470030603435
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.59482114 0.56003811 0.42921445 0.5761685  0.59181398]
0.5504112386570224
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.61986854 0.61899474 0.57625155 0.59064146 0.59121507]
0.5993942710473239
CV
{'kernel': 'linear',



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.63439093 0.64890343 0.62004664 0.64967711 0.60375244]
0.6313541099964057
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.46981854 0.47271678 0.47128908 0.45950468 0.45110689]
0.4648871962421794
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.63439093 0.64890343 0.62004664 0.64967711 0.60375244]
0.6313541099964057
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.46981854 0.47271678 0.47128908 0.45950468 0.45110689]
0.4648871962421794
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.63439093 0.64890343 0.62004664 0.64967711 0.60375244]
0.6313541099964057
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.46981854 0.47271678 0.47128908 0.45950468 0.45110689]
0.4648871962421794
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.63783439 0.65054949 0.61618059 0.648169   0.60073116]
0.6306929264282962
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.61210581 0.64737519 0.62401606 0.62712654 0.59337217]
0.6207991535645817
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.70054886 0.7498595  0.56458811 0.71854967 0.73167813]
0.6930448556488015
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.40367421 0.41407105 0.36578197 0.39904923 0.39447918]
0.3954111301422543
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.70054886 0.7498595  0.56458811 0.71854967 0.73167813]
0.6930448556488015
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.40367421 0.41407105 0.36578197 0.39904923 0.39447918]
0.3954111301422543
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.70054886 0.7498595  0.56458811 0.71854967 0.73167813]
0.6930448556488015
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.40367421 0.41407105 0.36578197 0.39904923 0.39447918]
0.3954111301422543
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.70751346 0.73955161 0.32106012 0.7108485  0.59842128]
0.6154789950118772
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.70555935 0.73545374 0.68143261 0.695572   0.71170699]
0.7059449399772947
CV
{'kernel': 'linear', 'd



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.79683542 0.79358481 0.80715164 0.79055003 0.72048923]
0.7817222255595833
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.64700959 0.61857364 0.61778449 0.63088665 0.60736533]
0.6243239395072814
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.79683542 0.79358481 0.80715164 0.79055003 0.72048923]
0.7817222255595833
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.64700959 0.61857364 0.61778449 0.63088665 0.60736533]
0.6243239395072814
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.79683542 0.79358481 0.80715164 0.79055003 0.72048923]
0.7817222255595833
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.64700959 0.61857364 0.61778449 0.63088665 0.60736533]
0.6243239395072814
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.7967037  0.7891629  0.80487006 0.79150752 0.62795783]
0.762040401702248
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.80344563 0.79480268 0.79312691 0.79385455 0.75870478]
0.7887869109319378
CV
{'kernel': 'linear', 'de



CV
{'kernel': 'linear', 'degree': 3, 'C': 0.01}
[0.62777685 0.71075077 0.60825517 0.6422321  0.56745082]
0.6312931435544682
CV
{'kernel': 'rbf', 'degree': 3, 'C': 0.01}
[0.42862782 0.48896798 0.45951448 0.46520678 0.48593708]
0.4656508287503232
CV
{'kernel': 'linear', 'degree': 5, 'C': 0.01}
[0.62777685 0.71075077 0.60825517 0.6422321  0.56745082]
0.6312931435544682
CV
{'kernel': 'rbf', 'degree': 5, 'C': 0.01}
[0.42862782 0.48896798 0.45951448 0.46520678 0.48593708]
0.4656508287503232
CV
{'kernel': 'linear', 'degree': 7, 'C': 0.01}
[0.62777685 0.71075077 0.60825517 0.6422321  0.56745082]
0.6312931435544682
CV
{'kernel': 'rbf', 'degree': 7, 'C': 0.01}
[0.42862782 0.48896798 0.45951448 0.46520678 0.48593708]
0.4656508287503232
CV
{'kernel': 'linear', 'degree': 3, 'C': 1}
[0.61279487 0.71685718 0.59814709 0.62232474 0.54879717]
0.6197842089273486
CV
{'kernel': 'rbf', 'degree': 3, 'C': 1}
[0.65025628 0.71445351 0.68812096 0.67366248 0.69471245]
0.6842411370174695
CV
{'kernel': 'linear', 'd

In [14]:
results_df = pd.DataFrame(results)
results_df.to_pickle("crossval_ppp_SVM.pkl")
results_df.to_csv("crossval_ppp_SVM.csv")
results_df

Unnamed: 0,allele,best_oob_param,best_oob_score,best_cv_param,best_cv_scores
0,A0101,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.6790072750194192, 0.742506610194608, 0.7089..."
1,A0201,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.6595062809934203, 0.6425391447861322, 0.634..."
2,A0203,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.6664562333875743, 0.7052010252373623, 0.665..."
3,A0206,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.6528505766183244, 0.6726620997625883, 0.624..."
4,A0301,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.6404292321364998, 0.6534728938990689, 0.633..."
5,A1101,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.7485388416399097, 0.7217285741643072, 0.716..."
6,A2301,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.6239593329119871, 0.650831470784877, 0.6143..."
7,A2402,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.5495284267571928, 0.5255582522514299, 0.502..."
8,A2601,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.6148154327942268, 0.6293880423104752, 0.598..."
9,A2902,,,"{'kernel': 'rbf', 'degree': 7, 'C': 10}","[0.5830385522697548, 0.6444427882932677, 0.632..."


## Tune the best

In [17]:
def train_best_model(allele, params, exp_name):
    allele_train_dataset = train_dataset[train_dataset["allele"]==allele]
    (X_train, y_train, y_l, cv) = extract_features_Xy_cv(allele_train_dataset, allele)
    regr_best = SVR()
    regr_best.set_params(**params)
    regr = make_pipeline(StandardScaler(), regr_best)
    regr.fit(X_train, y_train)
    with open('./final_SVM_REGRmodels/'+allele+exp_name+'.pkl', 'wb') as fid:
        cPickle.dump(regr, fid)    

In [16]:
results_df = pd.DataFrame(results)
for allele in alleles:
    params = list(results_df[results_df["allele"]==allele]["best_cv_param"])[0]
    train_best_model(allele, params, "ppp")   

ValueError: Invalid parameter C for estimator KernelRidge(alpha=1, coef0=1, degree=7, gamma=None, kernel='rbf',
            kernel_params=None). Check the list of available parameters with `estimator.get_params().keys()`.

In [None]:
results_df

## --------------------------------------------------- ##
## The middle/anchor position experiments are done for RF only 

## Middle position

In [13]:
# extracting features in training format
# and get the cross-validation iterator
def get_energies(X):
    ene = np.roll(X, -3, axis = 0)[:4,:19]
    ene = ene.reshape(4*19)
    return ene

In [None]:
alleles = train_dataset["allele"].unique()
results = {"allele":[], "best_oob_param":[], "best_oob_score":[], "best_cv_param":[], "best_cv_scores":[]}

for allele in alleles:
    print("------------------------------------------------------------------------")
    print("ALLELE")
    print(allele)
    res = param_tune_allele(allele, train_dataset)
    results["allele"].append(allele)
    results["best_oob_param"].append(res[0])
    results["best_oob_score"].append(res[1])
    results["best_cv_param"].append(res[2])
    results["best_cv_scores"].append(res[3])

In [None]:
def train_best_model(allele, params, exp_name):
    allele_train_dataset = train_dataset[train_dataset["allele"]==allele]
    (X_train, y_train, y_l, cv) = extract_features_Xy_cv(allele_train_dataset, allele)
    regr_best = RandomForestRegressor(n_jobs=-1)
    regr_best.set_params(**params)
    regr_best.fit(X_train, y_train)
    with open('./final_REGRmodels/'+allele+exp_name+'.pkl', 'wb') as fid:
        cPickle.dump(regr_best, fid)    

In [None]:
for allele in alleles:
    params = list(results_df[results_df["allele"]==allele]["best_cv_param"])[0]
    train_best_model(allele, params, "ppp-middle") 

## Anchor positions

In [14]:
# extracting features in training format
# and get the cross-validation iterator
def get_energies(X):
    ene = np.roll(X, 2, axis = 0)[:5,:19]
    ene = np.roll(ene, -2, axis = 0)
    ene = ene.reshape(5*19)
    return ene

In [None]:
alleles = train_dataset["allele"].unique()
results = {"allele":[], "best_oob_param":[], "best_oob_score":[], "best_cv_param":[], "best_cv_scores":[]}

for allele in alleles:
    print("------------------------------------------------------------------------")
    print("ALLELE")
    print(allele)
    res = param_tune_allele(allele, train_dataset)
    results["allele"].append(allele)
    results["best_oob_param"].append(res[0])
    results["best_oob_score"].append(res[1])
    results["best_cv_param"].append(res[2])
    results["best_cv_scores"].append(res[3])

In [None]:
def train_best_model(allele, params, exp_name):
    allele_train_dataset = train_dataset[train_dataset["allele"]==allele]
    (X_train, y_train, y_l, cv) = extract_features_Xy_cv(allele_train_dataset, allele)
    regr_best = RandomForestRegressor(n_jobs=-1)
    regr_best.set_params(**params)
    regr_best.fit(X_train, y_train)
    with open('./final_REGRmodels/'+allele+exp_name+'.pkl', 'wb') as fid:
        cPickle.dump(regr_best, fid)

In [None]:
for allele in alleles:
    params = list(results_df[results_df["allele"]==allele]["best_cv_param"])[0]
    train_best_model(allele, params, "ppp-anchor") 