In [1]:
import os

root_dir = os.path.abspath(os.path.join(os.getcwd(), "../"))
os.chdir(root_dir)

In [2]:
# import io
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas as pd
sns.set_style("whitegrid")
sns.set_context("paper", font_scale=1.4)
# sns.set_palette("Set2")
import functions

from multiprocessing import Pool
from functools import partial

from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import matthews_corrcoef

# from ipywidgets import interact
# import ipywidgets as widgets
from scipy.signal import savgol_filter

%load_ext autoreload
%autoreload 2



# Kyte & Doolittle index of hydrophobicity
# J. Mol. Biol. 157:105-132(1982).
kd = {"A": 1.8, "R": -4.5, "N": -3.5, "D": -3.5, "C": 2.5,
      "Q": -3.5, "E": -3.5, "G": -0.4, "H": -3.2, "I": 4.5,
      "L": 3.8, "K": -3.9, "M": 1.9, "F": 2.8, "P": -1.6,
      "S": -0.8, "T": -0.7, "W": -0.9, "Y": -1.3, "V": 4.2}

#Sort kd
kd = {k: v for k, v in sorted(kd.items(), key=lambda item: item[1])}

# Flexibility
# Normalized flexibility parameters (B-values), average
# Vihinen M., Torkkila E., Riikonen P. Proteins. 19(2):141-9(1994).
flexibilities_vih = {"A": 0.984, "C": 0.906, "E": 1.094, "D": 1.068,
"G": 1.031, "F": 0.915, "I": 0.927, "H": 0.950,
"K": 1.102, "M": 0.952, "L": 0.935, "N": 1.048,
"Q": 1.037, "P": 1.049, "S": 1.046, "R": 1.008,
"T": 0.997, "W": 0.904, "V": 0.931, "Y": 0.929}

def flex(seq):
    seq = seq.replace('U', 'C')
    if len(seq) == 0:
        return 0
    else:
        return [flexibilities_vih[i] for i in seq]

def gravy(seq):
    seq = seq.replace('U', 'C')
    if len(seq) == 0:
        return 0
    else:
        return np.mean([kd[i] for i in seq])

def hydrop(seq):
    seq = seq.replace('U', 'C')
    return [kd[i] for i in seq]

def filt(seq, w=15, p=2):
    seq = seq.replace('U', 'C')
    aa = hydrop(seq)
    return savgol_filter(aa, w, p)

aa = [k for k, v in kd.items()]

def count(seq):
    seq = seq.replace('U', 'C')
    counts = [seq.count(i) for i in aa]
    return counts

def clean(seq):
    seq = ''.join(seq.split('_'))
    if len(seq) < 30:
        seq = seq + 'S'*(30-len(seq))
    return seq


def make_dic(arr):
    '''Make an amino acid dictionary from an array of values
    '''
    dic = {}
    ks = [k for k, v in kd.items()]
    for i, v in enumerate(ks):
        dic[v] = arr[i]
    return dic


def progress(iteration, total, message=None):
    '''Simple progressbar
    '''
    if message is None:
        message = ''
    bars_string = int(float(iteration) / float(total) * 50.)
    print("\r|%-50s| %d%% (%s/%s) %s "% ('█'*bars_string+ "░" * \
                                     (50 - bars_string), float(iteration)/\
                                     float(total) * 100, iteration, total, \
                                     message), end='\r', flush=True)

    if iteration == total:
        print('\nCompleted!')


In [3]:
df_ = pd.read_pickle('results/signalp5_train_all.pkl.gz')
df = df_.sample(frac=1, random_state=12345)

#We will use -15:15 around cleavage site only
#We will also pad shorter sequences with 'S' 

df['Alig'] = df['Aligned'].str[5:-6].apply(clean)

df.head(2)

Unnamed: 0,Entry,All,Protein,Entry name,Status,Protein names,Gene names,Organism,Length,Signal peptide,...,Biotechnological use,Motif,Domain [FT],Cross-reference (InterPro),Cross-reference (PROSITE),Cleavage,Accession,Label,Aligned,Alig
5947,O35129,sp|O35129|PHB2_MOUSE Prohibitin-2 OS=Mus muscu...,MAQNLKDLAGRLPAGPRGMGTALKLLLGAGAVAYGVRESVFTVEGG...,PHB2_MOUSE,reviewed,Prohibitin-2 (B-cell receptor-associated prote...,Phb2 Bap Bcap37 Rea,Mus musculus (Mouse),299,,...,,,,IPR001107;IPR036013;IPR000163;,,,O35129|EUKARYA|NO_SP|1,0,AQNLKDLAGRLPAGPRGMGTALKLLLGAGAVAYGVRESVFT,DLAGRLPAGPRGMGTALKLLLGAGAVAYGV
6252,F4IE66,sp|F4IE66|PRP22_ARATH Pre-mRNA-splicing factor...,MPSMAQGELKSFVQNSRPNPKSPTVSPFSMRQKIAEHRRSLPIASV...,PRP22_ARATH,reviewed,Pre-mRNA-splicing factor ATP-dependent RNA hel...,RID1 At1g26370 T1K7.25,Arabidopsis thaliana (Mouse-ear cress),717,,...,,"MOTIF 162..165; /note=""DEAH box""; /evidence=...","DOMAIN 51..256; /note=""Helicase ATP-binding"";...",IPR003593;IPR011709;IPR007502;IPR014001;IPR001...,PS51192;PS51194;,,F4IE66|EUKARYA|NO_SP|0,0,PSMAQGELKSFVQNSRPNPKSPTVSPFSMRQKIAEHRRSLP,GELKSFVQNSRPNPKSPTVSPFSMRQKIAE


In [4]:
def cost_func(weights, df, index=0):
    new_dict = make_dic(weights)
    df['All_counts'] = df['Alig'].str[index: index+1].apply(lambda x: new_dict[x])

    y = df['Label'].values
    preds = df[['All_counts']]
    
    precision, recall, _ = precision_recall_curve(y, preds, )
    pr_auc = auc(recall, precision)


    #ROC
    fpr, tpr, _ = roc_curve(y, preds)
    roc_auc = auc(fpr, tpr)
    
    return np.array([-pr_auc, -roc_auc])

def substitute(weights, num, rand=12345):
    '''
    Randomly substitutes specified number of amino acids in given sequence
    '''
    rand_state = np.random.RandomState(rand)
    wts = weights.copy()
    lb = -5.25 #lower bound
    ub = 5.25 #upper bound
    pos = rand_state.choice(list(range(len(weights))), num)

    #subsitution
    for _, p in enumerate(pos):
        wts[p] = rand_state.uniform(lb, ub)
    return wts




def simulated_annealing(position, weights, df_, cost_f=cost_func, niter=10, rnd=12345, plot=False,):
    '''
    Multi-objective simulated annealing to maximise both PRAUC and ROCAUC.
    Basically uses numpy array.
    '''

    df = df_.copy()
    
    
    current_sol = weights.copy()
    current_best_sol = weights.copy()
    
    current_costs = cost_func(current_sol, df, position)
    current_best_costs = cost_func(current_best_sol, df, position)
    #For plotting
    current_best_costs_lst = [current_best_costs]
    
    temps = np.geomspace(len(weights), 0.00001, niter)
    num_of_subst = ([int(i) for i in np.geomspace(len(weights), 1, niter)])

    for i, t in enumerate(temps):
        internal_counter = 0
        
        while True:
            if internal_counter < 100:
                rn = np.random.RandomState(i)
                new_sol = substitute(current_best_sol, num_of_subst[i], rand=i*internal_counter)
                new_costs = cost_func(new_sol, df)

                #Metropolis-Hastings
                boltzmanns = np.array([np.exp(-(i[0] - i[1])/(t*1000000)) for i in zip(new_costs, current_costs)])

                if ((new_costs < current_costs).all() \
                or (boltzmanns > rn.rand()).all()):
                    current_sol = new_sol
                    current_costs = cost_func(current_sol, df, position)
                if ((current_costs < current_best_costs ).all()):
                    current_best_sol = current_sol
                    current_best_costs = cost_func(current_sol, df, position)
                    current_best_costs_lst.append(current_best_costs)
                    break
                else:
                    internal_counter += 1
            else:
                print('Too many iterations here!', end='\r')
                break

        
    try: #probably can remove this try catch thing
        if plot:
            labels = ['PRROC', 'AUROC', 'MCC']
            for i in list(range(len(current_best_costs))):
                y = [c[i] for c in current_best_costs_lst]
                ax = sns.lineplot(y=y,\
                                  x=list(range(len(current_best_costs_lst))), label=labels[i])
                ax.set_xlabel('Iterations')
                ax.set_ylabel('Cost Function')
    except Exception:
        print(current_best_costs_lst)

    return current_best_sol, current_best_costs, current_best_costs_lst

In [5]:
def optimise_weights(weights, df, length, niter=50,):
    '''
    Optimise weights at n positions given a dataframe
    '''
    pools = Pool(length)

    cost_funcs = partial(simulated_annealing, weights=weights, df_=df, niter=niter, \
                         cost_f=cost_func)
    optimisation_results = []
    for result in pools.map(cost_funcs, list(range(0, length))): #map is used for correct orders
        optimisation_results.append(result)
    pools.close()
    pools.join()
    return optimisation_results

def make_weight(optimisation_res):
    weights_df = pd.DataFrame(columns=np.arange(0, 30), index=[k for k,v in kd.items()])
    for i, v in enumerate(np.arange(30)):
        weights_df[v] = optimisation_res[i][0]
    return weights_df


def score(seq, df):
    seq = seq.replace('U', 'C')
    try:
        w = np.array([df[i][v] for i, v in enumerate(seq)])
        return w
    except Exception:
        print(seq)
    
def score_cs(seq, weight_df, clf):
    '''
    Finds cleavage site.
    First sequence is broken to subsequences of length 30.
    RF scores each subsegment.
    Higest scoring subsegment has cleavage at 16th pos on that
    subsegment.
    '''
    seq = seq.upper()
    #Score by RF1
    seq1 = seq[:30]

    
    #score by RF2
    subseqs = []
    for i in range(0, 30):
        if len(seq[i:i+30]) == 30:
            subseqs.append(seq[i:i+30])
    prob_cs = clf.predict_proba([score(x, weight_df) for x in subseqs])



    positions = list(range(15, 15+len(subseqs)))
    sp_cs_probs = [i[1] for i in prob_cs]
    
    return max(sp_cs_probs), positions[np.argmax(sp_cs_probs)], sp_cs_probs

In [6]:
ll = np.arange(5)
init_state = [v for k,v in kd.items()]

rnd = np.random.RandomState(12345)

dfs = []

for q in range(5):
    dfs.append(df[df['Accession'].apply(lambda x: '|{}'.format(q) in x)].copy())

In [7]:
init_state = [v for k,v in kd.items()]

cv_results = pd.DataFrame(columns=['Weights', 'Best cost', 'Accepted costs', \
                                  'Precision', 'Recall', 'PR_AUC', \
                                  'FPR', 'TPR', 'ROC_AUC', 'MCC', 'Classifier'], index=np.arange(1, 6))

all_res = []

for j in range(5):
    test_idx = j
    train_idx = list(set(ll) - set([j]))
    
    test = dfs[j]
    train = pd.concat([dfs[i] for i in train_idx])

    train = train.reset_index(drop=True)
    train = train.sample(frac=1, random_state=12345).copy()
    
    
    new_results = optimise_weights(init_state, train, length=30, niter=500)
    cv_results.loc[j+1][['Weights', 'Best cost', 'Accepted costs']] = [i[0] for i in new_results], \
    [i[1] for i in new_results], [i[2] for i in new_results]

    weight_df = make_weight(new_results)

    train['All_counts'] = train['Alig'].apply(lambda x: score(x, weight_df))
    test['All_counts'] = test['Alig'].apply(lambda x: score(x, weight_df))

    X_train = [i for i in train['All_counts']]
    y_train = train['Label'].values

    X_test = [i for i in test['All_counts']]
    y_test = test['Label'].values

    cv_results.loc[j+1]['Train'] = [X_train, y_train]
    cv_results.loc[j+1]['Test'] = [X_test, y_test]

    clf = RandomForestClassifier(random_state=12345,) # n_jobs=-1)
    clf.fit(X_train, y_train)

    cv_results.loc[j+1]['Classifier'] = clf


    preds = [score_cs(i, weight_df, clf) for i in test.Protein.str[:80].values]


    dfs[j]['SP_Prediction'] = preds

    progress(j+1, 5)


Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many iterations here!Too many ite

|██████████████████████████████████████████████████| 100% (5/5)  
Completed!


In [8]:
#Export scored dataframe
final_df = pd.concat(dfs)

final_df['CS_prob'] = final_df['SP_Prediction'].apply(lambda x: x[0])
final_df['CS_pos'] = final_df['SP_Prediction'].apply(lambda x: x[1])
final_df['Diff'] = abs(final_df['Cleavage'] - final_df['CS_pos'])

final_df.to_pickle('results/SP_NOSP_Cleavage_scored_100_500_final.pkl.gz')
final_df.Diff.value_counts()

0.0     1899
2.0      169
3.0      135
1.0      126
4.0       82
5.0       46
6.0       41
8.0       40
7.0       20
9.0       11
10.0      10
19.0       4
17.0       3
24.0       3
12.0       3
15.0       3
18.0       3
39.0       2
21.0       2
11.0       2
25.0       1
23.0       1
13.0       1
22.0       1
29.0       1
Name: Diff, dtype: int64

In [9]:
#Export scored dataframe
final_df = pd.concat(dfs)

final_df['CS_prob'] = final_df['SP_Prediction'].apply(lambda x: x[0])
final_df['CS_pos'] = final_df['SP_Prediction'].apply(lambda x: x[1])
final_df['Diff'] = abs(final_df['Cleavage'] - final_df['CS_pos'])

final_df.to_pickle('results/SP_NOSP_Cleavage_scored_100_500_final.pkl.gz')
final_df.Diff.value_counts()

0.0     1899
2.0      169
3.0      135
1.0      126
4.0       82
5.0       46
6.0       41
8.0       40
7.0       20
9.0       11
10.0      10
19.0       4
17.0       3
24.0       3
12.0       3
15.0       3
18.0       3
39.0       2
21.0       2
11.0       2
25.0       1
23.0       1
13.0       1
22.0       1
29.0       1
Name: Diff, dtype: int64

In [12]:
#Export all classifiers and results
cv_results.to_pickle('results/SP_NOSP_Cleavage_crossvalidation_RF.pkl.gz')

In [13]:
def export_weight(weights):
    weights_df = pd.DataFrame(columns=np.arange(0, 30), index=[k for k,v in kd.items()])
    for i, v in enumerate(np.arange(30)):
        weights_df[v] = weights[i]
    return weights_df

weights_df_export = pd.DataFrame(columns=['Weight'], index=list(range(5)))
for i in range(5):
    weights_df_export.loc[i]['Weight'] = export_weight(cv_results['Weights'][i+1])

weights_df_export.to_pickle('results/Cleavage_weights.pkl.gz')

In [14]:
classifiers_df = cv_results[['Classifier']].copy()
classifiers_df.to_pickle('results/C.pkl.gz')