# Create plots for comparing robust and original Pepper model

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
import scipy


In [None]:
# Collect the 'Test improvement' scores of the original model for every
# label-noise ratio: the resulting table has one column per noise ratio and
# one row per simulation run (10 runs per ratio).
noise_ratios = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
original_results = []

# Common prefix of the per-run result files; the noise ratio and run index
# are appended below.
path_prefix = 'results/NCI-60_label_noise_simulation_results_original_model_noise_ratio_'

for noise in noise_ratios:
    print("Noise ", noise)
    run_scores = []
    for run in range(10):
        df = pd.read_csv(path_prefix + str(noise) + '_run' + str(run) + '.tsv',
                         index_col = 0, sep = '\t')
        run_scores.append(df['Test improvement'].values)
    # A single named column only fits when every file contributed exactly one
    # 'Test improvement' value; pandas raises a ValueError otherwise.
    sub_df = pd.DataFrame(run_scores, columns = [noise])
    print(sub_df)
    # Summarise the runs by their mean instead of repeating the whole table.
    print("Mean score: ", sub_df[noise].mean())
    original_results.append(sub_df)

# Place the per-ratio columns side by side.
original_results = pd.concat(original_results, axis = 1)
original_results

In [None]:
# Collect the 'Test improvement' scores of the robust model for every
# label-noise ratio: the resulting table has one column per noise ratio and
# one row per simulation run (10 runs per ratio).
noise_ratios = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
robust_results = []

# Common prefix of the per-run result files; the noise ratio and run index
# are appended below.
path_prefix = 'results/NCI-60_label_noise_simulation_results_robust_model_noise_ratio_'

for noise in noise_ratios:
    print("Noise ", noise)
    run_scores = []
    for run in range(10):
        df = pd.read_csv(path_prefix + str(noise) + '_run' + str(run) + '.tsv',
                         index_col = 0, sep = '\t')
        run_scores.append(df['Test improvement'].values)
    # A single named column only fits when every file contributed exactly one
    # 'Test improvement' value; pandas raises a ValueError otherwise.
    sub_df = pd.DataFrame(run_scores, columns = [noise])
    print(sub_df)
    # Summarise the runs by their mean instead of repeating the whole table.
    print("Mean score: ", sub_df[noise].mean())
    robust_results.append(sub_df)

# Place the per-ratio columns side by side.
robust_results = pd.concat(robust_results, axis = 1)
robust_results

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot mean test-set improvement (with standard-error bars) against the label
# noise ratio for the original and the robust model on one shared axis.

SMALL_SIZE = 30
MEDIUM_SIZE = 40
BIGGER_SIZE = 50

# Font settings are configured BEFORE the figure is created: rc values are
# read when text artists are built, so setting them after plt.subplots()
# would leave the already-created axis labels at the previous sizes.
plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE)    # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)   # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)   # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE)   # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

fig, ax = plt.subplots()
fig.set_size_inches(30, 20)

# (results table, line colour, legend label) for each model, in draw order.
model_series = [
    (original_results, '#eb4d4b', 'Coefficient predictor'),
    (robust_results, '#4834d4', 'Robust coefficient predictor'),
]

for results, color, label in model_series:
    # Column-wise statistics: one mean / standard error per noise ratio.
    all_scores = np.mean(results, axis = 0)
    error_scores = stats.sem(results, axis = 0)

    plt.plot(noise_ratios, all_scores, 'o-',
             lw = 10, markersize = 35, color = color, label = label)
    # Error bars are drawn separately so they stay out of the legend.
    plt.errorbar(noise_ratios, all_scores, error_scores, lw = 5,
                 linestyle='None', marker='^', color = color)

plt.xlabel('Percent of non-sibling peptides')
plt.ylabel('Test set percent improvement \n over baseline')
# round() rather than int(): float products such as 0.29 * 100 can land just
# below the intended integer and would be truncated to the wrong tick label.
plt.xticks(noise_ratios, [round(n * 100) for n in noise_ratios])
plt.legend()
plt.ylim([0, 40])
plt.grid()