In [None]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from tensorflow import keras
import math
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
import statistics

In [None]:
#BASE MODEL BOXPLOT
# Cell-level RMSE of the CNCC LSTM predictions, grouped into train /
# validation / test, drawn as one boxplot and then summarised as text.

df = pd.read_csv('data/cncc_predictions.csv')

# One RMSE is collected per (EF, track, pred_error column).
err_train, err_val, err_test = [], [], []

# The 50 per-row error columns that are scored individually.
error_cols = ['pred_error{}'.format(i) for i in range(50)]

efs = [0, 15, 30, 50, 75, 100, 200] #the list of EFs
for ef in efs:
    # Rows at this EF for which a prediction exists.
    usable = df['pred_dir0'].notna() & (df['ef'] == ef)

    # Training group: set 1, tracks 1-40.
    train = df[(df['set'] == 1) & (df['track'] < 41) & usable]
    for track in range(1, 41):
        track_rows = train[train['track'] == track]
        for col in error_cols:
            cell_errors = track_rows[col]
            # RMSE of the stored errors, i.e. measured against an all-zero target.
            err_train.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    # Validation group: set 1, tracks 41-50.
    val = df[(df['set'] == 1) & (df['track'] >= 41) & usable]
    for track in range(41, 51):
        track_rows = val[val['track'] == track]
        for col in error_cols:
            cell_errors = track_rows[col]
            err_val.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    # Test group: set 2, tracks 1-50.
    test = df[(df['set'] == 2) & usable]
    for track in range(1, 51):
        track_rows = test[test['track'] == track]
        for col in error_cols:
            cell_errors = track_rows[col]
            err_test.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_train, err_val, err_test]

# --- figure ---------------------------------------------------------------
fig, ax = plt.subplots()
ax.set_title('Prediction Errors for CNCC LSTM', fontsize=18)
ax.set_xlabel('Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)

boxes = ax.boxplot(
    errors,
    showmeans=True,
    showfliers=False,
    widths=0.7,
    patch_artist=True,
)
plt.xticks(np.arange(1,4), labels=['Train', 'Validation', 'Test'], fontsize=14)
plt.yticks(fontsize=14)

# All three boxes share one fill colour.
color = 'lightblue'
for box_patch in boxes['boxes']:
    box_patch.set_facecolor(color)

# Empty proxy lines so the legend can explain the median and mean markers.
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
ax.legend(
    [median_label, mean_label],
    ['Median', 'Mean'],
    bbox_to_anchor=(.9,1.0),
    fancybox=True,
    shadow=True,
    loc='upper left',
    fontsize=14,
)

plt.savefig('figures/cncc_trainvaltest_boxplot.pdf') #save the figure

# --- text summary: median, IQR (midpoint quartiles), mean, sample std ------
for heading, sample in (
    ('Training Error:', err_train),
    ('Validation Error:', err_val),
    ('Test Error:', err_test),
):
    Q1 = np.percentile(sample, 25, interpolation = 'midpoint')
    Q3 = np.percentile(sample, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(heading)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(
        statistics.median(sample), IQR, statistics.mean(sample), statistics.stdev(sample)))

In [None]:
#NAIVE BOXPLOT
# Test-set (set 2) cell-level RMSE of the LSTM next to the two baselines
# stored in cncc_naive.csv, as a boxplot plus a printed summary.

df = pd.read_csv('data/cncc_predictions.csv')
df2 = pd.read_csv('data/cncc_naive.csv')

err_lstm, err_naive, err_lin = [], [], []

efs = [0, 15, 30, 50, 75, 100, 200] #the list of EFs
for ef in efs:
    # LSTM: one RMSE per (track, pred_error column).
    lstm = df[(df['set'] == 2) & df['pred_dir0'].notna() & (df['ef'] == ef)]
    for track in range(1, 51):
        track_rows = lstm[lstm['track'] == track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            # RMSE of the stored errors, i.e. measured against an all-zero target.
            err_lstm.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    # Baseline scored from the 'naive_error' column: one RMSE per track.
    naive = df2[(df2['set'] == 2) & df2['pred_naive'].notna() & (df2['ef'] == ef)]
    for track in range(1, 51):
        cell_errors = naive.loc[naive['track'] == track, 'naive_error']
        err_naive.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    # Baseline scored from the 'lin_error' column: one RMSE per track.
    lin = df2[(df2['set'] == 2) & df2['pred_lin'].notna() & (df2['ef'] == ef)]
    for track in range(1, 51):
        cell_errors = lin.loc[lin['track'] == track, 'lin_error']
        err_lin.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_lstm, err_naive, err_lin]

# --- figure ---------------------------------------------------------------
fig, ax = plt.subplots()
ax.set_title('LSTM and Naive Test Set Errors', fontsize=18)
ax.set_xlabel('Model', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)

boxes = ax.boxplot(
    errors,
    showmeans=True,
    showfliers=False,
    widths=0.7,
    patch_artist=True,
)
plt.xticks(np.arange(1,4), labels=['LSTM', 'Constant\nDirectedness', 'Linear\nPredictor'], fontsize=14)
plt.yticks(fontsize=14)

# One fill colour per model, in the same order as `errors`.
base_color = 'lightblue'
const_color = 'lightpink'
linear_color = 'orchid'
colors = [base_color, const_color, linear_color]
for box_patch, fill in zip(boxes['boxes'], colors):
    box_patch.set_facecolor(fill)

# Empty proxy lines so the legend can explain the median and mean markers.
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
ax.legend(
    [median_label, mean_label],
    ['Median', 'Mean'],
    fancybox=True,
    shadow=True,
    fontsize=14,
)

plt.savefig('figures/cncc_naive_boxplot.pdf') #save the figure

# --- text summary: median, IQR (midpoint quartiles), mean, sample std ------
for heading, sample in (
    ('LSTM Error:', err_lstm),
    ('One-Step Error:', err_naive),
    ('Two-Step Error:', err_lin),
):
    Q1 = np.percentile(sample, 25, interpolation = 'midpoint')
    Q3 = np.percentile(sample, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(heading)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(
        statistics.median(sample), IQR, statistics.mean(sample), statistics.stdev(sample)))

In [None]:
#INTERPOLATION BOXPLOT
# Compares the base model with the interpolation model (trained without the
# 30mV/mm data) on the full test set and on the 30mV/mm test instances only.
# Produces a grouped boxplot of cell-level RMSE and prints summary statistics.

df_base=pd.read_csv('data/cncc_predictions.csv') #load base model
df_inter=pd.read_csv('data/cncc_no30_predictions.csv') #load interpolation model - trained w/o 30mV/mm

#errors on the full test set
err_base_all = []
err_inter_all = []
#errors on 30mV/mm test instances
err_base_30 = []
err_inter_30 = []

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    # Get errors from base model
    test = df_base[(df_base['set']==2) & (df_base['pred_dir0'].notna())& (df_base['ef']==ef)]
    for track in range(1,51):
        track_rows = test[test['track']==track] #slice once per track rather than once per column
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            # RMSE of the stored errors (scored against an all-zero target); computed once, reused below
            rmse = math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors))))
            err_base_all.append(rmse)
            if ef==30:
                err_base_30.append(rmse)

    # Get errors from interpolation model
    test = df_inter[(df_inter['set']==2) & (df_inter['pred_dir0'].notna())& (df_inter['ef']==ef)]
    for track in range(1,51):
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            rmse = math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors))))
            err_inter_all.append(rmse)
            if ef==30:
                err_inter_30.append(rmse)

errors = [err_base_all,err_inter_all,err_base_30,err_inter_30]

fig, ax = plt.subplots()
ax.set_title('Interpolation Model Error', fontsize=18)
ax.set_xlabel('Test Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)
#fig.set_size_inches(14, 8) # set figure size

# Boxes are paired around x=1 (full test set) and x=2 (30mV/mm instances)
boxes = ax.boxplot(errors, showmeans=True, showfliers=False, positions=[0.8,1.2,1.8,2.2], widths=0.35, patch_artist=True)
plt.xticks([1,2], labels=['Full Test Set', '30mV/mm Instances'], fontsize=14)
plt.yticks(fontsize=14)

#colors
base_color = 'lightblue'
inter_color = 'lightgreen'
colors = [base_color, inter_color, base_color, inter_color]
for patch, color in zip(boxes['boxes'], colors):
    patch.set_facecolor(color)

#legend information
# Empty proxy artists: they draw nothing but give the legend its swatches
base_label, = ax.plot([],'s',color=base_color)
inter_label, = ax.plot([],'s',color=inter_color)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
# Keep a handle on the legend: savefig below needs it in bbox_extra_artists.
# Previously the return value was discarded, so `lgd` was undefined on a fresh
# kernel (or silently referred to another cell's legend).
lgd = ax.legend(
    [base_label,inter_label,median_label,mean_label],
    ['Base Model','Interpolation Model','Median','Mean'],
    bbox_to_anchor=(0.85,1), fancybox=True, shadow=True, loc='upper left', fontsize=14)

plt.savefig('figures/cncc_interpolation_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

# Summary per box, in the same order as `errors`.
# NOTE(review): `interpolation=` is deprecated in newer NumPy in favour of `method=`;
# left unchanged so the cell still runs on older NumPy versions.
for error in errors:
    Q1 = np.percentile(error, 25, interpolation = 'midpoint') 
    Q3 = np.percentile(error, 75, interpolation = 'midpoint') 
    IQR = Q3 - Q1 
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
#EXTRAPOLATION BOXPLOT
# Compares the base model with the extrapolation model (trained without the
# 200mV/mm data) on the full test set and on the 200mV/mm test instances only.
# Produces a grouped boxplot of cell-level RMSE and prints summary statistics.

df_base=pd.read_csv('data/cncc_predictions.csv') #load base model
# The `*_inter` names are kept from the interpolation cell; here they hold the extrapolation model.
df_inter=pd.read_csv('data/cncc_no200_predictions.csv') #load extrapolation model - trained w/o 200mV/mm

#errors on the full test set
err_base_all = []
err_inter_all = []
#errors on 200mV/mm test instances
err_base_200 = []
err_inter_200 = []

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    # Get errors from base model
    test = df_base[(df_base['set']==2) & (df_base['pred_dir0'].notna())& (df_base['ef']==ef)]
    for track in range(1,51):
        track_rows = test[test['track']==track] #slice once per track rather than once per column
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            # RMSE of the stored errors (scored against an all-zero target); computed once, reused below
            rmse = math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors))))
            err_base_all.append(rmse)
            if ef==200:
                err_base_200.append(rmse)

    # Get errors from extrapolation model
    test = df_inter[(df_inter['set']==2) & (df_inter['pred_dir0'].notna())& (df_inter['ef']==ef)]
    for track in range(1,51):
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            rmse = math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors))))
            err_inter_all.append(rmse)
            if ef==200:
                err_inter_200.append(rmse)

errors = [err_base_all,err_inter_all,err_base_200,err_inter_200]

fig, ax = plt.subplots()
ax.set_title('Extrapolation Model Error', fontsize=18)
ax.set_xlabel('Test Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)
#fig.set_size_inches(14, 8) # set figure size

# Boxes are paired around x=1 (full test set) and x=2 (200mV/mm instances)
boxes = ax.boxplot(errors, showmeans=True, showfliers=False, positions=[0.8,1.2,1.8,2.2], widths=0.35, patch_artist=True)
plt.xticks([1,2], labels=['Full Test Set', '200mV/mm Instances'], fontsize=14)
plt.yticks(fontsize=14)

#colors
base_color = 'lightblue'
ext_color = 'aquamarine'
colors = [base_color, ext_color, base_color, ext_color]
for patch, color in zip(boxes['boxes'], colors):
    patch.set_facecolor(color)

#legend information
# Empty proxy artists: they draw nothing but give the legend its swatches
base_label, = ax.plot([],'s',color=base_color)
ext_label, = ax.plot([],'s',color=ext_color)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
# Keep a handle on the legend: savefig below needs it in bbox_extra_artists.
# Previously the return value was discarded, so `lgd` was undefined on a fresh
# kernel (or silently referred to another cell's legend).
lgd = ax.legend(
    [base_label,ext_label,median_label,mean_label],
    ['Base Model','Extrapolation Model','Median','Mean'],
    bbox_to_anchor=(0.85,1), fancybox=True, shadow=True, loc='upper left', fontsize=14)

plt.savefig('figures/cncc_extrapolation_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

# Summary per box, in the same order as `errors`.
# NOTE(review): `interpolation=` is deprecated in newer NumPy in favour of `method=`;
# left unchanged so the cell still runs on older NumPy versions.
for error in errors:
    Q1 = np.percentile(error, 25, interpolation = 'midpoint') 
    Q3 = np.percentile(error, 75, interpolation = 'midpoint') 
    IQR = Q3 - Q1 
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
#REVERSAL BOXPLOT
# Cell-level RMSE of the base model on the CNCC test set next to the reversal
# model and the transfer-learning model on the reversal data.

df_base = pd.read_csv('data/cncc_predictions.csv') #load base model data
df_rev = pd.read_csv('data/reversal_predictions.csv') #load reversal model data
df_trans = pd.read_csv('data/reversal_transfer_predictions.csv') #load transfer learning data

#errors on the respective test sets
err_base, err_rev, err_trans = [], [], []

# The 50 per-row error columns that are scored individually.
error_cols = ['pred_error{}'.format(i) for i in range(50)]

# Base model on benchmark data: set 2, per EF, tracks 1-50.
for ef in df_base['ef'].unique():
    test = df_base[(df_base['set'] == 2) & df_base['pred_dir0'].notna() & (df_base['ef'] == ef)]
    for track in range(1, 51):
        track_rows = test[test['track'] == track]
        for col in error_cols:
            cell_errors = track_rows[col]
            # RMSE of the stored errors, i.e. measured against an all-zero target.
            err_base.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Reversal model: every row with a prediction, grouped by track only.
test = df_rev[df_rev['pred_dir0'].notna()]
for track in test['track'].unique():
    track_rows = test[test['track'] == track]
    for col in error_cols:
        cell_errors = track_rows[col]
        err_rev.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Transfer learning model: same treatment as the reversal model.
test = df_trans[df_trans['pred_dir0'].notna()]
for track in test['track'].unique():
    track_rows = test[test['track'] == track]
    for col in error_cols:
        cell_errors = track_rows[col]
        err_trans.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_base, err_rev, err_trans]

# --- figure ---------------------------------------------------------------
fig, ax = plt.subplots()
ax.set_title('Polarity Reversal Prediction Errors', fontsize=18)
ax.set_xlabel('Test Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)

boxes = ax.boxplot(
    errors,
    showmeans=True,
    showfliers=False,
    widths=0.4,
    patch_artist=True,
)
plt.xticks([1,2,3], labels=['CNCC', 'Reversal', 'Reversal'], fontsize=14)
plt.yticks(fontsize=14)

#colors - one per model, in the same order as `errors`
base_color = 'lightblue'
rev_color = 'orchid'
trans_color = 'darkslateblue'
colors = [base_color, rev_color, trans_color]
for box_patch, fill in zip(boxes['boxes'], colors):
    box_patch.set_facecolor(fill)

#legend information - empty proxy artists that only supply legend swatches
base_label, = ax.plot([],'s',color=base_color)
rev_label, = ax.plot([],'s',color=rev_color)
trans_label, = ax.plot([],'s',color=trans_color)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
lgd = ax.legend(
    [base_label, rev_label, trans_label, median_label, mean_label],
    ['Base Model', 'Reversal Model', 'Base to Reversal Transfer', 'Median', 'Mean'],
    bbox_to_anchor=(0.9,0.99),
    fancybox=True,
    shadow=True,
    loc='upper left',
    fontsize=14,
)

# The legend is handed to savefig as an extra artist for the tight bounding box.
plt.savefig('figures/reversal_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

# --- text summary, one line per entry of `errors` ---------------------------
for sample in errors:
    Q1 = np.percentile(sample, 25, interpolation = 'midpoint')
    Q3 = np.percentile(sample, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(
        statistics.median(sample), IQR, statistics.mean(sample), statistics.stdev(sample)))

In [None]:
#KERATOCYTE BOXPLOT
# Cell-level RMSE of the base model on the CNCC test set next to the keratocyte
# model and the CNCC-to-keratocyte transfer model on the keratocyte data.
# Produces a boxplot and prints summary statistics per box.

df_base=pd.read_csv('data/cncc_predictions.csv') #load base model data 
df_ker=pd.read_csv('data/keratocyte_predictions.csv') #load keratocyte model data
df_trans=pd.read_csv('data/keratocyte_transfer_predictions.csv') #load transfer learning data

#errors on the respective test sets
err_base = []
err_ker = []
err_trans = []

# Get errors from base model on benchmark data
for ef in df_base['ef'].unique():
    test = df_base[(df_base['set']==2) & (df_base['pred_dir0'].notna())& (df_base['ef']==ef)]
    for track in range(1,51):
        track_rows = test[test['track']==track] #slice once per track rather than once per column
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            # RMSE of the stored errors, i.e. scored against an all-zero target
            err_base.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Get errors from keratocyte models
# NOTE(review): the EF values come from df_ker and are reused for df_trans;
# this assumes both files cover the same EFs - confirm.
for ef in df_ker['ef'].unique():
    #keratocyte model
    test = df_ker[(df_ker['pred_dir0'].notna())& (df_ker['ef']==ef)]
    for track in test['track'].unique():
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            err_ker.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    #transfer learning model
    test = df_trans[(df_trans['pred_dir0'].notna())& (df_trans['ef']==ef)]
    for track in test['track'].unique():
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            err_trans.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_base,err_ker,err_trans]

fig, ax = plt.subplots()
ax.set_title('Keratocyte Prediction Errors', fontsize=18)
ax.set_xlabel('Test Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)
#fig.set_size_inches(14, 8) # set figure size

boxes = ax.boxplot(errors, showmeans=True, showfliers=False, widths=0.4, patch_artist=True)
plt.xticks([1,2,3], labels=['CNCC', 'Keratocyte', 'Keratocyte'], fontsize=14)
plt.yticks(fontsize=14)

#colors
base_color = 'lightblue'
ker_color = 'lime'
trans_color = 'springgreen'
colors = [base_color, ker_color, trans_color]
for patch, color in zip(boxes['boxes'], colors):
    patch.set_facecolor(color)

#legend information
# Empty proxy artists: they draw nothing but give the legend its swatches
base_label, = ax.plot([],'s',color=base_color)
ker_label, = ax.plot([],'s',color=ker_color)
trans_label, = ax.plot([],'s',color=trans_color)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
# Keep a handle on THIS figure's legend: savefig below passes it in
# bbox_extra_artists. Previously the return value was discarded, so `lgd` was
# either undefined or a stale legend left over from another cell.
lgd = ax.legend(
    [base_label,ker_label,trans_label,median_label,mean_label],
    ['Base Model','Keratocyte Model','CNCC to Keratocyte Transfer','Median','Mean'],
    bbox_to_anchor=(0.6,1.03), fancybox=True, shadow=True, loc='upper left', fontsize=14)

plt.savefig('figures/keratocyte_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

# Summary per box, in the same order as `errors`.
# NOTE(review): `interpolation=` is deprecated in newer NumPy in favour of `method=`;
# left unchanged so the cell still runs on older NumPy versions.
for error in errors:
    Q1 = np.percentile(error, 25, interpolation = 'midpoint') 
    Q3 = np.percentile(error, 75, interpolation = 'midpoint') 
    IQR = Q3 - Q1 
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
#KERATINOCYTE SET 1 BOXPLOT
# Cell-level RMSE of the base model on the CNCC test set next to the
# keratinocyte (NHK0001) model and the CNCC-to-keratinocyte transfer model.
# Produces a boxplot and prints summary statistics per box.

df_base=pd.read_csv('data/cncc_predictions.csv') #load base model data 
df_ker=pd.read_csv('data/NHK0001_predictions.csv') #load keratinocyte model data
df_trans=pd.read_csv('data/NHK0001_transfer_predictions.csv') #load transfer learning data

#errors on the respective test sets
err_base = []
err_ker = []
err_trans = []

# Get errors from base model on benchmark data
for ef in df_base['ef'].unique():
    test = df_base[(df_base['set']==2) & (df_base['pred_dir0'].notna())& (df_base['ef']==ef)]
    for track in range(1,51):
        track_rows = test[test['track']==track] #slice once per track rather than once per column
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            # RMSE of the stored errors, i.e. scored against an all-zero target
            err_base.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Get errors from keratinocyte models
# Unlike the base model, these frames are grouped by track only (no EF split).
#keratinocyte model
test = df_ker[(df_ker['pred_dir0'].notna())]
for track in test['track'].unique():
    track_rows = test[test['track']==track]
    for i in range(50):
        cell_errors = track_rows['pred_error{}'.format(i)]
        err_ker.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

#transfer learning model
test = df_trans[(df_trans['pred_dir0'].notna())]
for track in test['track'].unique():
    track_rows = test[test['track']==track]
    for i in range(50):
        cell_errors = track_rows['pred_error{}'.format(i)]
        err_trans.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Sample counts of the two keratinocyte boxes
print(len(err_ker))        
print(len(err_trans))    

errors = [err_base,err_ker,err_trans]

fig, ax = plt.subplots()
ax.set_title('Keratinocyte 1min Prediction Errors', fontsize=18)
ax.set_xlabel('Test Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)
#fig.set_size_inches(14, 8) # set figure size

boxes = ax.boxplot(errors, showmeans=True, showfliers=False, widths=0.4, patch_artist=True)
plt.xticks([1,2,3], labels=['CNCC', 'Keratinocyte', 'Keratinocyte'], fontsize=14)
plt.yticks(fontsize=14)

#colors
base_color = 'lightblue'
ker_color = 'olivedrab'
trans_color = 'yellowgreen'
colors = [base_color, ker_color, trans_color]
for patch, color in zip(boxes['boxes'], colors):
    patch.set_facecolor(color)

#legend information
# Empty proxy artists: they draw nothing but give the legend its swatches
base_label, = ax.plot([],'s',color=base_color)
ker_label, = ax.plot([],'s',color=ker_color)
trans_label, = ax.plot([],'s',color=trans_color)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
# Keep a handle on THIS figure's legend: savefig below passes it in
# bbox_extra_artists. Previously the return value was discarded, so `lgd` was
# either undefined or a stale legend left over from another cell.
lgd = ax.legend(
    [base_label,ker_label,trans_label,median_label,mean_label],
    ['Base Model','Keratinocyte 1 Model','CNCC to Keratinocyte 1 Transfer','Median','Mean'],
    bbox_to_anchor=(0.65,1), fancybox=True, shadow=True, loc='upper left', fontsize=14)

plt.savefig('figures/NHK0001_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

# Summary per box, in the same order as `errors`.
# NOTE(review): `interpolation=` is deprecated in newer NumPy in favour of `method=`;
# left unchanged so the cell still runs on older NumPy versions.
for error in errors:
    Q1 = np.percentile(error, 25, interpolation = 'midpoint') 
    Q3 = np.percentile(error, 75, interpolation = 'midpoint') 
    IQR = Q3 - Q1 
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
#KERATINOCYTE SET 2 BOXPLOT
# Cell-level RMSE of the base model on the CNCC test set next to the NHK0002
# model and the matching transfer-learning model.

df_base = pd.read_csv('data/cncc_predictions.csv') #load base model data
df_ker = pd.read_csv('data/NHK0002_predictions.csv') #load NHK0002 model data
df_trans = pd.read_csv('data/NHK0002_transfer_predictions.csv') #load transfer learning data

#errors on the respective test sets
err_base, err_ker, err_trans = [], [], []

# The 50 per-row error columns that are scored individually.
error_cols = ['pred_error{}'.format(i) for i in range(50)]

# Base model on benchmark data: set 2, per EF, tracks 1-50.
for ef in df_base['ef'].unique():
    test = df_base[(df_base['set'] == 2) & df_base['pred_dir0'].notna() & (df_base['ef'] == ef)]
    for track in range(1, 51):
        track_rows = test[test['track'] == track]
        # RMSE of each stored error column, i.e. measured against an all-zero target.
        err_base.extend(
            math.sqrt(mean_squared_error(track_rows[col], np.zeros(len(track_rows[col]))))
            for col in error_cols
        )

# Target-cell models: the EF values are taken from df_ker and applied to both frames.
for ef in df_ker['ef'].unique():
    #keratinocyte model
    test = df_ker[df_ker['pred_dir0'].notna() & (df_ker['ef'] == ef)]
    for track in test['track'].unique():
        track_rows = test[test['track'] == track]
        err_ker.extend(
            math.sqrt(mean_squared_error(track_rows[col], np.zeros(len(track_rows[col]))))
            for col in error_cols
        )

    #transfer learning model
    test = df_trans[df_trans['pred_dir0'].notna() & (df_trans['ef'] == ef)]
    for track in test['track'].unique():
        track_rows = test[test['track'] == track]
        err_trans.extend(
            math.sqrt(mean_squared_error(track_rows[col], np.zeros(len(track_rows[col]))))
            for col in error_cols
        )

errors = [err_base, err_ker, err_trans]

# --- figure ---------------------------------------------------------------
fig, ax = plt.subplots()
ax.set_title('Keratinocyte 1min Set 2 Prediction Errors', fontsize=18)
ax.set_xlabel('Test Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)

boxes = ax.boxplot(
    errors,
    showmeans=True,
    showfliers=False,
    widths=0.4,
    patch_artist=True,
)
plt.xticks([1,2,3], labels=['CNCC', 'Keratinocyte', 'Keratinocyte'], fontsize=14)
plt.yticks(fontsize=14)

#colors - one per model, in the same order as `errors`
base_color = 'lightblue'
ker_color = 'crimson'
trans_color = 'palevioletred'
colors = [base_color, ker_color, trans_color]
for box_patch, fill in zip(boxes['boxes'], colors):
    box_patch.set_facecolor(fill)

#legend information - empty proxy artists that only supply legend swatches
base_label, = ax.plot([],'s',color=base_color)
ker_label, = ax.plot([],'s',color=ker_color)
trans_label, = ax.plot([],'s',color=trans_color)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
lgd = ax.legend(
    [base_label, ker_label, trans_label, median_label, mean_label],
    ['Base Model', 'Keratinocyte Model 2', 'CNCC to Keratinocyte 2 Transfer', 'Median', 'Mean'],
    bbox_to_anchor=(0.65,1),
    fancybox=True,
    shadow=True,
    loc='upper left',
    fontsize=14,
)

# The legend is handed to savefig as an extra artist for the tight bounding box.
plt.savefig('figures/NHK0002_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

# --- text summary, one line per entry of `errors` ---------------------------
for sample in errors:
    Q1 = np.percentile(sample, 25, interpolation = 'midpoint')
    Q3 = np.percentile(sample, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(
        statistics.median(sample), IQR, statistics.mean(sample), statistics.stdev(sample)))

In [None]:
#KERATINOCYTE SET 3 BOXPLOT
# Cell-level RMSE of the base model on the CNCC test set next to the
# NHK0802-YL112208 model and the matching transfer-learning model.

df_base = pd.read_csv('data/cncc_predictions.csv') #load base model data
df_ker = pd.read_csv('data/NHK0802-YL112208_predictions.csv') #load NHK0802-YL112208 model data
df_trans = pd.read_csv('data/NHK0802-YL112208_transfer_predictions.csv') #load transfer learning data

#errors on the respective test sets
err_base, err_ker, err_trans = [], [], []

# Base model on benchmark data: set 2, per EF, tracks 1-50.
base_has_pred = df_base['pred_dir0'].notna()
for ef in df_base['ef'].unique():
    test = df_base[(df_base['set'] == 2) & base_has_pred & (df_base['ef'] == ef)]
    for track in range(1, 51):
        per_track = test[test['track'] == track]
        i = 0
        while i < 50:
            cell_errors = per_track['pred_error{}'.format(i)]
            # RMSE of the stored errors, i.e. measured against an all-zero target.
            zero_target = np.zeros(len(cell_errors))
            err_base.append(math.sqrt(mean_squared_error(cell_errors, zero_target)))
            i += 1

# Target-cell models: the EF values are taken from df_ker and applied to both frames.
for ef in df_ker['ef'].unique():
    # (frame, destination list) pairs: keratinocyte model first, then transfer learning model
    for frame, sink in ((df_ker, err_ker), (df_trans, err_trans)):
        test = frame[frame['pred_dir0'].notna() & (frame['ef'] == ef)]
        for track in test['track'].unique():
            per_track = test[test['track'] == track]
            for i in range(50):
                cell_errors = per_track['pred_error{}'.format(i)]
                zero_target = np.zeros(len(cell_errors))
                sink.append(math.sqrt(mean_squared_error(cell_errors, zero_target)))

errors = [err_base, err_ker, err_trans]

# --- figure ---------------------------------------------------------------
fig, ax = plt.subplots()
ax.set_title('Keratinocyte 10min Prediction Errors', fontsize=18)
ax.set_xlabel('Test Set', fontsize=16)
ax.set_ylabel('Cell-Level RMSE', fontsize=16)

boxes = ax.boxplot(
    errors,
    showmeans=True,
    showfliers=False,
    widths=0.4,
    patch_artist=True,
)
plt.xticks([1,2,3], labels=['CNCC', 'Keratinocyte', 'Keratinocyte'], fontsize=14)
plt.yticks(fontsize=14)

#colors - one per model, in the same order as `errors`
base_color = 'lightblue'
ker_color = 'rebeccapurple'
trans_color = 'mediumpurple'
colors = [base_color, ker_color, trans_color]
for box_patch, fill in zip(boxes['boxes'], colors):
    box_patch.set_facecolor(fill)

#legend information - empty proxy artists that only supply legend swatches
base_label, = ax.plot([],'s',color=base_color)
ker_label, = ax.plot([],'s',color=ker_color)
trans_label, = ax.plot([],'s',color=trans_color)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
lgd = ax.legend(
    [base_label, ker_label, trans_label, median_label, mean_label],
    ['Base Model', 'Keratinocyte Model 3', 'CNCC to Keratinocyte 3 Transfer', 'Median', 'Mean'],
    bbox_to_anchor=(0.65,1),
    fancybox=True,
    shadow=True,
    loc='upper left',
    fontsize=14,
)

# The legend is handed to savefig as an extra artist for the tight bounding box.
plt.savefig('figures/NHK0802-YL112208_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

# --- text summary, one line per entry of `errors` ---------------------------
for sample in errors:
    Q1 = np.percentile(sample, 25, interpolation = 'midpoint')
    Q3 = np.percentile(sample, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(
        statistics.median(sample), IQR, statistics.mean(sample), statistics.stdev(sample)))

In [None]:
#ALL OTHER CELL TYPES BOXPLOT
# One figure with seven boxes of cell-level RMSE: the base model on the CNCC
# test set, then a (no transfer, transfer) pair for each target data set
# (keratocyte, keratinocyte 1min, keratinocyte 10min).

df_base=pd.read_csv('data/cncc_predictions.csv') #load base model data 
df_keratocyte=pd.read_csv('data/keratocyte_predictions.csv') #load keratocyte model data
df_keratocyte_trans=pd.read_csv('data/keratocyte_transfer_predictions.csv') #load transfer learning data
df_keratinocyte1=pd.read_csv('data/NHK0001_predictions.csv') #load keratinocyte model 1 data
df_keratinocyte1_trans=pd.read_csv('data/NHK0001_transfer_predictions.csv') #load transfer learning data
df_keratinocyte10=pd.read_csv('data/NHK0802-YL112208_predictions.csv') #load keratinocyte 10 model data
df_keratinocyte10_trans=pd.read_csv('data/NHK0802-YL112208_transfer_predictions.csv') #load transfer learning data

#errors on the respective test sets
err_base = []
err_keratocyte = []
err_keratocyte_trans = []
err_keratinocyte1 = []
err_keratinocyte1_trans = []
err_keratinocyte10 = []
err_keratinocyte10_trans = []

# Get errors from base model on benchmark data
for ef in df_base['ef'].unique():
    test = df_base[(df_base['set']==2) & (df_base['pred_dir0'].notna())& (df_base['ef']==ef)]
    for track in range(1,51):
        track_rows = test[test['track']==track] #slice once per track rather than once per column
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            # RMSE of the stored errors, i.e. scored against an all-zero target
            err_base.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Get errors from keratocyte models
for ef in df_keratocyte['ef'].unique():
    #keratocyte model
    test = df_keratocyte[(df_keratocyte['pred_dir0'].notna())& (df_keratocyte['ef']==ef)]
    for track in test['track'].unique():
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            err_keratocyte.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    #transfer learning model
    test = df_keratocyte_trans[(df_keratocyte_trans['pred_dir0'].notna())& (df_keratocyte_trans['ef']==ef)]
    for track in test['track'].unique():
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            err_keratocyte_trans.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Get errors from keratinocyte 1min models
# These frames are grouped by track only, matching the dedicated NHK0001 cell.
# The previous version wrapped this in `for ef in ...unique()` without ever
# filtering on `ef`, so every RMSE was appended once per distinct EF value.
#keratinocyte model
test = df_keratinocyte1[(df_keratinocyte1['pred_dir0'].notna())]
for track in test['track'].unique():
    track_rows = test[test['track']==track]
    for i in range(50):
        cell_errors = track_rows['pred_error{}'.format(i)]
        err_keratinocyte1.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

#transfer learning model
test = df_keratinocyte1_trans[(df_keratinocyte1_trans['pred_dir0'].notna())]
for track in test['track'].unique():
    track_rows = test[test['track']==track]
    for i in range(50):
        cell_errors = track_rows['pred_error{}'.format(i)]
        err_keratinocyte1_trans.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# Get errors from keratinocyte 10min models
for ef in df_keratinocyte10['ef'].unique():
    #keratinocyte model
    test = df_keratinocyte10[(df_keratinocyte10['pred_dir0'].notna())& (df_keratinocyte10['ef']==ef)]
    for track in test['track'].unique():
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            err_keratinocyte10.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    #transfer learning model
    test = df_keratinocyte10_trans[(df_keratinocyte10_trans['pred_dir0'].notna())& (df_keratinocyte10_trans['ef']==ef)]
    for track in test['track'].unique():
        track_rows = test[test['track']==track]
        for i in range(50):
            cell_errors = track_rows['pred_error{}'.format(i)]
            err_keratinocyte10_trans.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_base,err_keratocyte,err_keratocyte_trans,err_keratinocyte1,err_keratinocyte1_trans,
         err_keratinocyte10, err_keratinocyte10_trans]

fig, ax = plt.subplots(figsize=(18,8))
ax.set_title('Prediction Errors on Target Cell Types', fontsize=32)
ax.set_xlabel('Test Set', fontsize=24)
ax.set_ylabel('Cell-Level RMSE', fontsize=24)
#fig.set_size_inches(14, 8) # set figure size

# Boxes sit at x=1..7; each target data set owns two neighbouring boxes, so the
# tick for a pair is placed midway between them (2.5, 4.5, 6.5).
boxes = ax.boxplot(errors, showmeans=True, showfliers=False, widths=0.4, patch_artist=True)
#plt.xticks(range(1,8), labels=['CNCC', 'Keratocyte', 'Keratocyte', 'Keratinocyte 1min', 'Keratinocyte 1min',
                           #'Keratinocyte 10min', 'Keratinocyte 10min'], fontsize=16)
plt.xticks([1,2.5,4.5,6.5], labels=['CNCC', 'Keratocyte', 'Keratinocyte 1min', 'Keratinocyte 10min'], fontsize=20)
plt.yticks(fontsize=20)

#colors
# Every pair uses the same two colours: first box = no transfer, second = transfer
base_color = 'lightblue'
keratocyte_color = 'orchid'#'lime'
keratocyte_trans_color = 'darkslateblue'#'springgreen'
keratinocyte1_color = 'orchid'#'crimson'
keratinocyte1_trans_color = 'darkslateblue'#'palevioletred'
keratinocyte10_color = 'orchid'#'rebeccapurple'
keratinocyte10_trans_color = 'darkslateblue'#'mediumpurple'
colors = [base_color, keratocyte_color, keratocyte_trans_color, keratinocyte1_color, keratinocyte1_trans_color,
         keratinocyte10_color, keratinocyte10_trans_color]
for patch, color in zip(boxes['boxes'], colors):
    patch.set_facecolor(color)

#legend information
# Empty proxy artists: they draw nothing but give the legend its swatches.
# Only one swatch per colour is needed because the pairs share colours; the
# unused per-data-set handles that were created here before have been dropped.
base_label, = ax.plot([],'s',color=base_color,markersize=25)
keratocyte_label, = ax.plot([],'s',color=keratocyte_color,markersize=25)
keratocyte_trans_label, = ax.plot([],'s',color=keratocyte_trans_color,markersize=25)
median_label, = ax.plot([],color='orange')
mean_label, = ax.plot([],'^',color='green')
lgd = ax.legend(
    [base_label, keratocyte_label, keratocyte_trans_label, median_label, mean_label],
    ['Base Model','No Transfer Learning','Transfer Learning with CNCC','Median','Mean'],
    fancybox=True, shadow=True, loc='upper left', fontsize=20)

# The legend is handed to savefig as an extra artist for the tight bounding box
plt.savefig('figures/targetmodels_boxplot.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

for error in errors:
    Q1 = np.percentile(error, 25, interpolation = 'midpoint') 
    Q3 = np.percentile(error, 75, interpolation = 'midpoint') 
    IQR = Q3 - Q1 
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
# 1-LAYER VS 2-LAYER BOXPLOT
# Box plots of cell-level RMSE on the train / validation / test splits for the 1-layer and 2-layer
# CNCC LSTMs, followed by summary statistics for the 1-layer model.
# NOTE(review): the base-model cell at the top of the notebook reads 'pred_dir0' / 'pred_error{i}' from
# this same CSV -- confirm the un-suffixed 'pred_dir' / 'pred_error' columns used here still exist.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_2layer_predictions.csv')

err_train = []
err_val = []
err_test = []

err_train2 = []
err_val2 = []
err_test2 = []

efs = [0,15,30,50,75,100,200] #the list of EFs

# each model's predictions paired with its (train, validation, test) output lists
models = [(df, (err_train, err_val, err_test)),     # 1-layer model
          (df2, (err_train2, err_val2, err_test2))] # 2-layer model

for frame, (train_out, val_out, test_out) in models:
    predicted = frame[frame['pred_dir'].notna()] #only rows that carry a prediction
    for ef in efs:
        at_ef = predicted[predicted['ef']==ef]
        # every mask is built from the frame it indexes, so the two models can never be mixed up
        in_set1 = at_ef['set']==1
        splits = [(train_out, at_ef[in_set1 & (at_ef['track']<41)], range(1,41)),
                  (val_out, at_ef[in_set1 & (at_ef['track']>=41)], range(41,51)),
                  (test_out, at_ef[at_ef['set']==2], range(1,51))]
        for out, subset, tracks in splits:
            for track in tracks:
                cell_errors = subset[subset['track']==track]['pred_error']
                # RMSE of one cell's errors against zero; the zero vector is sized from the data
                # rather than assuming a fixed number of predictions per cell
                out.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_train, err_val, err_test]
errors2 = [err_train2, err_val2, err_test2]

# the two models sit side by side around x = 2, 4, 6
pos1 = [2*i-0.254 for i in range(1,4)]
pos2 = [2*i+0.254 for i in range(1,4)]

c1 = 'red'
c2 = 'blue'
cmed = 'black'
cmean = 'green'

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for CNCC LSTM - 1 and 2 Layer')
ax.set_xlabel('Set')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

for data, positions, box_c in [(errors, pos1, c1), (errors2, pos2, c2)]:
    ax.boxplot(data, positions=positions, showmeans=True, showfliers=False, notch=False, widths=0.5,
               patch_artist=True,
               boxprops=dict(facecolor=box_c, color=box_c),
               whiskerprops=dict(color=box_c),
               capprops=dict(color=box_c),
               medianprops=dict(color=cmed))

plt.xticks([2,4,6], labels=['Train', 'Validation', 'Test'])

# empty artists that exist only to provide legend entries
plot1 = plt.plot([], [],'s', label='1 Layer', color=c1)
plot2 = plt.plot([], [],'s', label='2 Layer', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=cmed)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=cmean)

ax.legend() # create the legend

plt.savefig('figures/cncc_1layer_2layer_boxplot.pdf') #save the figure

# summary statistics for the 1-layer model
for header, errs in [('Training Error:', err_train),
                     ('Validation Error:', err_val),
                     ('Test Error:', err_test)]:
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(header)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# ERRORS BY EF STRENGTH
# Box plots of cell-level RMSE per EF strength for the train / validation / test splits, then summary
# statistics per EF and the covariance / Pearson correlation between EF and absolute test error.
# NOTE(review): the base-model cell at the top of the notebook reads 'pred_dir0' / 'pred_error{i}' from
# this same CSV -- confirm the un-suffixed 'pred_dir' / 'pred_error' columns used here still exist.
df = pd.read_csv('data/cncc_predictions.csv') #read in predictions as dataframe

efs = [0,15,30,50,75,100,200] #the list of EFs

#lists of errors (one list for each EF)
err_train = [[] for _ in efs] #training errors
err_val = [[] for _ in efs] #validation errors
err_test = [[] for _ in efs] #test errors

#add errors to lists
predicted = df[df['pred_dir'].notna()] #only rows that carry a prediction
for i, ef in enumerate(efs):
    at_ef = predicted[predicted['ef']==ef]
    in_set1 = at_ef['set']==1
    splits = [(err_train[i], at_ef[in_set1 & (at_ef['track']<41)], range(1,41)),
              (err_val[i], at_ef[in_set1 & (at_ef['track']>=41)], range(41,51)),
              (err_test[i], at_ef[at_ef['set']==2], range(1,51))]
    for out, subset, tracks in splits:
        for track in tracks:
            cell_errors = subset[subset['track']==track]['pred_error']
            # RMSE of one cell's errors against zero; the zero vector is sized from the data
            # rather than assuming a fixed number of predictions per cell
            out.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# positions of boxes: train / validation / test grouped around x = 2, 4, 6, ...
pos_train = [2*k-0.5 for k in range(1,len(efs)+1)]
pos_val = [2*k for k in range(1,len(efs)+1)]
pos_test = [2*k+0.5 for k in range(1,len(efs)+1)]

#make figure, add title and axis labels
fig, ax = plt.subplots()
ax.set_title('Prediction Errors by EF Strength')
ax.set_xlabel('EF Strength')
ax.set_ylabel('RMSE of Individual Cells')

# colors for the plot
train_c = 'red' #training box colors
val_c = 'blue' #validation box colors
test_c = 'purple' #test box colors
med_c = 'black' #color of median line
mean_c = 'green' #color of mean triangle (in legend)

fig.set_size_inches(14, 8) # set figure size

# plot training, validation and test error boxes (in that order)
for data, positions, box_c in [(err_train, pos_train, train_c),
                               (err_val, pos_val, val_c),
                               (err_test, pos_test, test_c)]:
    ax.boxplot(data, positions=positions, showmeans=True, showfliers=False, notch=False, patch_artist=True,
               boxprops=dict(facecolor=box_c, color=box_c),
               whiskerprops=dict(color=box_c),
               capprops=dict(color=box_c),
               medianprops=dict(color=med_c))

plt.xticks(pos_val,labels=[str(ef)+'mV/mm' for ef in efs]) # labels for xticks

# legend information (empty artists that exist only to provide legend entries)
trainplot = plt.plot([], [],'s', label='TRAINING DATA', color=train_c)
valplot = plt.plot([], [],'s', label='VALIDATION DATA', color=val_c)
testplot = plt.plot([], [],'s', label='TEST DATA', color=test_c)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=med_c)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=mean_c)

ax.legend() # create the legend

plt.savefig('figures/cncc_by_ef.pdf') #save the figure

# print out some statistics
for i in range(len(efs)):
    for split_name, per_ef in [('Training', err_train), ('Validation', err_val), ('Test', err_test)]:
        error = per_ef[i]
        Q1 = np.percentile(error, 25, interpolation = 'midpoint')
        Q3 = np.percentile(error, 75, interpolation = 'midpoint')
        IQR = Q3 - Q1
        print('{} Error for {}mV/mm:'.format(split_name, efs[i]))
        print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

#print out covariance matrix and pearson's r (EF strength vs. absolute test-set error)
test_rows = df[(df['set']==2) & (df['pred_dir'].notna())]
abs_test_error = test_rows['pred_error'].abs()
covariance = np.cov(test_rows['ef'], abs_test_error)
corr, _ = pearsonr(test_rows['ef'], abs_test_error)
print('Covariance Matrix:')
print(covariance)
print("Pearson's r:")
print(corr)

In [None]:
# FULL TRAINING SET VS TRAINING WITHOUT 30mV/mm AND 75mV/mm
# Box plots of cell-level RMSE on the train / validation / test splits for the original model and the
# model trained without the 30mV/mm and 75mV/mm data, followed by summary statistics for both.
# NOTE(review): the base-model cell at the top of the notebook reads 'pred_dir0' / 'pred_error{i}' from
# cncc_predictions.csv -- confirm the un-suffixed 'pred_dir' / 'pred_error' columns still exist.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_predictions_no3075.csv')

err_train = []
err_val = []
err_test = []

err_train2 = []
err_val2 = []
err_test2 = []

efs = [0,15,30,50,75,100,200] #the list of EFs

# each model's predictions paired with its (train, validation, test) output lists
models = [(df, (err_train, err_val, err_test)),     # model trained on the full training set
          (df2, (err_train2, err_val2, err_test2))] # model trained without 30mV/mm and 75mV/mm

for frame, (train_out, val_out, test_out) in models:
    predicted = frame[frame['pred_dir'].notna()] #only rows that carry a prediction
    for ef in efs:
        at_ef = predicted[predicted['ef']==ef]
        # every mask is built from the frame it indexes, so the two models can never be mixed up
        in_set1 = at_ef['set']==1
        splits = [(train_out, at_ef[in_set1 & (at_ef['track']<41)], range(1,41)),
                  (val_out, at_ef[in_set1 & (at_ef['track']>=41)], range(41,51)),
                  (test_out, at_ef[at_ef['set']==2], range(1,51))]
        for out, subset, tracks in splits:
            for track in tracks:
                cell_errors = subset[subset['track']==track]['pred_error']
                # RMSE of one cell's errors against zero; the zero vector is sized from the data
                # rather than assuming a fixed number of predictions per cell
                out.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_train, err_val, err_test]
errors2 = [err_train2, err_val2, err_test2]

# the two models sit side by side around x = 2, 4, 6
pos1 = [2*i-0.254 for i in range(1,4)]
pos2 = [2*i+0.254 for i in range(1,4)]

c1 = 'red'
c2 = 'blue'
cmed = 'black'
cmean = 'green'

fig, ax = plt.subplots()
# the title names the comparison actually plotted (it previously carried the 1-vs-2-layer title)
ax.set_title('Prediction Errors for CNCC LSTM - Trained with and without 30mV/mm and 75mV/mm')
ax.set_xlabel('Set')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

for data, positions, box_c in [(errors, pos1, c1), (errors2, pos2, c2)]:
    ax.boxplot(data, positions=positions, showmeans=True, showfliers=False, notch=False, widths=0.5,
               patch_artist=True,
               boxprops=dict(facecolor=box_c, color=box_c),
               whiskerprops=dict(color=box_c),
               capprops=dict(color=box_c),
               medianprops=dict(color=cmed))

plt.xticks([2,4,6], labels=['Train', 'Validation', 'Test'])

# empty artists that exist only to provide legend entries
plot1 = plt.plot([], [],'s', label='Trained on Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 30mV/mm or 75mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=cmed)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=cmean)

ax.legend() # create the legend

plt.savefig('figures/cncc_boxplot_no3075_trainvaltest.pdf') #save the figure

# summary statistics for both models
for header, errs in [('Original Training Error:', err_train),
                     ('Original Validation Error:', err_val),
                     ('Original Test Error:', err_test),
                     ('No 30,75 Training Error:', err_train2),
                     ('No 30,75 Validation Error:', err_val2),
                     ('No 30,75 Test Error:', err_test2)]:
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(header)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# TEST ERRORS BY EF STRENGTH: FULL TRAINING SET VS TRAINING WITHOUT 30mV/mm AND 75mV/mm
# Box plots of test-set cell-level RMSE per EF strength for both models, then summary statistics.
# NOTE(review): the base-model cell at the top of the notebook reads 'pred_dir0' / 'pred_error{i}' from
# cncc_predictions.csv -- confirm the un-suffixed 'pred_dir' / 'pred_error' columns still exist.
df1 = pd.read_csv('data/cncc_predictions.csv') #read in predictions as dataframe
df2 = pd.read_csv('data/cncc_predictions_no3075.csv') #read in predictions as dataframe

efs = [0,15,30,50,75,100,200] #the list of EFs

#lists of errors (one list for each EF)
err1 = [[] for _ in efs] #test errors on original model
err2 = [[] for _ in efs] #test errors on model trained with limited voltages

#add errors to lists
for frame, per_ef in [(df1, err1), (df2, err2)]:
    test_rows = frame[(frame['set']==2) & (frame['pred_dir'].notna())] #test set, predicted rows only
    for i, ef in enumerate(efs):
        at_ef = test_rows[test_rows['ef']==ef]
        for track in range(1,51):
            cell_errors = at_ef[at_ef['track']==track]['pred_error']
            # RMSE of one cell's errors against zero; the zero vector is sized from the data
            # rather than assuming a fixed number of predictions per cell
            per_ef[i].append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# positions of boxes: one pair per EF, centred on x = 2, 4, 6, ...
centers = [2*k for k in range(1,len(efs)+1)]
pos1 = [c-0.25 for c in centers]
pos2 = [c+0.25 for c in centers]

#make figure, add title and axis labels
fig, ax = plt.subplots()
ax.set_title('Prediction Errors by EF Strength')
ax.set_xlabel('EF Strength')
ax.set_ylabel('RMSE of Individual Cells')

# colors for the plot
c1 = 'red' #original model box colors
c2 = 'blue' #limited voltage box colors
med_c = 'black' #color of median line
mean_c = 'green' #color of mean triangle (in legend)

fig.set_size_inches(14, 8) # set figure size

# plot test error boxes for the original model, then for the limited EF model
for data, positions, box_c in [(err1, pos1, c1), (err2, pos2, c2)]:
    ax.boxplot(data, positions=positions, showmeans=True, showfliers=False, notch=False, patch_artist=True,
               boxprops=dict(facecolor=box_c, color=box_c),
               capprops=dict(color=box_c),
               whiskerprops=dict(color=box_c),
               medianprops=dict(color=med_c))

plt.xticks(centers,labels=[str(ef)+'mV/mm' for ef in efs]) # labels for xticks

# legend information (empty artists that exist only to provide legend entries)
plot1 = plt.plot([], [],'s', label='Trained with Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 30mV/mm or 75mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=med_c)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=mean_c)

ax.legend() # create the legend

plt.savefig('figures/cncc_by_ef_no3075.pdf') #save the figure

# print out some statistics
for i in range(len(efs)):
    for model_name, per_ef in [('Original Model', err1), ('Limited EF Model', err2)]:
        error = per_ef[i]
        Q1 = np.percentile(error, 25, interpolation = 'midpoint')
        Q3 = np.percentile(error, 75, interpolation = 'midpoint')
        IQR = Q3 - Q1
        print('{} Test Error for {}mV/mm:'.format(model_name, efs[i]))
        print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
# FULL TRAINING SET VS TRAINING WITHOUT 30mV/mm
# Box plots of cell-level RMSE on the train / validation / test splits for the original model and the
# model trained without the 30mV/mm data, followed by summary statistics for both.
# NOTE(review): the base-model cell at the top of the notebook reads 'pred_dir0' / 'pred_error{i}' from
# cncc_predictions.csv -- confirm the un-suffixed 'pred_dir' / 'pred_error' columns still exist.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_predictions_no30.csv')

err_train = []
err_val = []
err_test = []

err_train2 = []
err_val2 = []
err_test2 = []

efs = [0,15,30,50,75,100,200] #the list of EFs

# each model's predictions paired with its (train, validation, test) output lists
models = [(df, (err_train, err_val, err_test)),     # model trained on the full training set
          (df2, (err_train2, err_val2, err_test2))] # model trained without 30mV/mm

for frame, (train_out, val_out, test_out) in models:
    predicted = frame[frame['pred_dir'].notna()] #only rows that carry a prediction
    for ef in efs:
        at_ef = predicted[predicted['ef']==ef]
        # every mask is built from the frame it indexes, so the two models can never be mixed up
        in_set1 = at_ef['set']==1
        splits = [(train_out, at_ef[in_set1 & (at_ef['track']<41)], range(1,41)),
                  (val_out, at_ef[in_set1 & (at_ef['track']>=41)], range(41,51)),
                  (test_out, at_ef[at_ef['set']==2], range(1,51))]
        for out, subset, tracks in splits:
            for track in tracks:
                cell_errors = subset[subset['track']==track]['pred_error']
                # RMSE of one cell's errors against zero; the zero vector is sized from the data
                # rather than assuming a fixed number of predictions per cell
                out.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_train, err_val, err_test]
errors2 = [err_train2, err_val2, err_test2]

# the two models sit side by side around x = 2, 4, 6
pos1 = [2*i-0.254 for i in range(1,4)]
pos2 = [2*i+0.254 for i in range(1,4)]

c1 = 'red'
c2 = 'blue'
cmed = 'black'
cmean = 'green'

fig, ax = plt.subplots()
# the title names the comparison actually plotted (it previously carried the 1-vs-2-layer title)
ax.set_title('Prediction Errors for CNCC LSTM - Trained with and without 30mV/mm')
ax.set_xlabel('Set')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

for data, positions, box_c in [(errors, pos1, c1), (errors2, pos2, c2)]:
    ax.boxplot(data, positions=positions, showmeans=True, showfliers=False, notch=False, widths=0.5,
               patch_artist=True,
               boxprops=dict(facecolor=box_c, color=box_c),
               whiskerprops=dict(color=box_c),
               capprops=dict(color=box_c),
               medianprops=dict(color=cmed))

plt.xticks([2,4,6], labels=['Train', 'Validation', 'Test'])

# empty artists that exist only to provide legend entries
plot1 = plt.plot([], [],'s', label='Trained on Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 30mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=cmed)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=cmean)

ax.legend() # create the legend

# own file name, so this figure does not overwrite the no-30/75 comparison figure
plt.savefig('figures/cncc_boxplot_no30_trainvaltest.pdf') #save the figure

# summary statistics for both models
for header, errs in [('Original Training Error:', err_train),
                     ('Original Validation Error:', err_val),
                     ('Original Test Error:', err_test),
                     ('No 30 Training Error:', err_train2),
                     ('No 30 Validation Error:', err_val2),
                     ('No 30 Test Error:', err_test2)]:
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(header)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# TEST ERRORS BY EF STRENGTH: FULL TRAINING SET VS TRAINING WITHOUT 30mV/mm
# Box plots of test-set cell-level RMSE per EF strength for both models, then summary statistics.
# NOTE(review): the base-model cell at the top of the notebook reads 'pred_dir0' / 'pred_error{i}' from
# cncc_predictions.csv -- confirm the un-suffixed 'pred_dir' / 'pred_error' columns still exist.
df1 = pd.read_csv('data/cncc_predictions.csv') #read in predictions as dataframe
df2 = pd.read_csv('data/cncc_predictions_no30.csv') #read in predictions as dataframe

efs = [0,15,30,50,75,100,200] #the list of EFs

#lists of errors (one list for each EF)
err1 = [[] for _ in efs] #test errors on original model
err2 = [[] for _ in efs] #test errors on model trained with limited voltages

#add errors to lists
for frame, per_ef in [(df1, err1), (df2, err2)]:
    test_rows = frame[(frame['set']==2) & (frame['pred_dir'].notna())] #test set, predicted rows only
    for i, ef in enumerate(efs):
        at_ef = test_rows[test_rows['ef']==ef]
        for track in range(1,51):
            cell_errors = at_ef[at_ef['track']==track]['pred_error']
            # RMSE of one cell's errors against zero; the zero vector is sized from the data
            # rather than assuming a fixed number of predictions per cell
            per_ef[i].append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# positions of boxes: one pair per EF, centred on x = 2, 4, 6, ...
centers = [2*k for k in range(1,len(efs)+1)]
pos1 = [c-0.25 for c in centers]
pos2 = [c+0.25 for c in centers]

#make figure, add title and axis labels
fig, ax = plt.subplots()
ax.set_title('Prediction Errors by EF Strength')
ax.set_xlabel('EF Strength')
ax.set_ylabel('RMSE of Individual Cells')

# colors for the plot
c1 = 'red' #original model box colors
c2 = 'blue' #limited voltage box colors
med_c = 'black' #color of median line
mean_c = 'green' #color of mean triangle (in legend)

fig.set_size_inches(14, 8) # set figure size

# plot test error boxes for the original model, then for the limited EF model
for data, positions, box_c in [(err1, pos1, c1), (err2, pos2, c2)]:
    ax.boxplot(data, positions=positions, showmeans=True, showfliers=False, notch=False, patch_artist=True,
               boxprops=dict(facecolor=box_c, color=box_c),
               capprops=dict(color=box_c),
               whiskerprops=dict(color=box_c),
               medianprops=dict(color=med_c))

plt.xticks(centers,labels=[str(ef)+'mV/mm' for ef in efs]) # labels for xticks

# legend information (empty artists that exist only to provide legend entries)
plot1 = plt.plot([], [],'s', label='Trained with Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 30mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=med_c)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=mean_c)

ax.legend() # create the legend

plt.savefig('figures/cncc_by_ef_no30.pdf') #save the figure

# print out some statistics
for i in range(len(efs)):
    for model_name, per_ef in [('Original Model', err1), ('Limited EF Model', err2)]:
        error = per_ef[i]
        Q1 = np.percentile(error, 25, interpolation = 'midpoint')
        Q3 = np.percentile(error, 75, interpolation = 'midpoint')
        IQR = Q3 - Q1
        print('{} Test Error for {}mV/mm:'.format(model_name, efs[i]))
        print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
# FULL TRAINING SET VS TRAINING WITHOUT 200mV/mm
# Box plots of cell-level RMSE on the train / validation / test splits for the original model and the
# model whose predictions are stored in cncc_predictions_no200.csv, followed by summary statistics.
# NOTE(review): the base-model cell at the top of the notebook reads 'pred_dir0' / 'pred_error{i}' from
# cncc_predictions.csv -- confirm the un-suffixed 'pred_dir' / 'pred_error' columns still exist.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_predictions_no200.csv')

err_train = []
err_val = []
err_test = []

err_train2 = []
err_val2 = []
err_test2 = []

efs = [0,15,30,50,75,100,200] #the list of EFs

# each model's predictions paired with its (train, validation, test) output lists
models = [(df, (err_train, err_val, err_test)),     # model trained on the full training set
          (df2, (err_train2, err_val2, err_test2))] # model trained without 200mV/mm

for frame, (train_out, val_out, test_out) in models:
    predicted = frame[frame['pred_dir'].notna()] #only rows that carry a prediction
    for ef in efs:
        at_ef = predicted[predicted['ef']==ef]
        # every mask is built from the frame it indexes, so the two models can never be mixed up
        in_set1 = at_ef['set']==1
        splits = [(train_out, at_ef[in_set1 & (at_ef['track']<41)], range(1,41)),
                  (val_out, at_ef[in_set1 & (at_ef['track']>=41)], range(41,51)),
                  (test_out, at_ef[at_ef['set']==2], range(1,51))]
        for out, subset, tracks in splits:
            for track in tracks:
                cell_errors = subset[subset['track']==track]['pred_error']
                # RMSE of one cell's errors against zero; the zero vector is sized from the data
                # rather than assuming a fixed number of predictions per cell
                out.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_train, err_val, err_test]
errors2 = [err_train2, err_val2, err_test2]

# the two models sit side by side around x = 2, 4, 6
pos1 = [2*i-0.254 for i in range(1,4)]
pos2 = [2*i+0.254 for i in range(1,4)]

c1 = 'red'
c2 = 'blue'
cmed = 'black'
cmean = 'green'

fig, ax = plt.subplots()
# title, legend and printed headers name the no-200 comparison (they were copied from the no-30 cell)
ax.set_title('Prediction Errors for CNCC LSTM - Trained with and without 200mV/mm')
ax.set_xlabel('Set')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

for data, positions, box_c in [(errors, pos1, c1), (errors2, pos2, c2)]:
    ax.boxplot(data, positions=positions, showmeans=True, showfliers=False, notch=False, widths=0.5,
               patch_artist=True,
               boxprops=dict(facecolor=box_c, color=box_c),
               whiskerprops=dict(color=box_c),
               capprops=dict(color=box_c),
               medianprops=dict(color=cmed))

plt.xticks([2,4,6], labels=['Train', 'Validation', 'Test'])

# empty artists that exist only to provide legend entries
plot1 = plt.plot([], [],'s', label='Trained on Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 200mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=cmed)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=cmean)

ax.legend() # create the legend

# own file name, so this figure does not overwrite the no-30/75 comparison figure
plt.savefig('figures/cncc_boxplot_no200_trainvaltest.pdf') #save the figure

# summary statistics for both models
for header, errs in [('Original Training Error:', err_train),
                     ('Original Validation Error:', err_val),
                     ('Original Test Error:', err_test),
                     ('No 200 Training Error:', err_train2),
                     ('No 200 Validation Error:', err_val2),
                     ('No 200 Test Error:', err_test2)]:
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(header)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Per-EF comparison of test-set (set 2) cell RMSE between the model trained on
# the full training set (df1) and the model trained without 200mV/mm (df2).
df1 = pd.read_csv('data/cncc_predictions.csv') #read in predictions as dataframe
df2 = pd.read_csv('data/cncc_predictions_no200.csv') #read in predictions as dataframe

efs = [0,15,30,50,75,100,200] #the list of EFs

#lists of errors (one list for each EF)
err1 = [[] for _ in efs] #test errors on original model
err2 = [[] for _ in efs] #test errors on model trained with limited voltages

#add errors to lists
for i, ef in enumerate(efs):
    test1 = df1[(df1['set']==2) & (df1['pred_dir'].notna()) & (df1['ef']==ef)]
    test2 = df2[(df2['set']==2) & (df2['pred_dir'].notna()) & (df2['ef']==ef)]
    for track in range(1,51):
        # RMSE of a cell = sqrt(MSE of its errors against zero). The zero vector
        # is sized from the data instead of assuming 17 predictions per track.
        cell_errors = test1[test1['track']==track]['pred_error']
        err1[i].append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

        cell_errors = test2[test2['track']==track]['pred_error']
        err2[i].append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# positions of boxes: EF number k is centred on x=2k, original model slightly
# to the left and limited-EF model slightly to the right
pos1 = [2*k-0.25 for k in range(1,len(efs)+1)]
pos2 = [2*k+0.25 for k in range(1,len(efs)+1)]

#make figure, add title and axis labels
fig, ax = plt.subplots()
ax.set_title('Prediction Errors by EF Strength')
ax.set_xlabel('EF Strength')
ax.set_ylabel('RMSE of Individual Cells')

# colors for the plot
c1 = 'red' #original model box colors
c2 = 'blue' #limited voltage box colors
med_c = 'black' #color of median line
mean_c = 'green' #color of mean triangle (in legend)

fig.set_size_inches(14, 8) # set figure size

# plot test error boxes for original model
ax.boxplot(err1, positions=pos1, showmeans=True, showfliers=False, notch=False, patch_artist=True,
            boxprops=dict(facecolor=c1, color=c1),
            capprops=dict(color=c1),
            whiskerprops=dict(color=c1),
            medianprops=dict(color=med_c))
# plot test error boxes for limited EF model
ax.boxplot(err2, positions=pos2, showmeans=True, showfliers=False, notch=False, patch_artist=True,
            boxprops=dict(facecolor=c2, color=c2),
            capprops=dict(color=c2),
            whiskerprops=dict(color=c2),
            medianprops=dict(color=med_c))

# one tick per EF, centred between its two boxes (derived from len(efs))
plt.xticks(range(2,2*len(efs)+2,2),labels=[str(ef)+'mV/mm' for ef in efs]) # labels for xticks

# legend information (empty plots used only as legend handles)
plot1 = plt.plot([], [],'s', label='Trained with Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 200mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=med_c)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=mean_c)

ax.legend() # create the legend

plt.savefig('figures/cncc_by_ef_no200.pdf') #save the figure

# print out some statistics
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for i in range(len(efs)):
    for model_name, error in (('Original Model', err1[i]), ('Limited EF Model', err2[i])):
        Q1 = np.percentile(error, 25, interpolation = 'midpoint')
        Q3 = np.percentile(error, 75, interpolation = 'midpoint')
        IQR = Q3 - Q1
        print('{} Test Error for {}mV/mm:'.format(model_name, efs[i]))
        print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(error), IQR, statistics.mean(error), statistics.stdev(error)))

In [None]:
# Interpolation experiment: test-set (set 2) cell RMSE of the original model (df)
# versus the model trained without 30mV/mm (df2), over the full test set and
# over the 30mV/mm instances only.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_predictions_no30.csv')

err_test = []  # original model, all EFs
err_new = []   # original model, 30mV/mm only

err_test2 = [] # no-30mV/mm model, all EFs
err_new2 = []  # no-30mV/mm model, 30mV/mm only

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    # original model
    test = df[(df['set']==2) & (df['pred_dir'].notna())& (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = test[test['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err_test.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    # no 30mV/mm model
    test = df2[(df2['set']==2) & (df2['pred_dir'].notna())& (df2['ef']==ef)]
    for track in range(1,51):
        cell_errors = test[test['track']==track]['pred_error']
        err_test2.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# original model
test = df[(df['set']==2) & (df['pred_dir'].notna())& (df['ef']==30)]
for track in range(1,51):
    cell_errors = test[test['track']==track]['pred_error']
    err_new.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# no 30mV/mm model
test = df2[(df2['set']==2) & (df2['pred_dir'].notna())& (df2['ef']==30)]
for track in range(1,51):
    cell_errors = test[test['track']==track]['pred_error']
    err_new2.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_test, err_new]
errors2 = [err_test2, err_new2]

# box positions: the two groups are centred on x=2 and x=4, with the original
# model to the left and the no-30mV/mm model to the right of each centre
pos1 = [2*k-0.252 for k in range(1,3)]
pos2 = [2*k+0.252 for k in range(1,3)]

c1 = 'red'
c2 = 'blue'
cmed = 'black'
cmean = 'green'

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for CNCC LSTM Interpolation')
ax.set_xlabel('Test Set')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, positions=pos1, showmeans=True, showfliers=False, notch=False, widths=0.5, patch_artist=True,
            boxprops=dict(facecolor=c1, color=c1),
            whiskerprops=dict(color=c1),
            capprops=dict(color=c1),
            medianprops=dict(color=cmed))
ax.boxplot(errors2, positions=pos2, showmeans=True, showfliers=False, notch=False, widths=0.5, patch_artist=True,
            boxprops=dict(facecolor=c2, color=c2),
            whiskerprops=dict(color=c2),
            capprops=dict(color=c2),
            medianprops=dict(color=cmed))

plt.xticks([2,4], labels=['Full Test Set', '30mV/mm Instances'])

# empty plots used only as legend handles
plot1 = plt.plot([], [],'s', label='Trained on Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 30mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=cmed)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=cmean)

ax.legend() # create the legend

plt.savefig('figures/cncc_boxplot_interpolation.pdf') #save the figure

# summary statistics for each of the four error lists
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Original Test Error:', err_test),
                    ('Original Test Error on 30mV/mm:', err_new),
                    ('No 30 Test Error:', err_test2),
                    ('No 30 Test Error on 30mV/mm:', err_new2)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Violin plot of the mean squared prediction error per (ef, track) on the test set.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_predictions_no30.csv')

print(df.head())
# .copy() so the column assignment below writes to an independent frame rather
# than to a slice of df (avoids pandas' SettingWithCopyWarning)
test = df[df['set']==2].copy()
test['pred_error'] = test['pred_error']**2
# numeric_only=True keeps the aggregation working if the CSV has non-numeric columns
test = test.groupby(['ef','track']).mean(numeric_only=True)
print(test.head())

# `showfliers` is a boxplot option, not a violinplot one, so it is not passed here
sns.violinplot(y=test['pred_error'], cut=0)

In [None]:
# Extrapolation experiment: test-set (set 2) cell RMSE of the original model (df)
# versus the model trained without 200mV/mm (df2), over the full test set and
# over the 200mV/mm instances only.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_predictions_no200.csv')

err_test = []  # original model, all EFs
err_new = []   # original model, 200mV/mm only

err_test2 = [] # no-200mV/mm model, all EFs
err_new2 = []  # no-200mV/mm model, 200mV/mm only

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    # original model
    test = df[(df['set']==2) & (df['pred_dir'].notna())& (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = test[test['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err_test.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

    # no 200mV/mm model
    test = df2[(df2['set']==2) & (df2['pred_dir'].notna())& (df2['ef']==ef)]
    for track in range(1,51):
        cell_errors = test[test['track']==track]['pred_error']
        err_test2.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# original model
test = df[(df['set']==2) & (df['pred_dir'].notna())& (df['ef']==200)]
for track in range(1,51):
    cell_errors = test[test['track']==track]['pred_error']
    err_new.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# no 200mV/mm model
test = df2[(df2['set']==2) & (df2['pred_dir'].notna())& (df2['ef']==200)]
for track in range(1,51):
    cell_errors = test[test['track']==track]['pred_error']
    err_new2.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

errors = [err_test, err_new]
errors2 = [err_test2, err_new2]

# box positions: the two groups are centred on x=2 and x=4, with the original
# model to the left and the no-200mV/mm model to the right of each centre
pos1 = [2*k-0.252 for k in range(1,3)]
pos2 = [2*k+0.252 for k in range(1,3)]

c1 = 'red'
c2 = 'blue'
cmed = 'black'
cmean = 'green'

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for CNCC LSTM Extrapolation')
ax.set_xlabel('Test Set')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, positions=pos1, showmeans=True, showfliers=False, notch=False, widths=0.5, patch_artist=True,
            boxprops=dict(facecolor=c1, color=c1),
            whiskerprops=dict(color=c1),
            capprops=dict(color=c1),
            medianprops=dict(color=cmed))
ax.boxplot(errors2, positions=pos2, showmeans=True, showfliers=False, notch=False, widths=0.5, patch_artist=True,
            boxprops=dict(facecolor=c2, color=c2),
            whiskerprops=dict(color=c2),
            capprops=dict(color=c2),
            medianprops=dict(color=cmed))

plt.xticks([2,4], labels=['Full Test Set', '200mV/mm Instances'])

# empty plots used only as legend handles
plot1 = plt.plot([], [],'s', label='Trained on Full Training Set', color=c1)
plot2 = plt.plot([], [],'s', label='Trained without 200mV/mm', color=c2)
medianplot = plt.plot([], [], label='Median Cell RMSE', color=cmed)
meanplot = plt.plot([], [], '^', label='Mean Cell RMSE', color=cmean)

ax.legend() # create the legend

plt.savefig('figures/cncc_boxplot_extrapolation.pdf') #save the figure

# summary statistics for each of the four error lists
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Original Test Error:', err_test),
                    ('Original Test Error on 200mV/mm:', err_new),
                    ('No 200 Test Error:', err_test2),
                    ('No 200 Test Error on 200mV/mm:', err_new2)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Cell-level RMSE boxplot for keratocytes: CNCC test-set benchmark (df),
# keratocyte model (df2) and CNCC-to-keratocyte transfer model (df3).
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/keratocyte_predictions.csv')
df3=pd.read_csv('data/keratocyte_transfer_predictions.csv')

err1 = [] # benchmark
err2 = [] # keratocyte model
err3 = [] # transfer model

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    cncc = df[(df['set']==2) & (df['pred_dir'].notna()) & (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = cncc[cncc['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err1.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

efs = df2['ef'].unique() #the list of EFs
for ef in efs:
    # NOTE(review): tracks >= 10 are presumably the held-out cells - confirm
    ker = df2[(df2['pred_dir'].notna()) & (df2['ef']==ef) & (df2['track']>=10)]
    # filter df3 on its own 'track' column; the mask previously used df2['track'],
    # which is only correct when both files have identical row order
    ker_transfer = df3[(df3['pred_dir'].notna()) & (df3['ef']==ef) & (df3['track']>=10)]
    for track in ker['track'].unique():
        cell_errors1 = ker[ker['track']==track]['pred_error']
        cell_errors2 = ker_transfer[ker_transfer['track']==track]['pred_error']
        err2.append(math.sqrt(mean_squared_error(cell_errors1, np.zeros(len(cell_errors1)))))
        err3.append(math.sqrt(mean_squared_error(cell_errors2, np.zeros(len(cell_errors2)))))

errors = [err1, err2, err3]

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for Keratocytes')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, showmeans=True, showfliers=False)
plt.xticks(np.arange(1,4), labels=['Benchmark', 'Keratocyte Model', 'CNCC to Keratocyte Transfer'])

plt.savefig('figures/keratocyte_boxplot.pdf') #save the figure

# summary statistics for each box
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Benchmark:', err1), ('Keratocyte:', err2), ('Transfer Learning:', err3)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Cell-level RMSE boxplot for NHK0001 keratinocytes: CNCC test-set benchmark (df),
# NHK0001 model (df2) and CNCC-to-NHK0001 transfer model (df3).
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/NHK0001_predictions.csv')
df3=pd.read_csv('data/NHK0001_transfer_predictions.csv')

err1 = [] # benchmark
err2 = [] # keratinocyte model
err3 = [] # transfer model

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    cncc = df[(df['set']==2) & (df['pred_dir'].notna()) & (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = cncc[cncc['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err1.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

efs = df2['ef'].unique() #the list of EFs
for ef in efs:
    # NOTE(review): tracks >= 5 are presumably the held-out cells - confirm
    ker = df2[(df2['pred_dir'].notna()) & (df2['ef']==ef) & (df2['track']>=5)]
    # filter df3 on its own 'track' column; the mask previously used df2['track'],
    # which is only correct when both files have identical row order
    ker_transfer = df3[(df3['pred_dir'].notna()) & (df3['ef']==ef) & (df3['track']>=5)]
    for track in ker['track'].unique():
        cell_errors1 = ker[ker['track']==track]['pred_error']
        cell_errors2 = ker_transfer[ker_transfer['track']==track]['pred_error']
        err2.append(math.sqrt(mean_squared_error(cell_errors1, np.zeros(len(cell_errors1)))))
        err3.append(math.sqrt(mean_squared_error(cell_errors2, np.zeros(len(cell_errors2)))))

errors = [err1, err2, err3]

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for NHK0001 Keratinocytes')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, showmeans=True, showfliers=False)
plt.xticks(np.arange(1,4), labels=['Benchmark', 'Keratinocyte Model', 'CNCC to Keratinocyte Transfer'])

plt.savefig('figures/NHK0001_boxplot.pdf') #save the figure

# summary statistics for each box
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Benchmark:', err1), ('NHK0001:', err2), ('Transfer:', err3)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Cell-level RMSE boxplot for NHK0002 keratinocytes: CNCC test-set benchmark (df),
# NHK0002 model (df2) and CNCC-to-NHK0002 transfer model (df3).
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/NHK0002_predictions.csv')
df3=pd.read_csv('data/NHK0002_transfer_predictions.csv')

err1 = [] # benchmark
err2 = [] # keratinocyte model
err3 = [] # transfer model

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    cncc = df[(df['set']==2) & (df['pred_dir'].notna()) & (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = cncc[cncc['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err1.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

efs = df2['ef'].unique() #the list of EFs
for ef in efs:
    # NOTE(review): tracks >= 5 are presumably the held-out cells - confirm
    ker = df2[(df2['pred_dir'].notna()) & (df2['ef']==ef) & (df2['track']>=5)]
    # filter df3 on its own 'track' column; the mask previously used df2['track'],
    # which is only correct when both files have identical row order
    ker_transfer = df3[(df3['pred_dir'].notna()) & (df3['ef']==ef) & (df3['track']>=5)]
    for track in ker['track'].unique():
        cell_errors1 = ker[ker['track']==track]['pred_error']
        cell_errors2 = ker_transfer[ker_transfer['track']==track]['pred_error']
        err2.append(math.sqrt(mean_squared_error(cell_errors1, np.zeros(len(cell_errors1)))))
        err3.append(math.sqrt(mean_squared_error(cell_errors2, np.zeros(len(cell_errors2)))))

errors = [err1, err2, err3]

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for NHK0002 Keratinocytes')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, showmeans=True, showfliers=False)
plt.xticks(np.arange(1,4), labels=['Benchmark', 'Keratinocyte Model', 'CNCC to Keratinocyte Transfer'])

plt.savefig('figures/NHK0002_boxplot.pdf') #save the figure

# summary statistics for each box
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Benchmark:', err1), ('NHK0002:', err2), ('Transfer:', err3)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Cell-level RMSE boxplot for NHK0802-YL112208 keratinocytes: CNCC test-set
# benchmark (df), NHK0802-YL112208 model (df2) and transfer model (df3).
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/NHK0802-YL112208_predictions.csv')
df3=pd.read_csv('data/NHK0802-YL112208_transfer_predictions.csv')

err1 = [] # benchmark
err2 = [] # keratinocyte model
err3 = [] # transfer model

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    cncc = df[(df['set']==2) & (df['pred_dir'].notna()) & (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = cncc[cncc['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err1.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

efs = df2['ef'].unique() #the list of EFs
for ef in efs:
    # NOTE(review): tracks >= 10 are presumably the held-out cells - confirm
    ker = df2[(df2['pred_dir'].notna()) & (df2['ef']==ef) & (df2['track']>=10)]
    # filter df3 on its own 'track' column; the mask previously used df2['track'],
    # which is only correct when both files have identical row order
    ker_transfer = df3[(df3['pred_dir'].notna()) & (df3['ef']==ef) & (df3['track']>=10)]
    for track in ker['track'].unique():
        cell_errors1 = ker[ker['track']==track]['pred_error']
        cell_errors2 = ker_transfer[ker_transfer['track']==track]['pred_error']
        err2.append(math.sqrt(mean_squared_error(cell_errors1, np.zeros(len(cell_errors1)))))
        err3.append(math.sqrt(mean_squared_error(cell_errors2, np.zeros(len(cell_errors2)))))

errors = [err1, err2, err3]

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for NHK0802-YL112208 Keratinocytes')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, showmeans=True, showfliers=False)
plt.xticks(np.arange(1,4), labels=['Benchmark', 'Keratinocyte Model', 'CNCC to Keratinocyte Transfer'])

plt.savefig('figures/NHK0802-YL112208_boxplot.pdf') #save the figure

# summary statistics for each box
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Benchmark:', err1), ('NHK0802-YL112208:', err2), ('Transfer:', err3)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Cell-level RMSE boxplot for the polarity reversal dataset, four boxes:
# CNCC test-set benchmark (df), benchmark model on the reversal set (df2),
# reversal model (df3) and original-to-reversal transfer model (df4).
df=pd.read_csv('data/cncc_predictions.csv')
df3=pd.read_csv('data/cncc_reverse_predictions.csv')
df4=pd.read_csv('data/cncc_reverse_transfer_predictions.csv')
df2=pd.read_csv('data/cncc_reverse_benchmark_predictions.csv')

err1 = [] # benchmark
err2 = [] # benchmark model on reversal set
err3 = [] # reversal model
err4 = [] # transfer model

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    cncc = df[(df['set']==2) & (df['pred_dir'].notna()) & (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = cncc[cncc['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err1.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# NOTE(review): tracks 21-50 are presumably the held-out reversal cells - confirm
cells2 = df2[(df2['pred_dir'].notna()) & (df2['track']>20)]
cells3 = df3[(df3['pred_dir'].notna()) & (df3['track']>20)]
cells4 = df4[(df4['pred_dir'].notna()) & (df4['track']>20)]
for track in range(21,51):
    cell_errors2 = cells2[cells2['track']==track]['pred_error']
    cell_errors3 = cells3[cells3['track']==track]['pred_error']
    cell_errors4 = cells4[cells4['track']==track]['pred_error']

    err2.append(math.sqrt(mean_squared_error(cell_errors2, np.zeros(len(cell_errors2)))))
    err3.append(math.sqrt(mean_squared_error(cell_errors3, np.zeros(len(cell_errors3)))))
    err4.append(math.sqrt(mean_squared_error(cell_errors4, np.zeros(len(cell_errors4)))))

errors = [err1, err2, err3, err4]

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for Polarity Reversal Dataset')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, showmeans=True, showfliers=False)
plt.xticks(np.arange(1,5), labels=['Benchmark', 'Benchmark Model on Reversal Set', 'Reversal Model', 'Original to Reversal Transfer'])

plt.savefig('figures/reverse_boxplot.pdf') #save the figure

# summary statistics for each box; the labels now name the list they describe
# (err2-err4 previously carried labels copy-pasted from the keratinocyte cells)
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Benchmark:', err1),
                    ('Original on Reverse:', err2),
                    ('Reverse Model:', err3),
                    ('Transfer on Reverse:', err4)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# Cell-level RMSE boxplot for the polarity reversal dataset, three boxes:
# CNCC test-set benchmark (df), benchmark model on the reversal set (df2) and
# original-to-reversal transfer model (df4). The reversal model (df3) errors
# are computed but not plotted.
df=pd.read_csv('data/cncc_predictions.csv')
df2=pd.read_csv('data/cncc_reverse_benchmark_predictions.csv')
df3=pd.read_csv('data/cncc_reverse_predictions.csv')
df4=pd.read_csv('data/cncc_reverse_transfer_predictions.csv')

err1 = [] # benchmark
err2 = [] # benchmark model on reversal set
err3 = [] # reversal model (not plotted)
err4 = [] # transfer model

efs = [0,15,30,50,75,100,200] #the list of EFs
for ef in efs:
    cncc = df[(df['set']==2) & (df['pred_dir'].notna()) & (df['ef']==ef)]
    for track in range(1,51):
        cell_errors = cncc[cncc['track']==track]['pred_error']
        # zero vector sized from the data rather than a hard-coded 17
        err1.append(math.sqrt(mean_squared_error(cell_errors, np.zeros(len(cell_errors)))))

# NOTE(review): tracks 21-50 are presumably the held-out reversal cells - confirm
cells2 = df2[(df2['pred_dir'].notna()) & (df2['track']>20)]
cells3 = df3[(df3['pred_dir'].notna()) & (df3['track']>20)]
cells4 = df4[(df4['pred_dir'].notna()) & (df4['track']>20)]
for track in range(21,51):
    cell_errors2 = cells2[cells2['track']==track]['pred_error']
    cell_errors3 = cells3[cells3['track']==track]['pred_error']
    cell_errors4 = cells4[cells4['track']==track]['pred_error']

    err2.append(math.sqrt(mean_squared_error(cell_errors2, np.zeros(len(cell_errors2)))))
    err3.append(math.sqrt(mean_squared_error(cell_errors3, np.zeros(len(cell_errors3)))))
    err4.append(math.sqrt(mean_squared_error(cell_errors4, np.zeros(len(cell_errors4)))))

errors = [err1, err2, err4]

fig, ax = plt.subplots()
ax.set_title('Prediction Errors for Polarity Reversal Dataset')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE of Individual Cells')
fig.set_size_inches(14, 8) # set figure size

ax.boxplot(errors, showmeans=True, showfliers=False)
plt.xticks(np.arange(1,len(errors)+1), labels=['Benchmark', 'Benchmark Model on Reversal Set', 'Original to Reversal Transfer'])

# NOTE(review): the four-box reversal cell earlier in this notebook saves to the
# same path, so whichever cell runs last determines the file on disk.
plt.savefig('figures/reverse_boxplot.pdf') #save the figure

# summary statistics for each plotted box
# NOTE: `interpolation=` is the pre-1.22 NumPy name of np.percentile's `method=`.
for label, errs in (('Benchmark:', err1),
                    ('Original on Reverse:', err2),
                    ('Transfer on Reverse:', err4)):
    Q1 = np.percentile(errs, 25, interpolation = 'midpoint')
    Q3 = np.percentile(errs, 75, interpolation = 'midpoint')
    IQR = Q3 - Q1
    print(label)
    print('median: {}, IQR: {}, mean: {}, standard deviation: {}'.format(statistics.median(errs), IQR, statistics.mean(errs), statistics.stdev(errs)))

In [None]:
# For every EF, plot per timestep the mean ground-truth directedness (dashed)
# against the mean of the 50 models' average predictions (solid), with a shaded
# band spanning the lowest and highest per-model average.
df = pd.read_csv('data/cncc_predictions.csv')
test = df[df['set']==2]

efs = [0,15,30,50,75,100,200]
pred_slices = range(21,38) # slices where predictions are made

fig, ax = plt.subplots()
fig.set_size_inches(14, 8) # set figure size
fig.set_dpi(400)

# per-EF series, keyed by EF; each value holds one entry per slice
max_avs, mean_avs, min_avs = {}, {}, {}
act_means, act_stds = {}, {}

for ef in efs:
    this_ef = test[test['ef']==ef]
    max_av, mean_av, min_av = [], [], [] # max / mean / min model-average prediction per slice
    act_mean, act_std = [], []           # mean / std dev of the true dir value per slice

    for sl in pred_slices:
        this_slice = this_ef[this_ef['slice']==sl]
        true_dirs = this_slice['cum_dir']

        act_mean.append(statistics.mean(true_dirs))
        act_std.append(statistics.stdev(true_dirs))

        # average predicted directedness for this ef+slice, one value per model
        all_avs = [statistics.mean(this_slice['pred_dir{}'.format(model)].dropna())
                   for model in range(50)]

        max_av.append(max(all_avs))
        mean_av.append(statistics.mean(all_avs))
        min_av.append(min(all_avs))

    max_avs[ef] = max_av
    mean_avs[ef] = mean_av
    min_avs[ef] = min_av
    act_means[ef] = act_mean
    act_stds[ef] = act_std

colors = ['#1e14de','#1873db','#2be3e0','#25c460','#ede72b','#ebba28','#db3627']

lines = []  # mean-prediction line handles
shades = [] # min-max band handles (used for the legend)
for ef, band_color in zip(efs, colors):
    line, = ax.plot(pred_slices, mean_avs[ef], color='k')
    lines.append(line)

    shade = ax.fill_between(pred_slices, min_avs[ef], max_avs[ef], color=band_color, alpha=0.3)
    shades.append(shade)

    ax.plot(pred_slices, act_means[ef], color='k', linestyle='--') #actual mean

#example lines to use for legend
ex_pred, = ax.plot([], color='k')
ex_true, = ax.plot([], color='k', linestyle='--')

legend_handles = shades + [ex_pred, ex_true]
legend_labels = ['{}mV/mm'.format(ef) for ef in efs] + ['Mean Prediction','Mean Ground Truth']
lgd = ax.legend(legend_handles, legend_labels,
                bbox_to_anchor=(.97,.97), fancybox=True, shadow=True, loc='upper left', fontsize=22)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
ax.set_xlabel('Timestep',fontsize=28)
ax.set_ylabel('Directedness',fontsize=28)
ax.set_title('Average Ground Truth vs Predicted Directedness By Timestep',fontsize=32)

# the legend sits outside the axes, so it is passed as an extra artist for the tight bbox
fig.savefig('figures/1step_pred_confidence.pdf',bbox_extra_artists=(lgd,),bbox_inches='tight') #save the figure

In [None]:
# Plot 'cum_dir' for every track of set 2 at ef == 50 (blue), with the
# per-slice average across tracks overlaid in red.
df = pd.read_csv('data/cncc_predictions.csv')
test = df[(df['set']==2) & (df['ef']==50)]

fig, ax = plt.subplots()
fig.set_size_inches(14, 8) # set figure size

slices = range(1,38)  # x positions; also the slice numbers that get averaged

# one blue curve per track
for track in range(1,51):
    plt.plot(slices, test.loc[test['track']==track, 'cum_dir'], 'b')

# mean 'cum_dir' over all selected rows belonging to each slice
avgs = [statistics.mean(test.loc[test['slice']==sl, 'cum_dir']) for sl in slices]

plt.plot(slices, avgs, 'r')

### Directedness Over Time

In [None]:
# Plot 'cum_dir' against ('slice' - 1) for tracks 1..50 of one ef / set
# combination and save the figure as a PDF.
df = pd.read_csv('data/cncc.csv')

# which condition to plot; the output filename below follows these values
ef = 200
setnum = 2

fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot()

# filter once for the chosen condition instead of re-filtering for every track
condition = df.loc[(df['ef']==ef) & (df['set']==setnum)]
for track in range(1,51):
    cell = condition.loc[condition['track']==track]
    ax.plot(cell['slice']-1,cell['cum_dir'])

# draw the axes through the origin in place of the default frame
ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')

for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_visible(False)

ax.set_xlim([-.1, 36])
ax.set_xticks([])
ax.set_xticklabels([])
ax.set_xlabel('Time', fontsize=20)

ax.set_yticks([-1,0,1])
ax.set_yticklabels([-1,0,1], fontsize=20)
ax.set_ylabel('Directedness', fontsize=20)

# build the filename from ef/setnum so that changing the parameters above
# cannot overwrite the figure of a different condition
fig.savefig('dir_over_time_{}mVmm_set{}.pdf'.format(ef, setnum))

In [None]:
# Plot the 'x' / 'y' positions of tracks 1..50 for one ef / set combination
# and save the figure as a PDF.
df = pd.read_csv('data/cncc.csv')

# which condition to plot; the output filename below follows these values
ef = 200
setnum = 2

fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot()

# filter once for the chosen condition instead of re-filtering for every track
condition = df.loc[(df['ef']==ef) & (df['set']==setnum)]
for track in range(1,51):
    cell = condition.loc[condition['track']==track]
    ax.plot(cell['x'],cell['y'])

# draw the axes through the origin in place of the default frame
ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')

for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_visible(False)

ax.set_xticks([])
ax.set_xticklabels([])

ax.set_yticks([])
ax.set_yticklabels([])

# build the filename from ef/setnum so that changing the parameters above
# cannot overwrite the figure of a different condition
fig.savefig('groundtruth_{}mVmm_set{}.pdf'.format(ef, setnum))

In [None]:
# Stitch the 'right' segment of every track onto the end of its 'left'
# segment: the 'right' positions are re-expressed (columns 'x2'/'y2') so that
# they continue from the last 'left' position, and directedness is recomputed
# from those continued coordinates.
dfr = pd.read_csv('data/cncc_reverse.csv')
df_left = dfr.loc[dfr['set']=='left']
df_right = dfr.loc[dfr['set']=='right']

# loop invariants, computed once
is_right = dfr['set']=='right'
max_slice = df_left['slice'].max()    # last slice of the 'left' segment
n_slices = int(dfr['slice'].max())

for i in range(1, int(df_right['track'].max()) + 1):
    # final 'left' position of this track = starting point of its 'right' segment
    left_end = df_left.loc[(df_left['track']==i) & (df_left['slice']==max_slice)]
    last_x = left_end['x'].to_numpy()[0]
    last_y = left_end['y'].to_numpy()[0]

    right_track = is_right & (dfr['track']==i)

    first = right_track & (dfr['slice']==1)
    dfr.loc[first, 'x2'] = last_x
    dfr.loc[first, 'y2'] = last_y

    # accumulate step by step: new position = previous continued position
    # plus the displacement between consecutive 'right' slices
    for j in range(2, n_slices + 1):
        prev = right_track & (dfr['slice']==j-1)
        curr = right_track & (dfr['slice']==j)

        deltax = dfr.loc[curr, 'x'].to_numpy()[0] - dfr.loc[prev, 'x'].to_numpy()[0]
        prevx = dfr.loc[prev, 'x2'].to_numpy()[0]
        deltay = dfr.loc[curr, 'y'].to_numpy()[0] - dfr.loc[prev, 'y'].to_numpy()[0]
        prevy = dfr.loc[prev, 'y2'].to_numpy()[0]

        dfr.loc[curr, 'x2'] = prevx + deltax
        dfr.loc[curr, 'y2'] = prevy + deltay

# directedness of the continued coordinates: x2 / distance from the origin
dfr['dir2'] = dfr['x2']/np.sqrt(dfr['x2']**2 + dfr['y2']**2)

# NOTE(review): slice 25 is dropped from both sets and 'right' slices are then
# shifted by 24 -- presumably so the two segments form one continuous timeline;
# confirm against the layout of cncc_reverse.csv.
# .copy() makes the filtered frame independent, so the .loc assignments below
# do not write into a slice of the original frame (SettingWithCopyWarning).
dfr = dfr[dfr['slice'] != 25].copy()

right_rows = dfr['set']=='right'
dfr.loc[right_rows, 'x'] = dfr.loc[right_rows, 'x2']
dfr.loc[right_rows, 'y'] = dfr.loc[right_rows, 'y2']
dfr.loc[right_rows, 'cum_dir'] = dfr.loc[right_rows, 'dir2']
dfr.loc[right_rows, 'slice'] = dfr.loc[right_rows, 'slice'] + 24

# 'right' rows get the opposite sign: 'volt' keeps the signed value (ef / 1000),
# while 'ef' itself is reduced to its magnitude
dfr.loc[right_rows, 'ef'] = -dfr.loc[right_rows, 'ef']
dfr['volt'] = dfr['ef'] / 1000
dfr['ef'] = dfr['ef'].abs()

In [None]:
# One curve per track: 'cum_dir' against 'slice' over all rows of that track.
for track in dfr['track'].unique():
    track_rows = dfr.loc[dfr['track']==track]
    plt.plot(track_rows['slice'], track_rows['cum_dir'])

In [None]:
# Mean 'cum_dir' per slice with standard-error bars, saved as a PDF.
fig = plt.figure(figsize=(12,8),dpi=180)

# Select 'cum_dir' before aggregating: averaging the whole frame would also
# try to average the string-valued 'set' column, which newer pandas rejects.
dir_by_slice = dfr.groupby('slice')['cum_dir']
slice_mean = dir_by_slice.mean()
slice_sem = dir_by_slice.sem()

# use the group keys as x values so x and y are guaranteed to line up
plt.errorbar(slice_mean.index, slice_mean, yerr=slice_sem)
plt.title('Polarity Reversal Directedness',fontsize=28)
plt.xticks([0,10,20,30,40,49],fontsize=20)
plt.yticks(fontsize=20)
plt.xlabel('Timestep',fontsize=24)
plt.ylabel('Directedness',fontsize=24)

fig.savefig('figures/reversal_averagedir.pdf')