In [1]:
import os
from glob import glob

import matplotlib.backends.backend_pdf
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.axes import Axes
from matplotlib.lines import Line2D

In [2]:
# Spark (Demeter) runs: load the per-run statistics and reshape them into the
# common timing schema (data, #cores, run, process/cpu/non-cpu time, Platform).
demeter_fn = 'Spark/demeter_spark_comprehensive_stats_all_data.csv'
demeter_data = (
    pd.read_csv(demeter_fn)
    .loc[:, ['data', '#cores', 'run', 'demeter_log_runtime', 'demeter_log_cputime']]
    .rename(columns={'demeter_log_runtime': 'process time',
                     'demeter_log_cputime': 'cpu time'})
    # The logged CPU time is divided by 1000 before it is combined with process time.
    .assign(**{'cpu time': lambda frame: frame['cpu time'] / 1000})
    # Non-CPU time = (process time x number of cores) - CPU time.
    .assign(**{'non-cpu time': lambda frame: frame['process time'] * frame['#cores'] - frame['cpu time']})
    .assign(Platform='Spark')
)
cores = demeter_data['#cores'].tolist()

# Store every timing column as log10(value / 60) for plotting on a log axis.
demeter_data = demeter_data.assign(**{
    metric: np.log10(demeter_data[metric] / 60.0)
    for metric in ('cpu time', 'non-cpu time', 'process time')
})

In [3]:
# Minerva runs: load the usage table and reshape it into the common timing
# schema (data, #cores, run, process/cpu/non-cpu time, Platform).
minerva_fn = 'final_results/minerva-all-usage.csv'
minerva_data = (
    pd.read_csv(minerva_fn, index_col=0)
    .loc[:, ['data', 'cores', 'fold', 'duration', 'cpu', 'noncpu']]
    .rename(columns={'cores': '#cores',
                     'fold': 'run',
                     'duration': 'process time',
                     'cpu': 'cpu time',
                     'noncpu': 'non-cpu time'})
    .assign(Platform='Minerva')
    # Keep only rows whose raw CPU time exceeds 1000.
    .loc[lambda frame: frame['cpu time'] > 1000]
)

# Store every timing column as log10(value / 60) for plotting on a log axis.
minerva_data = minerva_data.assign(**{
    metric: np.log10(minerva_data[metric] / 60.0)
    for metric in ('cpu time', 'non-cpu time', 'process time')
})
minerva_data

Unnamed: 0,data,#cores,run,process time,cpu time,non-cpu time,Platform
0,drosophila,1,1,2.308529,2.318550,2.298271,Minerva
6,drosophila,1,2,2.292108,2.302114,2.281866,Minerva
12,drosophila,1,3,2.290962,2.299798,2.281942,Minerva
18,drosophila,1,4,2.292293,2.301573,2.282811,Minerva
24,drosophila,1,5,2.217045,2.219191,2.214888,Minerva
30,drosophila,1,6,2.168301,2.228058,2.098990,Minerva
36,drosophila,1,7,2.151676,2.211654,2.082067,Minerva
42,drosophila,1,8,2.151625,2.212942,2.080205,Minerva
48,drosophila,1,9,2.204527,2.206736,2.202307,Minerva
54,drosophila,1,10,2.198520,2.200121,2.196913,Minerva


In [4]:
def plt_time(dn):
    """Plot CPU, non-CPU and elapsed time against core count for one dataset.

    Minerva measurements are drawn with solid lines and Spark measurements
    with dashed lines; each metric keeps the same colour on both platforms.
    The function reads the module-level ``minerva_data`` and ``demeter_data``
    frames, whose time columns already hold ``log10(value / 60)``, and labels
    the y ticks as powers of ten accordingly.

    Parameters
    ----------
    dn : str
        Value of the ``data`` column that selects the dataset to plot.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was drawn, so callers can save or close it explicitly.
    """
    time_cols = ['process time', 'cpu time', 'non-cpu time']
    # (column, colour, legend wording) in the order used by the legend.
    series = [('cpu time', 'red', 'cpu time'),
              ('non-cpu time', 'green', 'non-cpu time'),
              ('process time', 'blue', 'elapsed time')]

    minerva_df = minerva_data.loc[minerva_data.data == dn]
    demeter_df = demeter_data.loc[demeter_data.data == dn]
    platforms = [('Minerva', minerva_df, '-'), ('Spark', demeter_df, '--')]

    # The style must be set before the axes are created for it to take effect.
    sns.set_style('whitegrid')
    figure, ax = plt.subplots(figsize=(10, 6))

    # pointplot treats x as categorical; a shared order keeps both platforms
    # aligned on the same core counts even if their sets of core counts differ.
    core_order = sorted(set(minerva_df['#cores']) | set(demeter_df['#cores']))

    handles, labels = [], []
    for platform, frame, linestyle in platforms:
        for column, colour, wording in series:
            if not frame.empty:
                sns.pointplot(data=frame, x='#cores', y=column, order=core_order,
                              linestyles=linestyle, color=colour, legend=False, ax=ax)
            handles.append(Line2D([0], [0], linewidth=2, linestyle=linestyle, color=colour))
            labels.append('%s %s' % (platform, wording))
    ax.legend(handles, labels, fontsize=14, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    # Tick range spans the smallest and largest plotted value over BOTH
    # platforms. Non-finite entries (log10 of a non-positive time) are ignored
    # so they cannot poison the min/max.
    plotted = np.concatenate([frame[time_cols].values.astype(float).ravel()
                              for _, frame, _ in platforms])
    plotted = plotted[np.isfinite(plotted)]
    if plotted.size:
        ymin = int(round(plotted.min()))
        ymax = int(round(plotted.max()))
        exponents = list(range(ymin, ymax + 1))
        ax.set_yticks(exponents)
        ax.set_yticklabels([r"$10^{{{0:d}}}$".format(e) for e in exponents], fontsize=16)

    ax.tick_params(axis='both', labelsize=16)
    ax.margins(0.02)
    ax.set_xlabel('#processors', fontsize=20)
    ax.set_ylabel('Computational time (Minute)', fontsize=20)
    ax.set_title('Computational time analysis for %s' % dn, fontsize=22)
    return figure

In [5]:
# Render every dataset's timing figure into one multi-page PDF.
# The context manager finalises the PDF even if a plot raises part-way through.
with matplotlib.backends.backend_pdf.PdfPages("final_results/computational_time.pdf") as pdf:
    for d in ['pf1','pf2','pf3','elegans','pacificus','remanei','drosophila','thaliana']:
        f = plt_time(d)
        if f is None:
            # plt_time may not return its figure; fall back to the one it just drew.
            f = plt.gcf()
        pdf.savefig(f, bbox_inches='tight')

In [6]:
# Write one PNG per dataset; create the output directory first so a fresh
# checkout does not fail on the first savefig call.
os.makedirs('final_results/individual_data_time', exist_ok=True)
for d in ['pf1','pf2','pf3','elegans','pacificus','remanei','drosophila','thaliana']:
    figd = plt_time(d)
    if figd is None:
        # plt_time may not return its figure; fall back to the one it just drew.
        figd = plt.gcf()
    figd.savefig('final_results/individual_data_time/%s_time.png' %d,bbox_inches='tight')