In [1]:
import MDAnalysis as mda
import time
import os
import numpy
from multiprocessing import Pool
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import vesicle_analysis_methods as vam
%matplotlib inline
print("MDA version: " + mda.__version__)

NameError: name '_radius_gyration_performance' is not defined

# Spuddafett Performance

In [None]:
# Machine label used in plot titles.
hostname = "spuddafett"
# Absolute locations for saved figures, cached data frames and benchmark systems.
fig_dir = "/nfs/homes/ikenney/Projects/vesicles/scripts/analysis/figs/"
data_dir = "/nfs/homes/ikenney/Projects/vesicles/scripts/analysis/data/"
systems_dir = "/nfs/homes/ikenney/Projects/vesicles/scripts/analysis/systems/"

# One entry per item found under systems_dir. Globbing the absolute directory
# (instead of the relative "systems/*") makes the result independent of the
# kernel's working directory; sorting makes the order reproducible.
values = sorted(os.path.basename(y) for y in glob.glob(systems_dir + "*"))

def fig_save(inputs):
    """Save the current matplotlib figure as PNG and PDF under ``fig_dir``.

    The file stem is ``inputs`` with spaces replaced by underscores and then
    lower-cased; one file is written per extension.
    """
    stem = fig_dir + inputs.replace(" ", "_").lower()
    for extension in (".png", ".pdf"):
        plt.savefig(stem + extension)
        
# Location of the cached benchmark table. The same path is used for the
# existence check, the read and the write, so a table created by this cell is
# found again on the next run.
cache_path = data_dir + "performance_combined.df"

if os.path.exists(cache_path):
    # NOTE: unpickling can execute arbitrary code; only load caches that this
    # notebook produced.
    systems = pd.read_pickle(cache_path)
    print("Pickle loaded")
else:
    # One row per entry in `values`, plus the extra '1_5M' and '3M' entries.
    systems = pd.DataFrame(index=values+['1_5M','3M'])
    location = '/nfs/homes/ikenney/Projects/vesicles/vesicle_library/lib/{}/'.format('rad_gpu')
    names = systems.index.tolist()
    systems['tops'] = [(location+'%s/nvt/nvt.tpr') % v for v in names]
    # NOTE(review): confirm nvt.xtc (not analysis.xtc) is the trajectory to benchmark.
    systems['traj'] = [(location+'%s/nvt/nvt.xtc') % v for v in names]
    systems['gros'] = [(location+'%s/emin/emin.gro') % v for v in names]
    systems['pdbs'] = [(location+'%s/emin/emin.pdb') % v for v in names]
    # Atom count of each system, read from its emin.tpr file.
    systems['sizes'] = [mda.Universe((location+'%s/emin/emin.tpr') % v).atoms.n_atoms for v in names]
    print("Pickle not loaded. Creating new dataframe")
    systems.to_pickle(cache_path)

In [None]:
# Run vam.integrity on an empty frame that has the same columns as `systems`.
# NOTE(review): what integrity() checks and what N=3 controls is defined in
# vesicle_analysis_methods, not in this notebook -- confirm before relying on it.
testing = vam.integrity(pd.DataFrame(columns=systems.columns.tolist()),N=3)

In [None]:
# Replace `testing` with the frame stored in backup.df (path is relative to
# the working directory).
# NOTE: unpickling can execute arbitrary code; only load files you trust.
testing = pd.read_pickle("backup.df")

In [None]:
# Pass the current `testing` frame back through vam.integrity and keep the result.
testing = vam.integrity(testing,N=3)

In [None]:
# Display the current `testing` frame.
testing

## Radius of Gyration

In [None]:
def performance(a):
    """Time the radius-of-gyration calculation for every frame of a trajectory.

    Parameters
    ----------
    a : sequence
        ``a[0]`` and ``a[1]`` are passed to ``mda.Universe``; the callers in
        this notebook supply a topology path and a trajectory path. Any
        further items are ignored.

    Returns
    -------
    list of float
        Elapsed wall-clock seconds of the calculation for each frame, in
        trajectory order.
    """
    u = mda.Universe(a[0], a[1])
    vals = []
    for _frame in u.trajectory:
        start = time.time()
        # Use the snake_case method name, matching the `n_atoms`-style API
        # used elsewhere in this notebook; the camelCase alias is not
        # available in current MDAnalysis releases.
        u.atoms.radius_of_gyration()
        vals.append(time.time() - start)
    return vals

Record the time taken by the radius of gyration calculation for each data set.

In [None]:
# Benchmark the radius-of-gyration calculation for every system in parallel.
# Each task is [topology, trajectory, size]; performance() reads only the
# first two entries.
processlist = [list(row) for row in zip(systems['tops'], systems['traj'], systems['sizes'])]
p = Pool(len(values))
try:
    data = p.map(performance, processlist)
finally:
    # Release the worker processes even if a task raised.
    p.close()
    p.join()
systems['rgyr'] = data
systems['rgyr_mean'] = [np.mean(r) for r in data]
systems['rgyr_std'] = [np.std(r) for r in data]
# Write to the path under data_dir that the setup cell checks and reads,
# rather than to the working directory.
systems.to_pickle(data_dir + "performance_combined.df")

In [None]:
#plt.figure(figsize=(8,4))
# Axis labels first, then the title, then the data; the error bars come from
# the rgyr_std column.
plt.xlabel("System Size (atoms)")
plt.ylabel("Calculation time (s)")
plt.title("Radius of Gyration Performance on {}".format(hostname))
plt.errorbar(
    systems['sizes'],
    systems['rgyr_mean'],
    yerr=systems['rgyr_std'],
)
#plt.savefig("/nfs/homes/ikenney/report/figs/geometric/calc_time_vs_system.png")
#plt.savefig("/nfs/homes/ikenney/report/figs/geometric/calc_time_vs_system.pdf")

## Load Times

Record the time taken to load a universe from different combinations of input files.

### TPR and XTC

In [None]:
N = 40
def performanceload(a):
    """Time repeated construction of a Universe from ``a[0]`` and ``a[1]``.

    ``a[3]`` is the number of repetitions; ``a[2]`` is not read. Returns a
    list with one elapsed wall-clock time in seconds per repetition.
    """
    repeats = a[3]
    timings = []
    for _ in range(repeats):
        began = time.time()
        u = mda.Universe(a[0], a[1])
        timings.append(time.time() - began)
        # Drop the universe only after the elapsed time has been taken.
        del u
    return timings

In [None]:
N = 40
# Each task is [topology, trajectory, size, repeats]; performanceload() reads
# entries 0, 1 and 3. The size entry is the per-system value, not the whole
# 'sizes' column.
processlist = [list(row) + [N] for row in zip(systems['tops'], systems['traj'], systems['sizes'])]
p = Pool(len(values))
try:
    dataload = p.map(performanceload, processlist)
finally:
    # Release the worker processes even if a task raised.
    p.close()
    p.join()
systems['load_tpr_xtc'] = dataload
systems['load_tpr_xtc_mean'] = [np.mean(r) for r in dataload]
systems['load_tpr_xtc_std'] = [np.std(r) for r in dataload]
# Write to the path under data_dir that the setup cell checks and reads,
# rather than to the working directory.
systems.to_pickle(data_dir + "performance_combined.df")

In [None]:
# Persist the table to the path under data_dir that the setup cell checks and
# reads, rather than to the working directory.
systems.to_pickle(data_dir + "performance_combined.df")

In [None]:
# Median of the recorded load times for each system (TPR + XTC benchmark).
systems['load_tpr_xtc_median'] = [np.median(times) for times in systems['load_tpr_xtc']]

In [None]:
N = 40
inputs = "TPR and XTC"
#plt.figure(figsize=(8,4))
# Label the axes, title the figure, draw the data, then save via fig_save.
plt.xlabel("System Size (atoms)")
plt.ylabel("Load time (s)")
plt.title("Load Time Performance on {0} loading {1} ({2} runs)".format(hostname,inputs,N))
A = plt.errorbar(
    systems['sizes'],
    systems['load_tpr_xtc_mean'],
    yerr=systems['load_tpr_xtc_std'],
)
fig_save(inputs)

### GRO only

In [None]:
N = 40
def performanceload(a):
    """Time repeated construction of a Universe from ``a[0]`` alone.

    ``a[2]`` is the number of repetitions; ``a[1]`` is not read. Returns a
    list with one elapsed wall-clock time in seconds per repetition.
    """
    repeats = a[2]
    timings = []
    for _ in range(repeats):
        began = time.time()
        u = mda.Universe(a[0])
        timings.append(time.time() - began)
        # Drop the universe only after the elapsed time has been taken.
        del u
    return timings

In [None]:
# Each task is [gro file, size, repeats]; performanceload() reads entries 0
# and 2.
processlist = [[gro, size, N] for gro, size in zip(systems['gros'], systems['sizes'])]
p = Pool(len(values))
try:
    dataload = p.map(performanceload, processlist)
finally:
    # Release the worker processes even if a task raised.
    p.close()
    p.join()
systems['load_gro'] = dataload
systems['load_gro_mean'] = [np.mean(r) for r in dataload]
systems['load_gro_std'] = [np.std(r) for r in dataload]
# Write to the path under data_dir that the setup cell checks and reads,
# rather than to the working directory.
systems.to_pickle(data_dir + "performance_combined.df")

In [None]:
inputs = "GRO"
#plt.figure(figsize=(8,4))
# Label the axes, title the figure, then draw mean load time with error bars.
plt.xlabel("System Size (atoms)")
plt.ylabel("Load time (s)")
plt.title("Load Time Performance on {0} loading {1} ({2} runs)".format(hostname,inputs,N))
B = plt.errorbar(
    systems['sizes'],
    systems['load_gro_mean'],
    yerr=systems['load_gro_std'],
)
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower())
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower()+".pdf")

### PDB only

In [None]:
N = 40
def performanceload(a):
    """Time repeated construction of a Universe from ``a[0]`` alone.

    ``a[2]`` is the number of repetitions; ``a[1]`` is not read. Returns a
    list with one elapsed wall-clock time in seconds per repetition.
    """
    repeats = a[2]
    timings = []
    for _ in range(repeats):
        began = time.time()
        u = mda.Universe(a[0])
        timings.append(time.time() - began)
        # Drop the universe only after the elapsed time has been taken.
        del u
    return timings

In [None]:
# Each task is [pdb file, size, repeats]; performanceload() reads entries 0
# and 2.
processlist = [[pdb, size, N] for pdb, size in zip(systems['pdbs'], systems['sizes'])]
p = Pool(len(values))
try:
    dataload = p.map(performanceload, processlist)
finally:
    # Release the worker processes even if a task raised.
    p.close()
    p.join()
systems["load_pdb"] = dataload
systems['load_pdb_mean'] = [np.mean(r) for r in dataload]
systems['load_pdb_std'] = [np.std(r) for r in dataload]
# Write to the path under data_dir that the setup cell checks and reads,
# rather than to the working directory.
systems.to_pickle(data_dir + "performance_combined.df")

In [None]:
inputs = "PDB"
#plt.figure(figsize=(8,4))
# Label the axes, title the figure, then draw mean load time with error bars.
plt.xlabel("System Size (atoms)")
plt.ylabel("Load time (s)")
plt.title("Load Time Performance on {0} loading {1} ({2} runs)".format(hostname,inputs,N))
C = plt.errorbar(
    systems['sizes'],
    systems['load_pdb_mean'],
    yerr=systems['load_pdb_std'],
)
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower())
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower()+".pdf")

### GRO and XTC

In [None]:
def performanceload(a):
    """Time repeated construction of a Universe from ``a[0]`` and ``a[1]``.

    ``a[3]`` is the number of repetitions; ``a[2]`` is not read. Returns a
    list with one elapsed wall-clock time in seconds per repetition.
    """
    repeats = a[3]
    timings = []
    for _ in range(repeats):
        began = time.time()
        u = mda.Universe(a[0], a[1])
        timings.append(time.time() - began)
        # Drop the universe only after the elapsed time has been taken.
        del u
    return timings

In [None]:
N = 40
# Each task is [gro file, trajectory, size, repeats]; performanceload() reads
# entries 0, 1 and 3.
processlist = [list(row) + [N] for row in zip(systems['gros'], systems['traj'], systems['sizes'])]
p = Pool(len(values))
try:
    dataload = p.map(performanceload, processlist)
finally:
    # Release the worker processes even if a task raised.
    p.close()
    p.join()
systems['load_gro_xtc'] = dataload
systems['load_gro_xtc_mean'] = [np.mean(r) for r in dataload]
systems['load_gro_xtc_std'] = [np.std(r) for r in dataload]
# Write to the path under data_dir that the setup cell checks and reads,
# rather than to the working directory.
systems.to_pickle(data_dir + "performance_combined.df")

In [None]:
inputs = "GRO and XTC"
#plt.figure(figsize=(8,4))
# Label the axes, title the figure, then draw mean load time with error bars.
plt.xlabel("System Size (atoms)")
plt.ylabel("Load time (s)")
plt.title("Load Time Performance on {0} loading {1} ({2} runs)".format(hostname,inputs,N))
D = plt.errorbar(
    systems['sizes'],
    systems['load_gro_xtc_mean'],
    yerr=systems['load_gro_xtc_std'],
)
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower())
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower()+".pdf")

### PDB and XTC

In [None]:
def performanceload(a):
    """Time repeated construction of a Universe from ``a[0]`` and ``a[1]``.

    ``a[3]`` is the number of repetitions; ``a[2]`` is not read. Returns a
    list with one elapsed wall-clock time in seconds per repetition.
    """
    repeats = a[3]
    timings = []
    for _ in range(repeats):
        began = time.time()
        u = mda.Universe(a[0], a[1])
        timings.append(time.time() - began)
        # Drop the universe only after the elapsed time has been taken.
        del u
    return timings

In [None]:
N = 40
# Each task is [pdb file, trajectory, size, repeats]; performanceload() reads
# entries 0, 1 and 3. The structure column is 'pdbs' because the results are
# stored as the PDB + XTC benchmark (load_pdb_xtc).
processlist = [list(row) + [N] for row in zip(systems['pdbs'], systems['traj'], systems['sizes'])]
p = Pool(len(values))
try:
    dataload = p.map(performanceload, processlist)
finally:
    # Release the worker processes even if a task raised.
    p.close()
    p.join()
systems['load_pdb_xtc'] = dataload
systems['load_pdb_xtc_mean'] = [np.mean(r) for r in dataload]
systems['load_pdb_xtc_std'] = [np.std(r) for r in dataload]
# Write to the path under data_dir that the setup cell checks and reads,
# rather than to the working directory.
systems.to_pickle(data_dir + "performance_combined.df")

In [None]:
inputs = "PDB and XTC"
#plt.figure(figsize=(8,4))
# Label the axes, title the figure, then draw mean load time with error bars.
plt.xlabel("System Size (atoms)")
plt.ylabel("Load time (s)")
plt.title("Load Time Performance on {0} loading {1} ({2} runs)".format(hostname,inputs,N))
E = plt.errorbar(
    systems['sizes'],
    systems['load_pdb_xtc_mean'],
    yerr=systems['load_pdb_xtc_std'],
)
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower())
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/"+inputs.replace(" ","_").lower()+".pdf")

In [None]:
# Overlay all five load-time benchmarks on one set of axes. The title is built
# from `hostname` and `N` so it stays in step with the notebook configuration.
plt.title("Load Time Performance on {0} ({1} runs)".format(hostname, N))
plt.ylabel("Load Time (s)")
plt.xlabel("System Size (atoms)")
# (column prefix, legend label) for each benchmark, in plotting order.
load_series = [
    ("load_tpr_xtc", "TPR,XTC"),
    ("load_gro", "GRO"),
    ("load_pdb", "PDB"),
    ("load_gro_xtc", "GRO,XTC"),
    ("load_pdb_xtc", "PDB,XTC"),
]
plots = [
    plt.errorbar(systems['sizes'], systems[prefix + '_mean'], yerr=systems[prefix + '_std'])
    for prefix, _label in load_series
]
plt.legend(plots, [label for _prefix, label in load_series], loc='best')
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/combined_load")
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/combined_load.pdf")

In [None]:
# NOTE: this rebinds `systems` to the `testing` frame, so any cell run after
# this one that uses `systems` sees the `testing` data.
systems = testing
# Overlay all five load-time benchmarks on one set of axes. The title is built
# from `hostname` and `N` so it stays in step with the notebook configuration.
plt.title("Load Time Performance on {0} ({1} runs)".format(hostname, N))
plt.ylabel("Load Time (s)")
plt.xlabel("System Size (atoms)")
# (column prefix, legend label) for each benchmark, in plotting order.
load_series = [
    ("load_tpr_xtc", "TPR,XTC"),
    ("load_gro", "GRO"),
    ("load_pdb", "PDB"),
    ("load_gro_xtc", "GRO,XTC"),
    ("load_pdb_xtc", "PDB,XTC"),
]
plots = [
    plt.errorbar(systems['sizes'], systems[prefix + '_mean'], yerr=systems[prefix + '_std'])
    for prefix, _label in load_series
]
plt.legend(plots, [label for _prefix, label in load_series], loc='best')
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/combined_load")
#plt.savefig("/nfs/homes/ikenney/report/figs/loading/combined_load.pdf")

In [None]:
# Display `systems` without the row(s) labelled systems.index[10]. drop()
# returns a new frame; the result is not assigned, so `systems` is unchanged.
systems.drop(systems.index[10])

In [None]:
# Display `testing` without the row(s) carrying the last index label. drop()
# returns a new frame; the result is not assigned, so `testing` is unchanged.
testing.drop(testing.index[-1])

In [None]:
# Display an empty frame with the same columns as `testing` (not stored).
pd.DataFrame(columns=testing.columns.tolist())

In [None]:
# Display the current `testing` frame.
testing

In [None]:
# Display the current `testing` frame.
testing

In [None]:
# Write `testing` to backup.df (path is relative to the working directory).
testing.to_pickle("backup.df")

In [None]:
# Display the current `testing` frame.
testing

In [None]:
# Check whether the "pdbs" entry of the row labelled "30" is non-null.
testing["pdbs"].notnull().loc["30"]