# Compare methods notebook

In [1]:
import glob
import os
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
from sys import getsizeof
from objsize import get_deep_size
import matplotlib.pyplot as plt
from modelproblem import ModelProblem
from result_classes import Result,MethodResults

In [3]:
# Benchmark problem to analyse and the sampling methods being compared.
prob_name = "Calcium_Oscillate"
methods = ["smc", "pmc", "ptmcmc"]

# Number of result files to load per method. The default of 1 keeps the quick
# look behaviour of reading only the first pickle of each method; set it to
# None to load every run found on disk.
max_files_per_method = 1

mod_prob = ModelProblem(prob_name)
mod_prob.initialize()

# One MethodResults container per method, in the same order as `methods`.
grouped_results = [MethodResults(x) for x in methods]

for method, group_obj in zip(methods, grouped_results):
	result_dir = f"results/{prob_name}/{method}/"
	# glob returns matches in arbitrary order; sort so that the files that get
	# loaded, and the order in which they are added, are reproducible.
	fnames = sorted(glob.glob(result_dir + "*.pkl"))
	for fname in fnames[:max_files_per_method]:
		# NOTE(security): pickle.load can execute arbitrary code stored in the
		# file; only load result files produced by trusted runs.
		with open(fname, "rb") as f:
			results = pickle.load(f)
		# Report the on-disk size next to the deep in-memory size of the
		# unpickled object (the file no longer needs to be open for this).
		print(f"{fname}:\t{os.path.getsize(fname)}\t{get_deep_size(results)}")
		result_obj = Result(results)
		group_obj.add_result(result_obj)
print(type(mod_prob.problem))

results/Calcium_Oscillate/smc/Calcium_Oscillate_smc_0seed.pkl:	14155424	2846
results/Calcium_Oscillate/pmc/Calcium_Oscillate_pmc_0seed.pkl:	12475136	2818
results/Calcium_Oscillate/ptmcmc/Calcium_Oscillate_ptmcmc_0seed.pkl:	954029554	2169
<class 'pypesto.problem.base.Problem'>


## Calculate the percentage of runs that converged in PT-MCMC

In [None]:
# Report how many of the loaded PT-MCMC runs have their `converged` flag set.
try:
	# Only the lookup can legitimately raise ValueError, so keep the guarded
	# region to this single line instead of hiding errors from the summary.
	ptmcmc_idx = methods.index("ptmcmc")
except ValueError:
	print("'ptmcmc' is not in `methods`; skipping the convergence summary")
else:
	ptmcmc_group = grouped_results[ptmcmc_idx]
	n_runs = len(ptmcmc_group.all_runs)
	n_converged = sum(1 for run in ptmcmc_group.all_runs if run.converged)
	if n_runs:
		# `.2%` scales the ratio by 100 so the printed value really is a percentage.
		print(f"{n_converged:d} of {n_runs:d} runs have converged\nConvergence percentage: {n_converged / n_runs:.2%}")
	else:
		# Avoid dividing by zero when no PT-MCMC result has been loaded.
		print("No PT-MCMC runs loaded; convergence percentage is undefined")

## Plot the distribution of ALL log-likelihoods from ALL runs

In [None]:
# Box plot of the values returned by get_llhs() for each method, one box per
# method (outliers hidden).
plt.figure(dpi=300)

# One flattened array per method. Kept as a plain list because the arrays may
# have different lengths from method to method.
llhs = [x.get_llhs().flatten() for x in grouped_results]

# Building the frame from Series pads shorter columns with NaN, so methods
# with different numbers of values can share one DataFrame.
llh_df = pd.DataFrame(
	{method: pd.Series(llh_arr) for llh_arr, method in zip(llhs, methods)}
)

# Pass the frame by keyword: the meaning of the first positional argument has
# changed between seaborn releases.
sns.boxplot(data=llh_df, showfliers=False)
plt.xlabel("Method")
plt.ylabel("log likelihood");

In [None]:
# Violin plot of the values returned by get_fun_calls(), one violin per method.
plt.figure(dpi=300)

# One row per method group, in the same order as `grouped_results`.
calls = np.array([group.get_fun_calls() for group in grouped_results])
# Transpose so that every method becomes a column of the frame.
call_df = pd.DataFrame(calls.T, columns=methods)

sns.violinplot(call_df)

# Label the x ticks with each group's abbreviation.
tick_labels = [group.abbr for group in grouped_results]
plt.xticks(range(len(methods)), tick_labels)
plt.xlabel("Method")
plt.ylabel("# of Objective Function Calls")

print(f"MAX NUM FUNC CALLS: {np.max(calls)}")

In [None]:
from objsize import get_deep_size


def _dtype_summary(value):
	"""Return a short element-type description for an ndarray or a list."""
	if isinstance(value, np.ndarray):
		return value.dtype
	# Lists have no `.dtype`; report the distinct element type names instead.
	return sorted({type(item).__name__ for item in value})


# Attribute-by-attribute comparison of the deep in-memory size of one PMC run
# and one PT-MCMC run (the same run index in both groups).
run_idx = 7
pmc_res = grouped_results[methods.index("pmc")].all_runs[run_idx]
ptmcmc_res = grouped_results[methods.index("ptmcmc")].all_runs[run_idx]

# Running totals; named so that the builtin `bytes` is not shadowed for the
# rest of the notebook session.
pmc_bytes = 0
ptmcmc_bytes = 0

for key, ptmcmc_val in vars(ptmcmc_res).items():
	# Assumes the PMC result exposes the same attribute names as the PT-MCMC
	# one; a missing attribute raises KeyError here.
	pmc_val = vars(pmc_res)[key]
	pmc_size = get_deep_size(pmc_val)
	ptmcmc_size = get_deep_size(ptmcmc_val)

	print(f"{key}:\tPMC->{pmc_size}\tPTMCMC->{ptmcmc_size}")

	labelled = (("PMC", pmc_val), ("PTMCMC", ptmcmc_val))
	# For dict-valued attributes, break the size down per entry.
	for label, val in labelled:
		if isinstance(val, dict):
			for sub_key, sub_val in val.items():
				print(f"\t\t{sub_key}:\t{label}->{get_deep_size(sub_val)}")
	# For list/array attributes, show what kind of elements they hold.
	for label, val in labelled:
		if isinstance(val, (list, np.ndarray)):
			print(f"\t\t {label}:", _dtype_summary(val))

	pmc_bytes += pmc_size
	ptmcmc_bytes += ptmcmc_size
print(f"TOTAL: {pmc_bytes} {ptmcmc_bytes}")

In [None]:
# Call get_convergence_times() on the group at index 1 (the "pmc" entry of
# `methods`) with a very low log-likelihood threshold; the returned value is
# shown as the cell output.
llh_threshold = -100_000
pmc_group = grouped_results[1]
pmc_group.get_convergence_times(llh_threshold)

In [None]:
# Swarm plot of the values returned by get_convergence_times() for each
# method, using the log-likelihood threshold below.
llh_threshold = -2000
plt.figure(dpi=300)

# One sequence per method. Kept as a plain list because the sequences are not
# guaranteed to have the same length for every method.
conv = [x.get_convergence_times(llh_threshold) for x in grouped_results]
# Building the frame from Series pads shorter columns with NaN instead of
# failing on unequal lengths.
conv_df = pd.DataFrame(
	{method: pd.Series(times) for method, times in zip(methods, conv)}
)

# Pass the frame by keyword: the meaning of the first positional argument has
# changed between seaborn releases.
sns.swarmplot(data=conv_df)
plt.xticks(range(len(methods)), [x.abbr for x in grouped_results])
plt.xlabel("Method")
plt.ylabel("# Function Evaluations until Convergence");