# Analysis

In this notebook I analyze the results of the runs performed with the rare event algorithm and produce the figures for my PhD thesis.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib widget
import matplotlib
matplotlib.rc('font', size=18)
from tqdm.notebook import tqdm
import os
import uncertainties as unc

import general_purpose.uplotlib as uplt
import general_purpose.utilities as ut
import rea.reconstruct as rec


In [None]:
def get_run(folder, iteration=None):
    """Load the reconstruction of a run together with the observables of its members.

    Parameters
    ----------
    folder : str
        Run folder containing the iteration subfolders (named 'i' followed by a number).
    iteration : int, optional
        Iteration to reconstruct from (subfolder ``i{iteration:04d}``).
        If None, the cached ``{folder}/reconstructed.json`` is used when it exists;
        otherwise the last iteration found on disk is reconstructed and the result
        is cached in that file.

    Returns
    -------
    dict
        'folder': the run folder,
        'rec_dict': the reconstruction dictionary,
        'abs_observables': {member name: column 1 of the last row of the last segment},
        'rel_observables': {member name: same value minus column 1 of the first row
                            of the first segment},
        'duration': column 0 at the last row of the last segment minus column 0 at the
                    first row of the first segment (taken from the last member).

    Raises
    ------
    ValueError
        If no iteration folder can be found, or if the reconstruction has no members.
    """
    if iteration is None:
        try:
            rec_dict = ut.json2dict(f'{folder}/reconstructed.json')
        except FileNotFoundError:
            # detect last iteration: only names of the form 'i<digits>' count, so any
            # other entry starting with 'i' cannot break the int conversion
            candidates = [f for f in os.listdir(folder) if f[:1] == 'i' and f[1:].isdecimal()]
            if not candidates:
                raise ValueError(f'no iteration folder (i<number>) found in {folder!r}') from None
            # use the name found on disk rather than rebuilding it with a fixed padding
            last_iteration = max(candidates, key=lambda f: int(f[1:]))
            rec_dict = rec.reconstruct(f'{folder}/{last_iteration}')
            ut.dict2json(rec_dict, f'{folder}/reconstructed.json')
    else:
        rec_dict = rec.reconstruct(f'{folder}/i{iteration:04d}')

    if not rec_dict['members']:
        raise ValueError(f'the reconstruction of {folder!r} has no members')

    run = {'folder': folder, 'rec_dict': rec_dict, 'abs_observables': {}, 'rel_observables': {}}

    first_folder = rec_dict['folders'][0]
    last_folder = rec_dict['folders'][-1]
    for rname, r in rec_dict['members'].items():
        last_segment_traj = np.load(f"{folder}/{last_folder}/{r['ancestry'][-1]}-traj.npy")
        first_segment_traj = np.load(f"{folder}/{first_folder}/{r['ancestry'][0]}-traj.npy")
        run['abs_observables'][rname] = last_segment_traj[-1,1]
        run['rel_observables'][rname] = last_segment_traj[-1,1] - first_segment_traj[0,1]

    # The duration is read from the segments of the last member processed above
    # (assumes all members share the same time axis -- TODO confirm).
    run['duration'] = last_segment_traj[-1,0] - first_segment_traj[0,0]

    return run

def compute_trajectories(run):
    """Build the full trajectory of every reconstructed member.

    For each member, the '<name>-traj.npy' segments listed in its ancestry are
    loaded (one per reconstruction folder) and concatenated in order.

    The resulting dictionary {member name: trajectory array} is stored in
    ``run['traj']`` and also returned.
    """
    base = run['folder']
    reconstruction = run['rec_dict']

    trajectories = {}
    for name, member in tqdm(reconstruction['members'].items()):
        segments = [
            np.load(f'{base}/{subfolder}/{ancestor}-traj.npy')
            for subfolder, ancestor in zip(reconstruction['folders'], member['ancestry'])
        ]
        trajectories[name] = np.concatenate(segments)

    run['traj'] = trajectories
    return trajectories

def full_traj(path_to_end='veros/__test__/r2_minimal/i0009/e01'):
    """Return the whole trajectory leading to a given ensemble member.

    ``path_to_end`` has the form '<run folder>/i<iteration>/<member>'; anything
    after a '-' in the member part (e.g. a '-traj.npy' suffix) is ignored.
    The segment of the member is loaded and, unless the iteration is 0, prepended
    with the full trajectory of the parent recorded in the iteration's info.json.
    """
    iteration_path, member = path_to_end.rsplit('/', 1)
    member = member.partition('-')[0]
    run_folder, iteration_name = iteration_path.rsplit('/', 1)
    iteration = int(iteration_name[1:])
    width = len(iteration_name) - 1  # zero padding used by the iteration folder names

    segment = np.load(f'{run_folder}/{iteration_name}/{member}-traj.npy')
    if iteration == 0:
        return segment

    members_info = ut.json2dict(f'{run_folder}/{iteration_name}/info.json')['members']
    parent = members_info[member]['parent']
    history = full_traj(f'{run_folder}/i{iteration - 1:0{width}d}/{parent}')
    return np.concatenate([history, segment])
    

def killed_segments(run):
    """Collect the full trajectories of the members that were not cloned.

    For every pair of consecutive folders (the reconstruction folders followed by
    the last folder), the info.json of the later folder is read: members whose
    name does not appear as the parent of any member are considered dead.

    Returns a dictionary {earlier folder: list of full trajectories of its dead members}.
    """
    rec_dict = run['rec_dict']
    chain = rec_dict['folders'] + [rec_dict['last_folder']]

    cemetery = {}
    for prev_fol, next_fol in zip(chain, chain[1:]):
        info = ut.json2dict(f'{run["folder"]}/{next_fol}/info.json')
        survivors = {member['parent'] for member in info['members'].values()}
        dead = set(info['members']) - survivors
        cemetery[prev_fol] = [full_traj(f'{run["folder"]}/{prev_fol}/{d}') for d in dead]
    return cemetery

def plot_traj(run, start=0, **kwargs):
    """Plot all the trajectories of a run on the current axes.

    Each trajectory is drawn from row ``start`` on, as minus column 1 against
    column 0 shifted by its value at row 0. Trajectories are computed first if
    ``run['traj']`` is missing. ``label`` (if given) is attached to the first
    line only; the remaining keyword arguments are forwarded to ``plt.plot``.
    """
    if 'traj' not in run:
        compute_trajectories(run)

    label = kwargs.pop('label', None)
    for position, traj in enumerate(run['traj'].values()):
        elapsed = traj[start:,0] - traj[0,0]
        plt.plot(elapsed, -traj[start:,1], label=label if position == 0 else None, **kwargs)

def expectation(run, func, mode='abs', **kwargs):
    """Weighted ensemble estimate of ``func`` applied to the observables of a run.

    ``mode`` selects ``run['abs_observables']`` or ``run['rel_observables']``;
    ``kwargs`` are forwarded to ``func``. Returns an ``unc.ufloat`` whose nominal
    value is the mean of func*weight and whose uncertainty is
    sqrt(mean((func*weight)**2) / number of members).
    """
    observables = run[f'{mode}_observables'].values()
    values = np.array([func(obs, **kwargs) for obs in observables])
    weights = np.array([member['weight'] for member in run['rec_dict']['members'].values()])
    # alternative weights, kept for reference:
    # _w = np.exp(-np.array([r['cum_log_escore'] for r in rec_dict['members'].values()]))
    # _n = np.exp(rec_dict['cum_log_norm_factor'])

    weighted = values * weights
    n_members = len(weights)

    mean = np.mean(weighted)
    std = np.sqrt(np.mean(weighted**2) / n_members)

    return unc.ufloat(mean, std)

def f(x, a):
    """Indicator function: 1.0 if ``x`` is strictly greater than ``a``, else 0.0."""
    exceeds = x > a
    return float(exceeds)

def get_infos(folder):
    """Read the info.json of consecutive iterations i0000, i0001, ... of a run.

    Stops at the first iteration whose info.json does not exist and returns a
    dictionary {iteration index: content of info.json}.
    """
    infos = {}
    while True:
        index = len(infos)  # next iteration to read
        try:
            infos[index] = ut.json2dict(f'{folder}/i{index:04d}/info.json')
        except FileNotFoundError:
            return infos

## Visualization of killed trajectories and ensemble progress

In [None]:
# Run folder analyzed in this section; the commented lines are alternative runs.
folder = 'veros/__test__/r2_minimal'

# folder = 'demo/__test__/c-e50-fold0_minimal'
# folder = 'demo/__test__/k4-e50-fold0_minimal'
# folder = 'demo/__test__/k20-e50_minimal'
# folder = 'demo/__test__/k4-t100-e50-fold1_minimal'

# Load (or rebuild) the reconstruction, then concatenate the trajectory segments
# of every member; the trajectories end up in run['traj'].
run = get_run(folder)
_ = compute_trajectories(run)

In [None]:
cemetery = killed_segments(run)
cemetery_traj = {}
for k, v in cemetery.items():
    # column 0: time of the first killed trajectory, shifted to start at 0;
    # following columns: column 1 of every killed trajectory
    _columns = [v[0][:,0] - v[0][0,0]]
    _columns.extend(killed[:,1] for killed in v)
    vt = np.stack(_columns, axis=1)
    cemetery_traj[k] = vt

In [None]:
# Recreate figure 1 from scratch so that re-running the cell starts from an empty figure.
plt.close(1)
fig, ax = plt.subplots(num=1, figsize=(9,6))

# Killed trajectories in red, iterating from the last folder backwards:
# the i-th group is shifted down by i*0.5.
for i,c in enumerate(list(cemetery_traj.values())[::-1]):
    plt.plot(c[:,0], -c[:,1:]-i*0.5, color='red', alpha=0.1)

# Reconstructed trajectories of the run in black, without vertical shift.
plot_traj(run, alpha=0.5, color='black')


fig.tight_layout()

In [None]:
# Sanity check on the last entry of `cemetery`: shape of each killed trajectory
# and the value of column 0 (used as time in this notebook) at its first row.
for l in list(cemetery.values())[-1]:
    print(l.shape, l[0,0])


In [None]:
# One key per reconstruction folder (the folder preceding each resampling step).
cemetery.keys()

## The problem of ensemble collapse

In [None]:
# Run analyzed in this section; alternatives are kept commented out.
# folder = 'veros/__test__/rA-k20t50_minimal'

# folder = 'demo/__test__/k4-t20-e50-fold0_minimal'
# folder = 'demo/__test__/k4-t20-e200-fold0_minimal'
# folder = 'demo/__test__/k4-t20-e1000-fold0_minimal'

# folder = 'veros-temp-noise/__test__/rY1AAtlN-k30t1_minimal'
folder = 'veros-temp-noise/__test__/rY1AAtlN-k20t1_minimal'

run = get_run(folder)
# Number of the last folder, parsed from its name after dropping the first character.
its = int(run['rec_dict']['last_folder'][1:])

In [None]:
# Ensemble evolution: ensemble mean, killed/cloned members at each resampling
# time (left axis) and kill ratio (right axis).
plt.close(2)
fig, ax = plt.subplots(num=2, figsize=(9,6))

# Secondary y axis, used for the kill ratio.
ax2 = ax.twinx()

# Horizontal shift applied to the killed (to the left) and cloned (to the right) markers.
# t_offset = 0.01
t_offset = 0

# Constant added to the time axis; also read by the 'Track probabilities' cell below.
# t_adj = 0
t_adj = 1

t_end = 0
krs = []  # kill ratio of every resampling step
for i in range(its):
    cur_fol = f'{folder}/i{i:04d}'
    next_fol = f'{folder}/i{i+1:04d}'
    # Members named as a parent in the next iteration were cloned; the others were killed.
    info = ut.json2dict(f'{next_fol}/info.json')
    survivors = set(m['parent'] for m in info['members'].values())
    dead = set(list(info['members'])) - survivors

    t_end_prev = t_end
    # Full history of every member up to the end of the current iteration.
    trajs = {m: full_traj(f'{cur_fol}/{m}') for m in tqdm(info['members'])}
    ens_mean = -uplt.avg(np.stack([c[:,-1] for c in trajs.values()]), axis=0)
    # Time axis taken from the first member, shifted to start at t_adj.
    t = list(trajs.values())[0][:,0] - list(trajs.values())[0][0,0] + t_adj
    t_end = t[-1]

    # Draw only the part of the ensemble mean after the previous resampling time.
    uplt.errorband(t[t>t_end_prev], ens_mean[t>t_end_prev], ax=ax, color='black', label='ensemble mean' if i == 0 else None)
    
    # Sign-flipped final values of the killed members.
    c = [-trajs[m][-1,-1] for m in dead]
    ax.scatter([t_end - t_offset]*len(c), c, color='red', alpha=0.5, label='killed' if i == 0 else None)

    # Sign-flipped final values of the cloned members.
    c = [-trajs[m][-1,-1] for m in survivors]
    ax.scatter([t_end + t_offset]*len(c), c, color='green', alpha=0.5, label='cloned' if i == 0 else None)

    # Vertical line marking the resampling time.
    ax.axvline(t_end, color='gray', alpha=0.2)

    # Fraction of the ensemble that was killed at this step.
    kill_ratio = len(dead)/len(info['members'])
    krs.append(kill_ratio)
    ax2.scatter([t_end], [kill_ratio], color='blue', alpha=1)

## Veros
# ax.set_xlabel('time [yr]')
# ax.set_ylabel(' '*12 + 'AMOC strength [Sv]')

# ax.set_ybound(6.9,8.4)
# ax.set_yticks(np.arange(7.4, 8.4, 0.2))

# ax2.set_ybound(0,3.2)
# ax2.set_yticks(np.arange(0.2,1.1,0.2))

# ax.grid(axis='y')
# ax2.grid(axis='y', linestyle='dashed')
# ax2.set_ylabel('kill ratio'+ ' '*38)

# ax.legend(loc='center left')

## OU
# NOTE(review): the 'OU' labels and bounds are active while `folder` points to a
# veros-temp-noise run -- confirm that this is the intended combination.
ax.set_xlabel('time')
ax.set_ylabel(' '*12 + '$X$')

ax.set_ybound(-6,1)
ax.set_yticks(np.arange(-4,1.1))

# The kill-ratio axis extends to 3.2 but is only ticked up to 1; the padded
# labels presumably align each label with its ticked region -- confirm.
ax2.set_ybound(0,3.2)
ax2.set_yticks(np.arange(0.2,1.1,0.2))

ax.grid(axis='y')
ax2.grid(axis='y', linestyle='dashed')
ax2.set_ylabel('kill ratio'+ ' '*38)

ax.legend(loc=(0.01, 0.32))


fig.tight_layout()

In [None]:
# Save the current `fig`; the commented line is an alternative output file.
# NOTE(review): the file name mentions 'ou-e1000' while the active `folder` is a
# veros-temp-noise run -- confirm the target before overwriting the figure.
# fig.savefig('../papers/PhD_Thesis/manuscript/Figures/REA-VerOS/ensemble-collapse-issue.pdf')
fig.savefig('../papers/PhD_Thesis/manuscript/Figures/REA-VerOS/evolution-ou-e1000.pdf')

### Track probabilities

In [None]:
its = int(run['rec_dict']['last_folder'][1:])
# Reconstruction of the ensemble from each iteration 1 .. its-1.
runs = [get_run(folder, i) for i in range(1,its)]
# One row per reconstruction, one column per member.
weights = np.stack([np.array([r['weight'] for r in run['rec_dict']['members'].values()]) for run in runs])
# Last row, last column of the segment stored in each reconstruction's last folder
# (member keys are mapped to file names by replacing their first character with 'e').
value_at_end = np.stack([np.array([np.load(f'{folder}/{run["rec_dict"]["last_folder"]}/e{m[1:]}-traj.npy')[-1,-1] for m in run['rec_dict']['members']]) for run in runs])

# add the first iteration (unit weights, values read from the i0000 segments)
_w = np.ones(len(runs[0]['rec_dict']['members']))
_f = np.array([np.load(f'{folder}/i0000/e{m[1:]}-traj.npy')[-1,-1] for m in runs[0]['rec_dict']['members']])

weights = np.vstack([_w, weights])
# The sign is flipped, as in the plots of this notebook.
value_at_end = -np.vstack([_f, value_at_end])

# Elapsed time at the end of each iteration, from the full trajectory of the first member.
# `t_adj` comes from the ensemble-evolution plotting cell above.
t = []
for i in range(its):
    c = full_traj(f"{folder}/i{i:04d}/e{list(run['rec_dict']['members'])[0][1:]}")
    t.append(c[-1,0] - c[0,0] + t_adj)
t = np.array(t)

# Both arrays are expected to have one row per entry of `t`.
weights.shape, value_at_end.shape

In [None]:
# Display the time axis built in the previous cell.
t

In [None]:
# Mean weight and mean end value over the members, one entry per row (iteration).
np.mean(weights, axis=1), np.mean(value_at_end, axis=1)

In [None]:
# Per-iteration statistics of the end values (rows: iterations, columns: members).
medians = np.median(value_at_end, axis=1)
mins = value_at_end.min(axis=1)
maxs = value_at_end.max(axis=1)
# medians = np.quantile(value_at_end, 0.1, axis=1)
# Weighted fraction of members at or below the median of their own iteration.
probs = (weights * (value_at_end <= medians[:, np.newaxis])).mean(axis=1)

In [None]:
# Value for the first row, i.e. the one built with unit weights.
probs[0]

In [None]:
# Figure number 2 is reused here: the previous figure with that number is closed.
plt.close(2)
fig, ax = plt.subplots(num=2, figsize=(9,6))

# Probability (log scale) against the ensemble median: the gray line joins the
# points in row order, the markers are coloured by `t`.
plt.semilogy(medians, probs, '-', color='gray', alpha=0.5)
plt.scatter(medians, probs, c=t, cmap='jet')
plt.colorbar(label='time [yr]')

## veros
plt.xlabel('Median AMOC strength [Sv]')
plt.ylabel('$p_{1/2}$') # probability of being below the median of the ensemble (before selection is applied)

fig.tight_layout()

In [None]:
plt.close(3)
fig, ax = plt.subplots(num=3, figsize=(9,6))

# Maximum, median and minimum of the end values as a function of time.
plt.plot(t,maxs, color='black', linestyle='dotted', label='max')
plt.plot(t,medians, color='black', label='median')
plt.plot(t,mins, color='black', linestyle='dashed', label='min')

## veros
plt.xlabel('time [yr]')
plt.ylabel('AMOC strength [Sv]')

## ou
# plt.xlabel('time')
# plt.ylabel('$X$')

# plt.legend(loc='upper center')
plt.legend()

# Second y axis (log scale, in red) for `probs`.
ax2 = ax.twinx()
ax2.semilogy(t,probs, color='red')
# plt.ylabel acts on the current axes, presumably ax2 since it was just created -- confirm.
plt.ylabel('$p_{1/2}$', color='red')
plt.setp(ax2.get_yticklabels(), color='red')

fig.tight_layout()

In [None]:
# Save the current `fig`; pick the file name matching the run selected in `folder`.
# fig.savefig('../papers/PhD_Thesis/manuscript/Figures/REA-VerOS/p0.5-veros-t5.pdf')
# fig.savefig('../papers/PhD_Thesis/manuscript/Figures/REA-VerOS/p0.5-ou.pdf')
fig.savefig('../papers/PhD_Thesis/manuscript/Figures/REA-VerOS/p0.5-veros-noise.pdf')

## New attractor

In [None]:
# Two runs are compared in this section; the figure below labels the first
# one '$k=20$' and the second one '$k=0$'.
folder = 'veros/__test__/r2_minimal'
folder_relax = 'veros/__test__/r2-relax_minimal'

run = get_run(folder)
run_relax = get_run(folder_relax)

# Full trajectories of both runs (stored in run['traj'] and run_relax['traj']).
_ = compute_trajectories(run)
_ = compute_trajectories(run_relax)

In [None]:
plt.close(3)
fig, ax = plt.subplots(num=3, figsize=(9,6))

plot_traj(run, alpha=0.5, color='black')
# The second run is drawn starting from the row index equal to the length of the
# first run's 'r01' trajectory minus one (presumably because its trajectories
# begin with the first run -- confirm).
plot_traj(run_relax, start=run['traj']['r01'].shape[0] - 1, alpha=0.5, color='blue')

fig.tight_layout()

In [None]:
from matplotlib.gridspec import GridSpec

In [None]:
# Two-panel figure: trajectories on the left (4 of 6 grid columns), histograms
# of the values on the right (remaining 2 columns).
plt.close(4)
fig = plt.figure(num=4, figsize=(10,6))
gs = GridSpec(4,6)
ax_plot = fig.add_subplot(gs[:,0:4])

# Row index counted from the end: the last 20 rows of the second run are used
# for its histogram, and the corresponding time is marked with a dotted line.
stab = -20

plot_traj(run, alpha=0.2, color='black', label='$k=20$')
plot_traj(run_relax, start=run['traj']['r01'].shape[0] - 1, alpha=0.2, color='blue', label='$k=0$')

# Blue dashed: elapsed time at the end of the first run (member 'r01').
plt.axvline(run['traj']['r01'][-1,0] - run['traj']['r01'][0,0], color='blue', linestyle='dashed')
# Blue dotted: elapsed time at row `stab` of the second run (member 'r01').
plt.axvline(run_relax['traj']['r01'][stab,0] - run_relax['traj']['r01'][0,0], color='blue', linestyle='dotted')
# Black dotted: elapsed time at the end of the i0000 segment of member e01.
e01 = np.load(f'{folder}/i0000/e01-traj.npy')
plt.axvline(e01[-1,0] - e01[0,0], color='black', linestyle='dotted')

plt.xlabel('time [yr]')
plt.ylabel('AMOC strength [Sv]')
plt.legend()



ax_hist = fig.add_subplot(gs[:,4:])

# 50 bin edges spanning the y range of the trajectory panel.
bin_edges = np.linspace(ax_plot.get_ylim()[0], ax_plot.get_ylim()[1], 50)

#hist of the old attractor
# (sign-flipped column 1 of the i0000 segments of all members)
pts_control = -np.concatenate([np.load(f'{folder}/i0000/e{e[1:]}-traj.npy')[:,1] for e in run['rec_dict']['members']])
hist, bin_edges = np.histogram(pts_control, bins=bin_edges, density=True)
# Drawn sideways: density on the x axis, bin centers on the y axis.
ax_hist.plot(hist, 0.5*(bin_edges[:-1] + bin_edges[1:]), color='black')

#hist of the new attractor
# (sign-flipped column 1 of the rows from `stab` on, for every trajectory of the second run)
pts = -np.concatenate([c[stab:, 1] for c in run_relax['traj'].values()])
hist, bin_edges = np.histogram(pts, bins=bin_edges, density=True)
ax_hist.plot(hist, 0.5*(bin_edges[:-1] + bin_edges[1:]), color='blue')

# Same y range as the trajectory panel, without repeating the tick labels.
ax_hist.set_ylim(*ax_plot.get_ylim())
ax_hist.set_yticklabels([])

fig.tight_layout()

In [None]:
# Save the current `fig` (the two-panel figure when the cells are run in order).
fig.savefig('../papers/PhD_Thesis/manuscript/Figures/REA-VerOS/new-attractor.pdf')

In [None]:
# Averages of the two samples used for the histograms; presumably values with
# uncertainty, since the next cell reads their `.n` and `.s` -- confirm.
ctrl = uplt.avg(pts_control).item()
new = uplt.avg(pts).item()

In [None]:
# Difference between the two averages in units of `ctrl.s`
# (presumably nominal values and standard deviation -- confirm).
(new.n - ctrl.n) / ctrl.s

### Probability ?

In [None]:
# Weight and sign-flipped final value (last row, last column) of every member.
_members = run['rec_dict']['members']
_w = np.array([member['weight'] for member in _members.values()])
_f = -np.array([run['traj'][name][-1,-1] for name in _members])
_w.mean()

In [None]:
# Display the weights of all members.
_w

In [None]:
# 1-based position of the member with the largest weight.
np.argmax(_w) + 1

In [None]:
# Figure number 1 is reused: weight of each member against its final value.
plt.close(1)
fig, ax = plt.subplots(num=1, figsize=(9,6))

plt.scatter(_f, _w)

fig.tight_layout()

In [None]:
# Number of members whose final value is below the threshold 7.9.
np.sum(_f < 7.9)

In [None]:
# Weighted estimate: mean over the members of weight * indicator(final value < 7.9).
np.mean(_w*(_f < 7.9))

## Shorter resampling time

In [None]:
# Run used for the shorter resampling time; the commented alternative is the
# run with a 20 year resampling time.
folder = 'veros/__test__/r01--k__16--nens__50--T__5_minimal'
# folder = 'veros/__test__/r3_minimal' # 20 year resampling time
run = get_run(folder)