In [None]:
import glob
import os

from astropy.time import Time
import astropy.units as u
import h5py
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd 
import scipy.stats as st
from sklearn.neighbors import KernelDensity

import huxt as H
import huxt_analysis as HA
import SIR_HUXt as sir

# Use one common 16 pt size for axis labels, tick labels and legend text.
for _group in ("axes", "ytick", "xtick"):
    mpl.rc(_group, labelsize=16)
mpl.rc("legend", fontsize=16)

In [None]:
# Show how the weighted ensemble of CME speeds evolves over the SIR analysis
# steps, one panel per SIR run, for up to the first four runs found on disk.
dirs = sir.get_project_dirs()
data_dir = os.path.join(dirs['sir_analysis'], "shv_calibrate", "SIR_HUXt_*.hdf5")
# glob returns matches in arbitrary order; sort so that panel "SIR Run NN"
# refers to the same file from one execution to the next.
files = sorted(glob.glob(data_dir))
if not files:
    raise FileNotFoundError("No SIR_HUXt output files match {}".format(data_dir))

n_panels = min(4, len(files))
n_steps = 8  # analysis groups plotted per run: analysis_00 ... analysis_07

# squeeze=False keeps an array of axes even when only one run is available.
fig, ax = plt.subplots(1, n_panels, figsize=(20, 5), squeeze=False)
ax = ax[0]

for i, a in enumerate(ax):

    # The context manager closes the file even if a dataset is missing.
    with h5py.File(files[i], 'r') as out_file:

        # Element 4 of 'cme_params' is used as the true CME speed of this run.
        v_truth = out_file['cme_params'][:][4]

        for j in range(n_steps):

            key = "analysis_{:02d}".format(j)

            v = out_file[key]['speed'][:]
            weights = out_file[key]['weight'][:]

            # Marker size is the weight min-max scaled to 0-100. When every
            # member has the same weight the scaling is 0/0, which would give
            # NaN sizes, so fall back to a uniform mid-range size instead.
            w_min = np.min(weights)
            w_span = np.max(weights) - w_min
            if w_span > 0:
                sz = 100 * (weights - w_min) / w_span
            else:
                sz = np.full(np.shape(weights), 50.0)

            # Analysis step j is plotted at x = j + 1.
            y = np.ones(v.shape) + j
            a.scatter(y, v, s=sz, marker='o')

    a.set_title('SIR Run {:02d}'.format(i))
    # Draw the truth of *this* run; v_truth is re-read for every file above.
    a.hlines(v_truth, 0, n_steps + 1, linestyles=['--'], colors=['k'], label='True CME speed')
    a.set_xlim(0.5, n_steps + 0.5)
    a.set_ylim(930, 1070)
    a.set_xlabel('SIR analysis steps')

ax[0].set_ylabel('CME speed')

# All panels share the same y-limits, so only the left-most keeps tick labels.
for a in ax[1:]:
    a.set_yticklabels([])

fig.subplots_adjust(left=0.15, bottom=0.1, right=0.98, top=0.98, wspace=0.05, hspace=0.05)
#fig.savefig('parameter_vs_step_{:02d}.png'.format(j))

In [None]:
# Compare the prior (analysis_00) and posterior (analysis_07) speed ensembles
# across all SIR runs: a rank histogram of the true speed within each ensemble,
# and a kernel density estimate of the ensemble-mean speeds.
dirs = sir.get_project_dirs()
data_dir = os.path.join(dirs['sir_analysis'], "shv_calibrate", "SIR_HUXt_*.hdf5")
# Sort so that the per-run result lists have a reproducible order.
files = sorted(glob.glob(data_dir))
if not files:
    raise FileNotFoundError("No SIR_HUXt output files match {}".format(data_dir))


def summarise_analysis_step(files, key):
    """Summarise the speed ensemble stored under one analysis group of each run.

    Parameters
    ----------
    files : sequence of str
        Paths of the HDF5 output files, one per SIR run.
    key : str
        Name of the analysis group to read, e.g. "analysis_00".

    Returns
    -------
    ranks : list of float
        Rank of the true speed within each ensemble, as given by
        ``scipy.stats.rankdata`` on the ensemble with the truth appended
        (1 = below every member, ensemble size + 1 = above every member).
    means : numpy.ndarray
        Unweighted mean speed of each ensemble.
    ess : list of float
        ``1 / sum(weight**2)`` for each ensemble.
    n_members : int
        Largest ensemble size encountered.
    v_truth : scalar
        True speed (element 4 of 'cme_params') read from the last file.
    """
    ranks = []
    means = []
    ess = []
    n_members = 0
    v_truth = None

    for f in files:
        # The context manager closes the file even if a dataset is missing.
        with h5py.File(f, 'r') as out_file:
            v_truth = out_file['cme_params'][:][4]
            v = out_file[key]['speed'][:]
            w = out_file[key]['weight'][:]

        # Rank the truth amongst the ensemble; the truth is the last element.
        ranks.append(st.rankdata(np.append(v, v_truth))[-1])
        means.append(np.mean(v))
        # Effective sample size.
        # NOTE(review): this form assumes the weights sum to 1 - confirm.
        ess.append(1 / np.sum(w**2))
        n_members = max(n_members, len(v))

    return ranks, np.array(means), ess, n_members, v_truth


# Ranks, ensemble means and effective sample sizes of the priors
key = "analysis_00"
v_rank_prior, v_avg_prior, ess_prior, n_prior, v_truth = summarise_analysis_step(files, key)

# Ranks, ensemble means and effective sample sizes of the posteriors
key = "analysis_07"
v_rank_post, v_avg_post, ess_post, n_post, v_truth = summarise_analysis_step(files, key)

#############################################################
# Plot rank hist and kernel density of prior and posterior V
fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# Ranks run from 1 to n_members + 1. Centre one bin on each integer rank so
# that the highest ranks are counted rather than falling outside the bins.
n_members = max(n_prior, n_post)
rank_bins = np.arange(0.5, n_members + 2)
ax[0].hist([v_rank_prior, v_rank_post], bins=rank_bins, label=['Prior', 'Posterior'])

cmap = plt.get_cmap("tab10")
for v_avg, col, label in zip([v_avg_prior, v_avg_post], [cmap(0), cmap(1)], ['Prior', 'Posterior']):

    # Gaussian KDE of the ensemble means, evaluated on a 1-unit speed grid.
    kde = KernelDensity(kernel='gaussian', bandwidth=10).fit(v_avg.reshape(-1, 1))
    v_grid = np.arange(850, 1150, 1)
    log_pdf = kde.score_samples(v_grid.reshape(-1, 1))
    pdf = np.exp(log_pdf)

    ax[1].plot(v_grid, pdf, color=col, label=label, linewidth=3)

# NOTE(review): a single truth line (from the last file) is drawn; this
# presumes every run shares the same true speed - confirm.
ax[1].vlines(v_truth, 0, 0.02, linestyles=['--'], colors=['k'])

ax[1].yaxis.tick_right()
ax[1].yaxis.set_label_position("right")

ax[0].set_xlabel('Rank')
ax[0].set_ylabel('Frequency')
ax[0].legend()

ax[1].set_xlabel('Ensemble mean speed (km/s)')
ax[1].set_ylabel('Density')
ax[1].set_ylim(-0.0001, 0.017)
ax[1].set_xlim(900, 1100)
ax[1].legend()

fig.subplots_adjust(left=0.1,bottom=0.1, right=0.9, top=0.99, wspace=0.05)