# Set up example code

## Model setup

In [None]:
import subprocess

from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process import kernels
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import matplotlib.cm as cm
import matplotlib.pyplot as plt

from scipy.linalg import lapack
from scipy import stats
import emcee
import numpy as np

import importlib

import os
import pickle
from pathlib import Path

import src.reader as Reader

import pandas as pd

## Step 1: prepare input pickle file

### Load stuff from text files

Input Directory

In [None]:
# Directory holding the design parameters and the model predictions of this run.
# Defaults to the original cluster location; set the JETSCAPE_LHC_DIR environment
# variable to point the notebook elsewhere without editing this cell.
# os.path.join(..., '') guarantees the trailing separator that the later
# `LHCdir + 'filename'` concatenations rely on.
LHCdir = os.path.join(
    os.environ.get(
        'JETSCAPE_LHC_DIR',
        '/scratch/user/cameron.parker/projects/JETSCAPE/runs/LHCdesign/QVir_Analysis/',
    ),
    '',
)

Build the design file from `parameters.txt` and read it in

In [None]:
# Load the design parameter table of this run.
df = pd.read_csv(LHCdir+'parameters.txt')

# Header lines followed by the table itself. The "# Parameter " line is left
# unterminated on purpose: the first line produced by to_string() (the column
# names) continues it.
designlines = ["# 2760\n","# Version 1.0\n","# Parameter ",df.to_string(index=False)]

# Write through a context manager so the handle is closed (and its contents
# flushed) even if the write raises, before the file is read back below.
with open("input/design.txt",'w') as designfile:
    designfile.writelines(designlines)

RawDesign = Reader.ReadDesign("input/design.txt")
print(RawDesign)

In [None]:
# Measured spectra: every data file lives in the same directory
VacDataDir = '/scratch/user/cameron.parker/projects/STAT/input/vac-data/'
Hads2760Data = Reader.ReadData(f'{VacDataDir}Data_CMS_PrPr2760_charged-hads.dat')
JetR2_2760Data = Reader.ReadData(f'{VacDataDir}Data_CMS_PrPr2760_jets-2.dat')
JetR3_2760Data = Reader.ReadData(f'{VacDataDir}Data_CMS_PrPr2760_jets-3.dat')
JetR4_2760Data = Reader.ReadData(f'{VacDataDir}Data_CMS_PrPr2760_jets-4.dat')
Pions2760Data = Reader.ReadData(f'{VacDataDir}Data_ALICE_PrPr2760_pions.dat')
Kaons2760Data = Reader.ReadData(f'{VacDataDir}Data_ALICE_PrPr2760_kaons.dat')
Protons2760Data = Reader.ReadData(f'{VacDataDir}Data_ALICE_PrPr2760_protons.dat')

# Model predictions: one file per observable inside the run directory
Hads2760Pred = Reader.ReadPrediction(f'{LHCdir}HadronSpectraPrediction.dat')
JetsR2_2760Pred = Reader.ReadPrediction(f'{LHCdir}JetSpectraPredictionR2.dat')
JetsR3_2760Pred = Reader.ReadPrediction(f'{LHCdir}JetSpectraPredictionR3.dat')
JetsR4_2760Pred = Reader.ReadPrediction(f'{LHCdir}JetSpectraPredictionR4.dat')
Pions2760Pred = Reader.ReadPrediction(f'{LHCdir}PionSpectraPrediction.dat')
Kaons2760Pred = Reader.ReadPrediction(f'{LHCdir}KaonSpectraPrediction.dat')
Protons2760Pred = Reader.ReadPrediction(f'{LHCdir}ProtonSpectraPrediction.dat')

In [None]:
# Per-plot settings, one row per plot: [xlabel, ylabel, xscale, yscale].
# The first three rows use a linear x axis, the remaining four a log x axis
# (presumably matching the order of the observables passed to makeplot — confirm).
#
# The labels are raw strings so the LaTeX backslashes stay literal: in a normal
# string "\N" is a malformed unicode-name escape (a SyntaxError), and "\s" / "\e"
# are invalid escape sequences. The yield label uses a plain N because \N is
# not a LaTeX math command.
plotvars = [
    ["$p_T$", r"$d^2\sigma/dp_Td\eta$", "linear", "log"],
    ["$p_T$", r"$d^2\sigma/dp_Td\eta$", "linear", "log"],
    ["$p_T$", r"$d^2\sigma/dp_Td\eta$", "linear", "log"],
    ["$p_T$", r"$d^2N/dp_Td\eta$", "log", "log"],
    ["$p_T$", r"$d^2N/dp_Td\eta$", "log", "log"],
    ["$p_T$", r"$d^2N/dp_Td\eta$", "log", "log"],
    ["$p_T$", r"$d^2N/dp_Td\eta$", "log", "log"],
]

### Data formatting

In [None]:
# One entry per observable: (name, measured data, model prediction).
# The order of this table fixes the order of every dictionary built below.
VacInputs = [
    ("Jets2760R2", JetR2_2760Data, JetsR2_2760Pred),
    ("Jets2760R3", JetR3_2760Data, JetsR3_2760Pred),
    ("Jets2760R4", JetR4_2760Data, JetsR4_2760Pred),
    ("Hads2760", Hads2760Data, Hads2760Pred),
    ("Pions2760", Pions2760Data, Pions2760Pred),
    ("Kaons2760", Kaons2760Data, Kaons2760Pred),
    ("Protons2760", Protons2760Data, Protons2760Pred),
]

# Data points, keyed system -> group -> observable
Data = {"PrPr2760": {"Vac": {
    name: measured["Data"] for name, measured, predicted in VacInputs
}}}

# Model predictions, evaluated on the x values of the matching measurement
Prediction = {"PrPr2760": {"Vac": {
    name: {"Y": predicted["Prediction"], "x": measured["Data"]['x']}
    for name, measured, predicted in VacInputs
}}}

# Covariance: only the diagonal block of each observable with itself is filled in
Covariance = Reader.InitializeCovariance(Data)
for name, measured, predicted in VacInputs:
    block = ("Vac", name)
    Covariance["PrPr2760"][block][block] = Reader.EstimateCovariance(
        measured, measured, SysLength = {"default": 0.05})

# Collect everything into the dictionary that gets pickled
AllData = {
    "systems": ["PrPr2760"],
    "keys": RawDesign["Parameter"],
    "labels": RawDesign["Parameter"],
    "ranges": [(0.1, 0.5), (1.0, 5.0), (0.95, 3.0), (0.1, 1.0), (0.1, 0.4), (0.2, 0.5), (0.07, 0.2)],
    "observables": [('Vac', [name for name, measured, predicted in VacInputs])],
    "design": RawDesign["Design"],
    "model": Prediction,
    "data": Data,
    "cov": Covariance,
}
Nobs = len(AllData["observables"][0][1])

# Write the pickle file and hand its path to the reader module
picklefile = 'input/pp-analysis.p'
with open(picklefile, 'wb') as handle:
    pickle.dump(AllData, handle, protocol = pickle.HIGHEST_PROTOCOL)

Reader.Initialize(picklefile)

### Optional: clean past files

In [None]:
# Files left over from earlier runs: the MCMC chain first, then one emulator per system
StaleFiles = ['cache/mcmc_chain.hdf']
for system in AllData["systems"]:
    StaleFiles.append('cache/emulator/' + system + ".pkl")

# Delete whichever of them are actually present
for stale in StaleFiles:
    if os.path.exists(stale):
        os.remove(stale)

## Step 2: run emulator

In [None]:
# Run the emulator module as a script with the --retrain flag and --npc 10.
# NOTE(review): --npc is presumably the number of principal components — confirm against src.emulator.
! python3 -m src.emulator --retrain --npc 10

In [None]:
# Load the emulator for the PrPr2760 system through Emulator.from_cache.
# NOTE(review): presumably this picks up the emulator written by the training step — confirm against src.emulator.
from src import lazydict, emulator
EmulatorPrPr2760 = emulator.Emulator.from_cache('PrPr2760')

## Step 3: MCMC sampling

In [None]:
# Delete any existing chain file before sampling.
# NOTE(review): presumably so the run starts from scratch instead of continuing an old chain — confirm against src.mcmc.
if os.path.exists('cache/mcmc_chain.hdf'):
    os.remove("cache/mcmc_chain.hdf")
# Run the sampler as a script; IPython expands $picklefile to the value of the Python variable `picklefile`.
! python3 -m src.mcmc --nwalkers 500 --nburnsteps 500 1500 --picklefile $picklefile

## Step 4: Analyze posterior samples

In [None]:
# Initialize the src package, then open the chain tied to `picklefile` and load its samples.
# NOTE(review): src.Initialize() is called without arguments here, unlike Reader.Initialize(picklefile) — confirm this is intended.
import src
src.Initialize()
from src import mcmc
chain = mcmc.Chain(picklefile=picklefile)
# MCMCSamples is indexed below as a 2-D array, [sample, parameter].
MCMCSamples = chain.load()

In [None]:
# Run the plotting module as a script, requesting the "posterior", "gp" and "diag_emu" plots;
# IPython expands $picklefile to the value of the Python variable `picklefile`.
! python3 -m src.plots posterior gp diag_emu --picklefile $picklefile

## Step 5: adding all sorts of plots

In [None]:
# Trace plot: one panel per parameter, one line per walker.
with chain.dataset() as d:
    W = d.shape[0]     # number of walkers
    S = d.shape[1]     # number of steps
    N = d.shape[2]     # number of parameters
    # "Thinning": plot roughly 200 points per walker. The stride must be at
    # least 1, otherwise a chain shorter than 200 steps gives range(0, S, 0),
    # which raises ValueError.
    T = max(1, S // 200)
    # Faint lines keep overlapping walkers readable; alpha is capped at 1
    # because 20 / W exceeds the valid range for fewer than 20 walkers.
    A = min(1.0, 20 / W)
    # squeeze=False always returns a 2-D array of axes, so a single-parameter
    # chain (N == 1) no longer yields a bare, non-iterable Axes object.
    figure, axes = plt.subplots(figsize = (15, 2 * N), ncols = 1, nrows = N, squeeze = False)
    axes = axes[:, 0]  # back to the 1-D layout: one axis per parameter
    for i, ax in enumerate(axes):
        ax.set_ylabel(AllData["labels"][i])
        for j in range(0, W):
            ax.plot(range(0, S, T), d[j, ::T, i], alpha = A)
    plt.tight_layout()
    plt.savefig('plots/MCMCSamples.pdf', dpi = 192)

Parameter Distributions

In [None]:
from numpy import average


# Corner plot of the posterior: 1-D histograms on the diagonal, 2-D histograms
# below it, and the panels above the diagonal switched off.
Names = AllData["labels"]
NDimension = len(AllData["labels"])
Ranges = np.array(AllData["ranges"]).T   # column i holds (lower, upper) of parameter i
scale = 2
figure, axes = plt.subplots(nrows = NDimension, ncols = NDimension,
                            figsize = (scale * NDimension, scale * NDimension))
for i, row in enumerate(axes):
    for j, ax in enumerate(row):
        if i < j:
            # upper triangle stays empty
            ax.axis('off')
        elif i > j:
            # joint distribution of parameters j (x axis) and i (y axis)
            ax.hist2d(MCMCSamples[:, j], MCMCSamples[:, i],
                      bins=50, range=[Ranges[:,j], Ranges[:,i]],
                      cmap='Greens')
            ax.set_xlabel(Names[j])
            ax.set_ylabel(Names[i])
            ax.set_xlim(*Ranges[:,j])
            ax.set_ylim(*Ranges[:,i])
        else:
            # marginal distribution of parameter i
            y, x, _ = ax.hist(MCMCSamples[:,i], bins=50,
                    range=Ranges[:,i], histtype='step', color='green')
            ax.set_xlabel(Names[i])
            ax.set_xlim(*Ranges[:,j])

            # most populated bin; when several bins tie, the last one is taken
            maxy = y.max()
            maxindex = max((k for k, thisy in enumerate(y) if thisy == maxy), default=0)

            # report the centre of that bin
            xmax = (x[maxindex]+x[maxindex+1])/2
            print(AllData["labels"][i] + ": " + str(xmax))


plt.tight_layout()
plt.savefig('plots/Correlation.pdf', dpi = 192)
# figure

In [None]:
from src.myplot import makeplot

Observables

In [None]:
# Pick 2500 rows of the posterior sample at random (np.random.choice draws with
# replacement by default) and evaluate the emulator at each of them
SampleRows = np.random.choice(len(MCMCSamples), size = 2500)
Examples = MCMCSamples[SampleRows, :]
PosteriorEmulation = EmulatorPrPr2760.predict(Examples)
TempPrediction = {"PrPr2760": PosteriorEmulation}

makeplot(AllData, plotvars, TempPrediction, "Posteriors")

In [None]:
# Evaluate the emulator at the design points themselves and plot the result
Examples = AllData["design"]
DesignEmulation = EmulatorPrPr2760.predict(Examples)
TempPrediction = {"PrPr2760": DesignEmulation}

makeplot(AllData, plotvars, TempPrediction, "Predicted Design")

Priors

In [None]:
# Plot the model predictions stored under AllData["model"] directly, without going through the emulator.
TempPrediction = AllData["model"]

makeplot(AllData, plotvars, TempPrediction, "Priors")

In [None]:
# Close every open matplotlib figure to free memory.
plt.close('all')