# Set up CMS photon jet inference from full python package

## Model setup

Add some details here

In [1]:
import subprocess

from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process import kernels
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import matplotlib.cm as cm
import matplotlib.pyplot as plt

from scipy.linalg import lapack
from scipy import stats
import emcee
import numpy as np

import os
import pickle

## Step 1: prepare input pickle file

In [6]:
# Initialize empty dictionary
AllData = {}

# Basic information
AllData["systems"] = ["PbPb5020"]
AllData["keys"] = ["A", "B"]
AllData["labels"] = ["A", "B"]
AllData["ranges"] = [(0, 20), (0, 20)]
AllData["observables"] = [('XJA', [None])]

# Read input text files
Covariance    = np.loadtxt('input/CMSPhotonJet/Covariance.txt')
RawPrediction = np.loadtxt('input/CMSPhotonJet/ModelPrediction.txt')
DesignPoints  = np.loadtxt('input/CMSPhotonJet/Design.txt')
RawData       = np.loadtxt('input/CMSPhotonJet/Data.txt')

SingleData = {'x': RawData[:,0], 'y': RawData[:,1], 'yerr': {'stat': RawData[:, 2], 'sys': RawData[:,3]}}
SingleData["yerr"]["all"] = np.sqrt(SingleData["yerr"]["stat"]**2 + SingleData["yerr"]["sys"]**2)
Data = {"PbPb5020": {"XJA": {None: SingleData}}}

Prediction = {"PbPb5020": {"XJA": {None: {"Y": RawPrediction, "x": RawData[:,0]}}}}

CovarianceMatrix = {"PbPb5020": {"XJA": Covariance}}


# Assign data to the dictionary
AllData["design"] = DesignPoints
AllData["model"] = Prediction
AllData["data"] = Data
AllData["cov"] = CovarianceMatrix

# Save to the desired pickle file
with open('input/default.p', 'wb') as handle:
    pickle.dump(AllData, handle, protocol = pickle.HIGHEST_PROTOCOL)

In [7]:
# Optional: clean files

# Clean past MCMC samples
os.remove("mcmc/chain.hdf")

FileNotFoundError: [Errno 2] No such file or directory: 'mcmc/chain.hdf'

## Step 2: run emulator

In [8]:
! python3 -m src.emulator --retrain --npc 10

[INFO][emulator] training emulator for system PbPb5020 (10 PC, 0 restarts)
[20 20]
Emulator design:
[[ 9.89368681 17.67830513]
 [13.27933734  8.93417263]
 [13.91407763  5.51081284]
 [ 6.51746591 10.26149551]
 [18.46737232 10.16692805]
 [ 9.0090532  15.72706132]
 [15.84854539  7.11468627]
 [ 3.13043352  4.17663049]
 [13.57825469 11.66924672]
 [11.77700967  2.8159296 ]
 [16.90814136  8.48922914]
 [ 2.32423428 16.28456341]
 [ 9.6398434  14.0320681 ]
 [ 7.38801505 18.57934962]
 [15.32564315  3.05049957]
 [ 3.71077323  7.70994371]
 [ 1.6912108   7.52057632]
 [18.1395542   4.93684076]
 [ 5.49534938  1.24825531]
 [14.739699   15.00057718]
 [ 4.96824166 13.3163702 ]
 [18.3866634   1.96906053]
 [ 2.18176932 17.96607754]
 [ 5.30053813  5.37384064]
 [ 5.75012018 16.18550963]
 [ 3.89496348  2.10575258]
 [16.66707606 19.02676448]
 [ 8.11026162 11.16650116]
 [ 0.90468556 12.1221526 ]
 [18.82772314 11.50579591]
 [10.71584872 18.01664813]
 [17.94731852 13.71556995]
 [12.82257434 14.24689256]
 [10.9567

## Step 3: MCMC sampling

In [9]:
! python3 -m src.mcmc --nwalkers 20 --nburnsteps 500 500

[INFO][mcmc] no existing chain found, starting initial burn-in
[INFO][mcmc] running 20 walkers for 250 steps
[INFO][mcmc] step 25: acceptance fraction: mean 0.5440, std 0.1388, min 0.2000, max 0.8400
[INFO][mcmc] step 50: acceptance fraction: mean 0.6050, std 0.1002, min 0.4000, max 0.8400
[INFO][mcmc] step 75: acceptance fraction: mean 0.6247, std 0.0698, min 0.5067, max 0.7733
[INFO][mcmc] step 100: acceptance fraction: mean 0.6430, std 0.0671, min 0.5100, max 0.8000
[INFO][mcmc] step 125: acceptance fraction: mean 0.6524, std 0.0650, min 0.5040, max 0.7840
[INFO][mcmc] step 150: acceptance fraction: mean 0.6560, std 0.0602, min 0.5333, max 0.7667
[INFO][mcmc] step 175: acceptance fraction: mean 0.6643, std 0.0522, min 0.5657, max 0.7600
[INFO][mcmc] step 200: acceptance fraction: mean 0.6760, std 0.0447, min 0.6050, max 0.7700
[INFO][mcmc] step 225: acceptance fraction: mean 0.6811, std 0.0419, min 0.6133, max 0.7733
[INFO][mcmc] step 250: acceptance fraction: mean 0.6838, std 0.037

## Step 4: Analyze posterior samples

In [44]:
from src import mcmc
chain = mcmc.Chain()
posterior_samples = chain.load()

In [46]:
! python3 -m src.plots --help

usage: plots.py [-h] [PLOT [PLOT ...]]

generate plots

positional arguments:
  PLOT        {observables_design, observables_posterior, posterior, design,
              gp, diag_emu} (default: all)

optional arguments:
  -h, --help  show this help message and exit


In [10]:
! python3 -m src.plots posterior gp diag_emu

[INFO][plots] generating plot: posterior
[INFO][plots] wrote plots/posterior.pdf
[INFO][plots] generating plot: gp
[INFO][plots] wrote plots/gp.pdf
[INFO][plots] generating plot: diag_emu
[INFO][plots] wrote plots/diag_emu.pdf


## Step 5: adding all sorts of plots