In [None]:
import awkward as ak
import numpy as np
import uproot
import nbimporter
import import_ipynb
import matplotlib.pyplot as plt
from hffrag import fixedbinning
from hffrag import binneddensity
import seaborn as sns
import DeepSetNeuralNetArchitecture as DSNNA
from numpy.lib.recfunctions import structured_to_unstructured
import pandas as pd
from pandas.plotting import scatter_matrix
from sklearn.feature_selection import mutual_info_regression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
# Notebook-wide plotting configuration: inline figures and the seaborn "ticks" theme.
%matplotlib inline
# Disabled rcParams overrides, kept for reference:
#plt.rcParams['axes.facecolor'] = 'white'
#plt.rcParams['savefig.facecolor'] = 'red'
#plt.rc('text',usetex = False)
#plt.rc('font',family = 'Times New Roman')
# Inline figures are rendered with a white face colour.
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
sns.set_theme(style = "ticks")

In [None]:
# Open the "CharmAnalysis" object stored inside hffrag.root with uproot.
# Every later cell reads its branches through this handle.
tree = uproot.open("hffrag.root:CharmAnalysis")

In [None]:
# Select the features we wish to study.
# Per-track branches; the order fixes the column order of the track arrays built later.
track_features = ["AnalysisTracks_pt", "AnalysisTracks_eta", "AnalysisTracks_phi", "AnalysisTracks_z0sinTheta",
                  "AnalysisTracks_d0sig", "AnalysisTracks_d0", "AnalysisTracks_d0sigPV", "AnalysisTracks_d0PV"]
# Per-jet branches. The three ghostB_* entries must stay last: the processing cell
# drops them with jet_features[:-3] before converting the jets.
jet_features = ["AnalysisAntiKt4TruthJets_pt", "AnalysisAntiKt4TruthJets_eta", "AnalysisAntiKt4TruthJets_phi",
                "AnalysisAntiKt4TruthJets_ghostB_pt", "AnalysisAntiKt4TruthJets_ghostB_eta","AnalysisAntiKt4TruthJets_ghostB_phi"]

# Passed as entry_stop when the arrays are read.
MAXEVENTS = 1e20
# Passed to the flatten helper as its second argument.
MAXTRACKS = 32
# NOTE(review): MASKVAL is not referenced in the visible cells, which compare against -999 instead — confirm which padding value is actually in use.
MASKVAL = 0

In [None]:
def make_mi_scores(X,y,random_state = None):
    """Rank the columns of ``X`` by their mutual information with ``y``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; its column labels become the index of the result.
    y : array-like
        Target values, one per row of ``X``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``mutual_info_regression``, which adds a small random
        jitter to continuous features. Pass a fixed seed to get repeatable
        scores; the default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.Series
        Scores named "MI Scores", sorted from highest to lowest.
    """
    raw_scores = mutual_info_regression(X,y,random_state = random_state)
    mi_scores = pd.Series(raw_scores,name = "MI Scores",index = X.columns)
    return mi_scores.sort_values(ascending = False)
def plot_mi_scores(scores):
    """Draw a horizontal bar chart of mutual-information scores with pyplot.

    Parameters
    ----------
    scores : pandas.Series
        One score per feature, indexed by feature name.

    The bars are ordered ascending, so the largest score ends up at the
    top of the chart. Nothing is returned.
    """
    scores = scores.sort_values(ascending = True)
    # Bar positions along the y axis, one per feature.
    positions = np.arange(len(scores))
    labels = list(scores.index)
    plt.barh(positions, scores)
    plt.yticks(positions,labels)
    plt.title("Mutual Information Scores")

In [None]:
# Read the selected jet and track branches from the tree, with MAXEVENTS as the entry_stop bound.
features = tree.arrays(jet_features+track_features,entry_stop = MAXEVENTS)

In [None]:
# Histograms of the index-0 jet and index-0 track of every event, one cell per branch.
# NOTE(review): whether binneddensity draws on the figure created by plt.figure() is not
# visible in this file; the object it returns is what each cell displays.
Figure = plt.figure()
jet_pt_distribution = binneddensity(features["AnalysisAntiKt4TruthJets_pt"][:,0],fixedbinning(0,500000,100),xlabel = "Transverse momentum [MeV] of first jets from each event")
jet_pt_distribution

In [None]:
# NOTE(review): this is the jet eta histogram, although it is stored as jet_pt_distribution.
Figure = plt.figure()
jet_pt_distribution = binneddensity(features["AnalysisAntiKt4TruthJets_eta"][:,0],fixedbinning(-np.pi,np.pi,100),xlabel = "Pseudorapidity of first jets from each event")
jet_pt_distribution

In [None]:
# NOTE(review): this is the jet phi histogram, although it is stored as jet_pt_distribution.
Figure = plt.figure()
jet_pt_distribution = binneddensity(features["AnalysisAntiKt4TruthJets_phi"][:,0],fixedbinning(-np.pi-0.5,np.pi+0.5,100),xlabel = "Phi angle [rad] of first jets from event")
jet_pt_distribution

In [None]:
# ghostB_pt is flattened over every axis, so all entries are histogrammed.
# NOTE(review): the xlabel speaks of "first" b-hadrons — confirm which is intended.
Figure = plt.figure()
bhadrons = ak.flatten(features["AnalysisAntiKt4TruthJets_ghostB_pt"],axis = None)
jet_bhadron_pt_distribution = binneddensity(bhadrons,fixedbinning(-10000,500000,1000),xlabel = "Transverse momentum [MeV] of first bhadrons jets from each event")
jet_bhadron_pt_distribution

In [None]:
# pt of the index-0 track of each event.
Figure = plt.figure()
Tracks_pt = binneddensity(features["AnalysisTracks_pt"][:,0],fixedbinning(400,3000,400),xlabel = "Transverse momentum [MeV] of first tracks from each event")
Tracks_pt

In [None]:
# eta of the index-0 track of each event.
Figure = plt.figure()
Tracks_eta = binneddensity(features["AnalysisTracks_eta"][:,0],fixedbinning(-np.pi,np.pi,10000),xlabel = " Pseudorapidity of the first tracks from each event")
Tracks_eta

In [None]:
# phi of the index-0 track of each event.
Figure = plt.figure()
Tracks_phi = binneddensity(features["AnalysisTracks_phi"][:,0],fixedbinning(-np.pi,np.pi,4000),xlabel = "The phi [rad] angles of first tracks from each event")
Tracks_phi

In [None]:
# z0sinTheta of the index-0 track of each event.
Figure = plt.figure()
Tracks_z0_sin_theta = binneddensity(features["AnalysisTracks_z0sinTheta"][:,0],fixedbinning(-1,1,400),xlabel = "The z0sin(theta) for the first track of each event ")
Tracks_z0_sin_theta

In [None]:
# NOTE(review): this is the d0 histogram, although it is stored as Tracks_z0.
Figure = plt.figure()
Tracks_z0 = binneddensity(features["AnalysisTracks_d0"][:,0],fixedbinning(-1,1,400),xlabel = "The d0 of first tracks from each event")
Tracks_z0

In [None]:
# NOTE(review): d0sig histogram; the name Tracks_pt is reused and overwrites the pt histogram.
Figure = plt.figure()
Tracks_pt = binneddensity(features["AnalysisTracks_d0sig"][:,0],fixedbinning(-10,10,400),xlabel = "The d0sig of first tracks from each event")
Tracks_pt

In [None]:
# NOTE(review): d0PV histogram; the name Tracks_pt is reused again.
Figure = plt.figure()
Tracks_pt = binneddensity(features["AnalysisTracks_d0PV"][:,0],fixedbinning(-1,1,400),xlabel = "The d0PV of first tracks from each event")
Tracks_pt

In [None]:
# NOTE(review): d0sigPV histogram; the name Tracks_pt is reused again.
Figure = plt.figure()
Tracks_pt = binneddensity(features["AnalysisTracks_d0sigPV"][:,0],fixedbinning(-10,10,400),xlabel = " The d0sigPVsig of first tracks from each event")
Tracks_pt

In [None]:
#Select the events of interest: at least one truth jet with pt above 25000
events = features[ak.sum(features["AnalysisAntiKt4TruthJets_pt"] > 25000, axis = 1) > 0]
#Keep the index-0 jet of every selected event and display how many there are
jets = events[jet_features][:,0]
print("The number of jets to train on is: ", len(jets))

#Select tracks from the events
tracks = events[track_features]

#Match the tracks to the jets
#The helper module is imported as DSNNA; the DeepSetNeuralNet name used here before
#is not imported in the visible cells and raised NameError on a fresh kernel.
#NOTE(review): confirm Match_Tracks and flatten are provided by DeepSetNeuralNetArchitecture.
matchedtracks = tracks[DSNNA.Match_Tracks(jets,tracks)]

#Pad and Flatten the data
matchedtracks = DSNNA.flatten(matchedtracks, MAXTRACKS)

# Identify the bottom jets (at least one ghost b-hadron with pt above 5000) and their associated tracks
bjets = ak.sum(jets["AnalysisAntiKt4TruthJets_ghostB_pt"] > 5000, axis=1) > 0
jets = jets[bjets]
#Index-0 b-hadron of every b jet, stacked column-wise as (pt, eta, phi)
bhads_pt = jets["AnalysisAntiKt4TruthJets_ghostB_pt"][:, 0].to_numpy()
bhads_eta = jets["AnalysisAntiKt4TruthJets_ghostB_eta"][:,0].to_numpy()
bhads_phi = jets["AnalysisAntiKt4TruthJets_ghostB_phi"][:,0].to_numpy()
bhads = np.stack([bhads_pt,bhads_eta,bhads_phi],axis = -1)

print("There are {} outputs".format(np.shape(bhads)[1]))
matchedtracks = matchedtracks[bjets]
print("There are {} inputs".format(np.shape(matchedtracks)[1]))

#Transform the jet and tracks to unstructured data.
#jet_features[:-3] leaves out the three ghostB_* branches, which now live in bhads.
jets = structured_to_unstructured(jets[jet_features[:-3]])
matchedtracks = structured_to_unstructured(matchedtracks)

#Fix the angles: convert (pt, eta, phi) to (px, py, pz) with the DSNNA helpers
jets = DSNNA.pt_eta_phi_2_px_py_pz_jets(jets).to_numpy()
tracks_p = DSNNA.pt_eta_phi_2_px_py_pz_tracks(matchedtracks.to_numpy())
bhads_cart = DSNNA.pt_eta_phi_2_px_py_pz_jets(bhads)
print(np.shape(tracks_p))
print(np.shape(matchedtracks[:, :, 3:]))
#Converted momenta first, then the remaining track columns (index 3 onwards) unchanged
tracks = np.concatenate([tracks_p,matchedtracks[:,:,3:].to_numpy()],axis = 2)
print(np.shape(tracks))



In [None]:
# Standardise every track feature column.
# The (event, track) axes are collapsed into one so the scaler sees a 2-D table,
# then the original 3-D layout is restored. The ndarray.reshape method is used
# because the newshape keyword of np.reshape is deprecated in recent NumPy releases.
# NOTE(review): every track slot, including any padded ones, takes part in the fit, and
# sentinel values such as -999 no longer equal -999 afterwards — confirm the later masking cells.
Scaler = StandardScaler()
Num_events,Num_tracks,Num_features = np.shape(tracks)
tracks = tracks.reshape(-1,Num_features)
tracks = Scaler.fit_transform(tracks)
tracks = tracks.reshape(Num_events,Num_tracks,Num_features)
print(np.shape(tracks))
print(tracks[0,0,:])

In [None]:
# Principal-component analysis of the standardised track features.
# NOTE(review): the fit uses tracks[0,:,:] only, i.e. the track slots of the first
# entry along axis 0 — confirm whether all entries were meant to be included.
pca = PCA()
track_pca = pca.fit_transform(tracks[0,:,:])
# One column label per principal component: PC1, PC2, ...
component_names = [f"PC{i+1}" for i in range(track_pca.shape[1])]
tracks_pca = pd.DataFrame(track_pca, columns = component_names)
tracks_pca.head()

In [None]:
def plot_variance(pca, width=8, dpi=100):
    """Plot per-component and cumulative explained variance of a fitted PCA.

    Parameters
    ----------
    pca : object
        Fitted decomposition exposing ``n_components_`` and
        ``explained_variance_ratio_``.
    width : float, optional
        Figure width passed to the figure as ``figwidth``.
    dpi : float, optional
        Figure resolution.

    Returns
    -------
    The two axes created by ``plt.subplots(1, 2)``: the bar chart of the
    explained-variance ratio first, the cumulative curve second.
    """
    # Create figure
    fig, axs = plt.subplots(1, 2)
    n = pca.n_components_
    grid = np.arange(1, n + 1)
    # Explained variance
    evr = pca.explained_variance_ratio_
    axs[0].bar(grid, evr)
    axs[0].set(
        xlabel="Component", title="% Explained Variance", ylim=(0.0, 1.0)
    )
    # Cumulative Variance; a leading zero makes the curve start at the origin
    cv = np.cumsum(evr)
    axs[1].plot(np.r_[0, grid], np.r_[0, cv], "o-")
    axs[1].set(
        xlabel="Component", title="% Cumulative Variance", ylim=(0.0, 1.0)
    )
    # Set up figure using the caller's values (these were previously hard-coded to 8 and 100)
    fig.set(figwidth=width, dpi=dpi)
    return axs
# Explained-variance summary for the PCA fitted in the previous cell.
plot_variance(pca)

In [None]:
# Column 0 of every track slot for the first entry of the standardised array.
tracks[0,:,0]

In [None]:
# Mask entries equal to -999 in both track arrays.
# NOTE(review): tracks has already gone through StandardScaler.fit_transform, so a
# comparison against -999 is unlikely to match anything there — confirm the intent.
tracks = np.ma.masked_equal(tracks, -999)
matchedtracks = np.ma.masked_equal(matchedtracks,-999)
print(tracks[0,:,1])
print(matchedtracks[0,:,1])
print(np.shape(tracks))
print(np.shape(matchedtracks))

In [None]:
# Histograms for the selected b jets: columns 0-2 of the first track slot of matchedtracks.
binneddensity(matchedtracks[:,0,0],fixedbinning(0,50000,100),xlabel = "Transverse Momentum of the first tracks from b jet events")

In [None]:
binneddensity(matchedtracks[:,0,1],fixedbinning(-2.8,2.8,100),xlabel = "The Pseudorapidities of the first tracks from the b jet events")

In [None]:
binneddensity(matchedtracks[:,0,2],fixedbinning(-np.pi,np.pi,100),xlabel = "The Phi angles of the first tracks from the b jet events")

In [None]:
# b-hadron columns in the order they were stacked into bhads: pt, eta, phi.
binneddensity(bhads[:,0],fixedbinning(0, 500000,100),xlabel = "Transverse Momentum of the B hadron jets")

In [None]:
binneddensity(bhads[:,1],fixedbinning(-2.5, 2.5,100),xlabel = "The Pseudorapidities of the B hadron jets")

In [None]:
binneddensity(bhads[:,2],fixedbinning(-np.pi, np.pi,100),xlabel = "Phi Angles of the B hadron jets")

In [None]:
# First three columns of the first track slot of tracks.
# NOTE(review): tracks was standardised earlier, so these values are in scaled units —
# confirm that the -998..1000 binning and the momentum labels are still appropriate.
binneddensity(tracks[:,0,0],fixedbinning(-998, 1000,100),xlabel = "X Momentum of the first tracks from b jet events")

In [None]:
binneddensity(tracks[:,0,1],fixedbinning(-998, 1000,100),xlabel = "Y Momentum of the first tracks from b jet events")

In [None]:
binneddensity(tracks[:,0,2],fixedbinning(-998, 1000,100),xlabel = "Z Momentum of the first tracks from b jet events")

In [None]:
# Columns of bhads_cart, the output of the pt/eta/phi -> px/py/pz helper applied to bhads.
binneddensity(bhads_cart[:,0],fixedbinning(-998, 1000,100),xlabel = "X Momentum of the bjets")

In [None]:
binneddensity(bhads_cart[:,1],fixedbinning(-998, 1000,100),xlabel = "Y Momentum of the bjets")

In [None]:
binneddensity(bhads_cart[:,2],fixedbinning(-998, 1000,100),xlabel = "Z Momentum of the bjets")

In [None]:
# Raw display of column 0 of bhads and of bhads_cart for a visual comparison.
bhads[:,0]

In [None]:
bhads_cart[:,0]

In [None]:
# Tabular views used by the exploratory cells: the first track slot of each entry
# (polar-style and converted columns) and the matching b-hadron quantities.
Track_Data = pd.DataFrame(
    data = matchedtracks[:,0,:],
    columns = [
        "AnalysisTracks_pt",
        "AnalysisTracks_pseudorapidity",
        "AnalysisTracks_phi_angles",
        "AnalysisTracks_z0sinTheta",
        "AnalysisTracks_d0sig",
        "AnalysisTracks_d0",
        "AnalysisTracks_d0sigPV",
        "AnalysisTracks_d0PV",
    ],
)
Track_Cart_Data = pd.DataFrame(
    data = tracks[:,0,:],
    columns = [
        "AnalysisTracks_px",
        "AnalysisTracks_py",
        "AnalysisTracks_pz",
        "AnalysisTracks_z0sinTheta",
        "AnalysisTracks_d0sig",
        "AnalysisTracks_d0",
        "AnalysisTracks_d0sigPV",
        "AnalysisTracks_d0PV",
    ],
)
Bhad_Data = pd.DataFrame(
    data = bhads,
    columns = ["Transverse Momentum","Pseudorapidity","Phi Angle"],
)
Bhad_Cart_Data = pd.DataFrame(
    data = bhads_cart,
    columns = ["Momentum_px","Momentum_py","Momentum_pz"],
)
# Extra column: Euclidean norm of the three px/py/pz columns.
Track_Cart_Data["AnalysisTracks_Momenta"] = np.sqrt(
    Track_Cart_Data["AnalysisTracks_px"]**2
    + Track_Cart_Data["AnalysisTracks_py"]**2
    + Track_Cart_Data["AnalysisTracks_pz"]**2
)

In [None]:
# First rows of each of the four tables.
Track_Data.head()

In [None]:
Track_Cart_Data.head()

In [None]:
Bhad_Data.head()

In [None]:
Bhad_Cart_Data.head()

In [None]:
# Summary statistics of each table.
Track_Data.describe()

In [None]:
Track_Cart_Data.describe()

In [None]:
Bhad_Data.describe()

In [None]:
Bhad_Cart_Data.describe()

In [None]:
# Column dtypes and non-null counts of each table.
Track_Data.info()

In [None]:
Track_Cart_Data.info()

In [None]:
Bhad_Data.info()

In [None]:
Bhad_Cart_Data.info()

In [None]:
# Put the track columns and the b-hadron columns side by side (column-wise concat),
# once for the pt/eta/phi tables and once for the px/py/pz tables.
Data = pd.concat([Track_Data,Bhad_Data],axis = 1)
Data_Cart = pd.concat([Track_Cart_Data,Bhad_Cart_Data], axis = 1)

In [None]:
# Drop rows whose first track carries the -999 sentinel, then rows with missing values.
# dropna() is chained instead of called with inplace=True on the filtered frame, which
# avoids pandas' SettingWithCopyWarning and leaves the cell safe to re-run.
# NOTE(review): the px column comes from the standardised array, so the -999
# comparison on Data_Cart may never match — confirm.
Data = Data[Data.AnalysisTracks_pt != -999].dropna()
Data_Cart = Data_Cart[Data_Cart.AnalysisTracks_px != -999].dropna()

In [None]:
# First rows of the two combined tables after filtering.
Data.head()

In [None]:
Data_Cart.head()

In [None]:
# Re-derive the four per-table views from the filtered combined frames so that the
# track and b-hadron tables keep the same rows. The AnalysisTracks_Momenta column of
# Data_Cart is not selected here.
Track_Data = Data[["AnalysisTracks_pt","AnalysisTracks_pseudorapidity","AnalysisTracks_phi_angles","AnalysisTracks_z0sinTheta","AnalysisTracks_d0sig","AnalysisTracks_d0","AnalysisTracks_d0sigPV","AnalysisTracks_d0PV"]]
Track_Cart_Data = Data_Cart[["AnalysisTracks_px","AnalysisTracks_py","AnalysisTracks_pz","AnalysisTracks_z0sinTheta","AnalysisTracks_d0sig","AnalysisTracks_d0","AnalysisTracks_d0sigPV","AnalysisTracks_d0PV"]]
Bhad_Data = Data[["Transverse Momentum", "Pseudorapidity","Phi Angle"]]
Bhad_Cart_Data = Data_Cart[["Momentum_px","Momentum_py","Momentum_pz"]]

In [None]:
# Column groups for the scatter matrices on Data.
# NOTE(review): AnalysisTracks_d0PV is absent from the second list — confirm that is intended.
attributesA = ["AnalysisTracks_pt","AnalysisTracks_pseudorapidity","AnalysisTracks_phi_angles","Transverse Momentum"]
attributes2A = ["AnalysisTracks_z0sinTheta","AnalysisTracks_d0sig","AnalysisTracks_d0","AnalysisTracks_d0sigPV","Transverse Momentum"]

In [None]:
# Column groups for the scatter matrices on Data_Cart.
attributesB = ["AnalysisTracks_px","AnalysisTracks_py","AnalysisTracks_pz","Momentum_px","Momentum_py","Momentum_pz"]
attributes2B = ["AnalysisTracks_z0sinTheta","AnalysisTracks_d0sig","AnalysisTracks_d0","AnalysisTracks_d0sigPV","Momentum_px","Momentum_py","Momentum_pz"]

In [None]:
# Summary statistics of the d0 column alone.
Data.AnalysisTracks_d0.describe()

In [None]:
# Pairwise scatter plots for each column group.
scatter_matrix(Data[attributesA],figsize = (12,8))

In [None]:
scatter_matrix(Data[attributes2A],figsize = (12,12))

In [None]:
scatter_matrix(Data_Cart[attributesB],figsize = (12,8))

In [None]:
scatter_matrix(Data_Cart[attributes2B],figsize = (12,8))

In [None]:
# Per-column histograms of the two combined tables.
Data.hist(bins = 50,figsize = (12,12))
plt.show()

In [None]:
Data_Cart.hist(bins = 50,figsize = (12,12))
plt.show()

In [None]:
# Mutual information of each Track_Data column with the b-hadron "Transverse Momentum" column.
mi_scores_Transverse_Momentum = make_mi_scores(Track_Data,Bhad_Data["Transverse Momentum"])
mi_scores_Transverse_Momentum

In [None]:
# plt.figure registers the new figure with pyplot, so the bars drawn by plot_mi_scores
# land on it; plt.Figure only built a detached object and the dpi/figsize were ignored.
plt.figure(dpi = 200,figsize = (12,12))
plot_mi_scores(mi_scores_Transverse_Momentum)

In [None]:
# Mutual information of each Track_Data column with the b-hadron "Pseudorapidity" column.
mi_scores_Pseudorapidity = make_mi_scores(Track_Data,Bhad_Data["Pseudorapidity"])
mi_scores_Pseudorapidity

In [None]:
# plt.figure registers the new figure with pyplot, so the bars drawn by plot_mi_scores
# land on it; plt.Figure only built a detached object and the dpi/figsize were ignored.
plt.figure(dpi = 200, figsize = (8,5))
plot_mi_scores(mi_scores_Pseudorapidity)

In [None]:
# Mutual information of each Track_Data column with the b-hadron "Phi Angle" column.
mi_scores_Phi_Angle = make_mi_scores(Track_Data,Bhad_Data["Phi Angle"])
mi_scores_Phi_Angle

In [None]:
# plt.figure registers the new figure with pyplot, so the bars drawn by plot_mi_scores
# land on it; plt.Figure only built a detached object and the dpi/figsize were ignored.
plt.figure(dpi = 100, figsize = (12,12))
plot_mi_scores(mi_scores_Phi_Angle)

In [None]:
# Mutual information of each Track_Cart_Data column with the b-hadron "Momentum_px" column.
mi_scores_Momentum_px = make_mi_scores(Track_Cart_Data,Bhad_Cart_Data["Momentum_px"])
mi_scores_Momentum_px

In [None]:
# plt.figure registers the new figure with pyplot, so the bars drawn by plot_mi_scores
# land on it; plt.Figure only built a detached object and the dpi/figsize were ignored.
plt.figure(dpi = 100, figsize = (12,12))
plot_mi_scores(mi_scores_Momentum_px)

In [None]:
# Mutual information of each Track_Cart_Data column with the b-hadron "Momentum_py" column.
mi_scores_Momentum_py = make_mi_scores(Track_Cart_Data,Bhad_Cart_Data["Momentum_py"])
mi_scores_Momentum_py

In [None]:
# plt.figure registers the new figure with pyplot, so the bars drawn by plot_mi_scores
# land on it; plt.Figure only built a detached object and the dpi/figsize were ignored.
plt.figure(dpi = 100, figsize = (12,12))
plot_mi_scores(mi_scores_Momentum_py)

In [None]:
# Mutual information of each Track_Cart_Data column with the b-hadron "Momentum_pz" column.
mi_scores_Momentum_pz = make_mi_scores(Track_Cart_Data,Bhad_Cart_Data["Momentum_pz"])
mi_scores_Momentum_pz

In [None]:
# plt.figure registers the new figure with pyplot, so the bars drawn by plot_mi_scores
# land on it; plt.Figure only built a detached object and the dpi/figsize were ignored.
plt.figure(dpi = 100, figsize = (12,12))
plot_mi_scores(mi_scores_Momentum_pz)

In [None]:
# Sum column 0 of matchedtracks (the pt column, per track_features) over the track slots
# of each entry and histogram its difference to the b-hadron pt (column 0 of bhads).
sum_tracks = np.sum(matchedtracks[:,:,0],axis = 1)
print(np.shape(tracks))
print(np.shape(sum_tracks))
print(sum_tracks[0])
print(matchedtracks[0,:,0])
# The label now says pt: the summed column is the transverse momentum, not px.
binneddensity(sum_tracks-bhads[:,0],fixedbinning(-150000,300000,100),xlabel ="Sum_pt_Tracks - Momentum True")

In [None]:
# Mask-aware sum of column 0 of tracks over the track slots, compared with column 0 of bhads_cart.
# NOTE(review): tracks was standardised earlier while bhads_cart was not — confirm the
# two are on a comparable scale before reading much into the difference.
sum_tracks_cart_px = np.ma.sum(tracks[:,:,0],axis = 1)
print(sum_tracks_cart_px)
binneddensity(sum_tracks_cart_px-bhads_cart[:,0],fixedbinning(-250000,300000,100),xlabel ="Sum_Tracks_px - Momentum True")

In [None]:
# Distribution of the summed column 0 on its own.
binneddensity(sum_tracks_cart_px,fixedbinning(-90000,90000,100),xlabel = "Sum of the X momenta of the tracks of each event")

In [None]:
# Same comparison for column 1.
sum_tracks_cart_py = np.ma.sum(tracks[:,:,1],axis = 1)
binneddensity(sum_tracks_cart_py-bhads_cart[:,1],fixedbinning(-250000,300000,100),xlabel ="Sum_py_Tracks - Momentum True")

In [None]:
# Same comparison for column 2.
sum_tracks_cart_pz = np.ma.sum(tracks[:,:,2],axis = 1)
binneddensity(sum_tracks_cart_pz-bhads_cart[:,2],fixedbinning(-250000,300000,100),xlabel ="Sum_pz_Tracks - Momentum True")

In [None]:
# Distribution of sum_tracks, the per-entry sum computed from matchedtracks column 0.
binneddensity(sum_tracks,fixedbinning(-25000,150000,100),xlabel = "Sum Tracks")

In [None]:
# Pairwise correlations of the Data columns, ranked against "Transverse Momentum".
# NOTE(review): the variable is called Covariance_matrix but holds the result of .corr().
Covariance_matrix = Data.corr()
Covariance_matrix["Transverse Momentum"].sort_values(ascending = False)

In [None]:
# Same for Data_Cart, ranked against Momentum_px.
Covariance_matrix = Data_Cart.corr()
Covariance_matrix["Momentum_px"].sort_values(ascending = False)

In [None]:
# Ranked against Momentum_py (the Data_Cart correlations are recomputed).
Covariance_matrix = Data_Cart.corr()
Covariance_matrix["Momentum_py"].sort_values(ascending = False)

In [None]:
# Ranked against Momentum_pz (the Data_Cart correlations are recomputed).
Covariance_matrix = Data_Cart.corr()
Covariance_matrix["Momentum_pz"].sort_values(ascending = False)

In [None]:
# Summed track pt per entry against the b-hadron pt.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(
    x = bhads[:,0],
    y = sum_tracks,
    color = "red",
    ax = ax
)
ax.set_title("Scatterplot ")
ax.set_xlim([np.min(bhads[:,0]),np.max(bhads[:,0])])
# The y range follows the plotted sums; it was previously taken from the first-track
# column, which could hide points lying outside that range.
ax.set_ylim([np.min(sum_tracks),np.max(sum_tracks)])
ax.set_ylabel("The sum tranverse momenta of the tracks from each event")
ax.set_xlabel("Tranverse momenta of b hadron")
plt.show()

In [None]:
# First-track pt (column 0 of the first track slot) against the b-hadron pt.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(x = bhads[:,0], y = matchedtracks[:,0,0], color = "red", ax = ax)
# Axis ranges span the extremes of the plotted series.
ax.set(
    title = "Scatterplot ",
    xlim = (np.min(bhads[:,0]), np.max(bhads[:,0])),
    ylim = (np.min(matchedtracks[:,0,0]), np.max(matchedtracks[:,0,0])),
    ylabel = "The tranverse momenta of first tracks from events",
    xlabel = "Tranverse momenta of b hadron",
)
plt.show()

In [None]:
# Summed column 0 of tracks per entry against column 0 of bhads_cart.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(
    x = bhads_cart[:,0],
    y = sum_tracks_cart_px,
    color = "purple",
    ax = ax
)
ax.set_title("Scatterplot ")
ax.set_xlim([np.min(bhads_cart[:,0]),np.max(bhads_cart[:,0])])
# The y range follows the plotted sums; it was previously taken from the first-track
# column, which could hide points lying outside that range.
ax.set_ylim([np.min(sum_tracks_cart_px),np.max(sum_tracks_cart_px)])
ax.set_ylabel("The sum of the X momenta of the tracks from each event")
ax.set_xlabel("The X momenta of b hadron jets")
plt.show()

In [None]:
# Column 0 of the first track slot of tracks against column 0 of bhads_cart.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(x = bhads_cart[:,0], y = tracks[:,0,0], color = "purple", ax = ax)
# Axis ranges span the extremes of the plotted series.
ax.set(
    title = "Scatterplot ",
    xlim = (np.min(bhads_cart[:,0]), np.max(bhads_cart[:,0])),
    ylim = (np.min(tracks[:,0,0]), np.max(tracks[:,0,0])),
    ylabel = "The X momenta of first tracks from events",
    xlabel = "The X momenta of b hadron jets",
)
plt.show()

In [None]:
# Summed column 1 of tracks per entry against column 1 of bhads_cart.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(
    x = bhads_cart[:,1],
    y = sum_tracks_cart_py,
    color = "orange",
    ax = ax
)
ax.set_title("Scatterplot ")
ax.set_xlim([np.min(bhads_cart[:,1]),np.max(bhads_cart[:,1])])
# The y range follows the plotted sums; it was previously taken from the first-track
# column, which could hide points lying outside that range.
ax.set_ylim([np.min(sum_tracks_cart_py),np.max(sum_tracks_cart_py)])
ax.set_ylabel("The sum Y momenta of the tracks from each event")
ax.set_xlabel("The Y momenta of b hadron jets")
plt.show()

In [None]:
# Column 1 of the first track slot of tracks against column 1 of bhads_cart.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(x = bhads_cart[:,1], y = tracks[:,0,1], color = "orange", ax = ax)
# Axis ranges span the extremes of the plotted series.
ax.set(
    title = "Scatterplot ",
    xlim = (np.min(bhads_cart[:,1]), np.max(bhads_cart[:,1])),
    ylim = (np.min(tracks[:,0,1]), np.max(tracks[:,0,1])),
    ylabel = "The Y momenta of the first tracks from events",
    xlabel = "The Y momenta of b hadron jets",
)
plt.show()

In [None]:
# Summed column 2 of tracks per entry against column 2 of bhads_cart.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(
    x = bhads_cart[:,2],
    y = sum_tracks_cart_pz,
    color = "green",
    ax = ax
)
ax.set_title("Scatterplot ")
ax.set_xlim([np.min(bhads_cart[:,2]),np.max(bhads_cart[:,2])])
# The y range follows the plotted sums; it was previously taken from the first-track
# column, which could hide points lying outside that range.
ax.set_ylim([np.min(sum_tracks_cart_pz),np.max(sum_tracks_cart_pz)])
ax.set_ylabel("The sum Z momenta of the tracks from each event")
ax.set_xlabel("The Z momenta of b hadron jets")
plt.show()

In [None]:
# Column 2 of the first track slot of tracks against column 2 of bhads_cart.
fig,ax = plt.subplots(figsize = (12,12))
sns.scatterplot(x = bhads_cart[:,2], y = tracks[:,0,2], color = "green", ax = ax)
# Axis ranges span the extremes of the plotted series.
ax.set(
    title = "Scatterplot ",
    xlim = (np.min(bhads_cart[:,2]), np.max(bhads_cart[:,2])),
    ylim = (np.min(tracks[:,0,2]), np.max(tracks[:,0,2])),
    ylabel = "The Z momenta of the first tracks from events",
    xlabel = "The Z momenta of b hadron jets",
)
plt.show()