# CERN Electron Mass Prediction

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Load and Check Data

## Data set Features

- `Run`, `Event`: Run and event numbers identifying each recorded collision event
- `E1`, `E2`: Total energy of electron 1 / electron 2 (GeV)
- `px1`, `py1`, `pz1`, `px2`, `py2`, `pz2`: Momentum components of each electron (GeV)
- `pt1`, `pt2`: Transverse momentum of each electron (GeV)
- `eta1`, `eta2`: Pseudorapidity of each electron
- `phi1`, `phi2`: Azimuthal angle of each electron (rad)
- `Q1`, `Q2`: Electric charge of each electron (+1 or -1)
- `M`: Invariant mass of the electron pair (GeV), the quantity to be predicted


In [4]:
# Use a forward-slash relative path: it resolves on Windows, macOS and Linux,
# and avoids the invalid '\d' escape sequence that a backslash separator
# would put inside a normal (non-raw) string literal.
filepath = 'Data/dielectron.csv'
Cern_df = pd.read_csv(filepath)

# Preview the first five rows to confirm the file was parsed as expected.
Cern_df.head()

Unnamed: 0,Run,Event,E1,px1,py1,pz1,pt1,eta1,phi1,Q1,E2,px2,py2,pz2,pt2,eta2,phi2,Q2,M
0,147115,366639895,58.7141,-7.31132,10.531,-57.2974,12.8202,-2.20267,2.17766,1,11.2836,-1.03234,-1.88066,-11.0778,2.14537,-2.34403,-2.07281,-1,8.94841
1,147115,366704169,6.61188,-4.15213,-0.579855,-5.11278,4.19242,-1.02842,-3.00284,-1,17.1492,-11.7135,5.04474,11.4647,12.7536,0.808077,2.73492,1,15.893
2,147115,367112316,25.5419,-11.4809,2.04168,22.7246,11.661,1.42048,2.9656,1,15.8203,-1.4728,2.25895,-15.5888,2.69667,-2.45508,2.14857,1,38.3877
3,147115,366952149,65.3959,7.51214,11.8871,63.8662,14.0619,2.21838,1.00721,1,25.1273,4.08786,2.59641,24.6563,4.84272,2.33021,0.565865,-1,3.72862
4,147115,366523212,61.4504,2.95284,-14.6227,-59.6121,14.9179,-2.09375,-1.37154,-1,13.8871,-0.277757,-2.4256,-13.6708,2.44145,-2.4237,-1.68481,-1,2.74718


In [6]:
# Print each column's name, non-null count and dtype, plus memory usage.
Cern_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 19 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Run     100000 non-null  int64  
 1   Event   100000 non-null  int64  
 2   E1      100000 non-null  float64
 3   px1     100000 non-null  float64
 4   py1     100000 non-null  float64
 5   pz1     100000 non-null  float64
 6   pt1     100000 non-null  float64
 7   eta1    100000 non-null  float64
 8   phi1    100000 non-null  float64
 9   Q1      100000 non-null  int64  
 10  E2      100000 non-null  float64
 11  px2     100000 non-null  float64
 12  py2     100000 non-null  float64
 13  pz2     100000 non-null  float64
 14  pt2     100000 non-null  float64
 15  eta2    100000 non-null  float64
 16  phi2    100000 non-null  float64
 17  Q2      100000 non-null  int64  
 18  M       99915 non-null   float64
dtypes: float64(15), int64(4)
memory usage: 14.5 MB


In [9]:
# Tally missing entries column by column (each True in the null mask counts as 1).
Cern_df.isna().sum(axis=0)

Run       0
Event     0
E1        0
px1       0
py1       0
pz1       0
pt1       0
eta1      0
phi1      0
Q1        0
E2        0
px2       0
py2       0
pz2       0
pt2       0
eta2      0
phi2      0
Q2        0
M        85
dtype: int64

Only 85 of the 100,000 rows (less than 0.1%) have a missing value, all in `M`, so we simply drop those rows rather than imputing them.

In [11]:
# Discard every row that contains at least one missing value; the original
# index labels of the surviving rows are kept.
Cern_df = Cern_df.dropna(axis=0, how='any')

In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
# of the frame after the rows with missing values were dropped.
Cern_df.describe()

Unnamed: 0,Run,Event,E1,px1,py1,pz1,pt1,eta1,phi1,Q1,E2,px2,py2,pz2,pt2,eta2,phi2,Q2,M
count,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0,99915.0
mean,147935.420007,495747100.0,36.460107,0.135858,0.183211,-1.5091,14.418195,-0.064143,0.022002,-0.005495,44.028549,-0.003442,0.125963,-1.591798,13.804597,-0.072885,0.021772,-0.004234,30.019521
std,920.315526,403773600.0,41.225336,13.409322,13.474668,51.625332,12.391213,1.462626,1.799506,0.99999,46.761747,13.130364,13.171806,61.455194,12.463948,1.73866,1.816991,0.999996,25.255847
min,146511.0,6414.0,0.377928,-250.587,-126.079,-840.987,0.219629,-4.16538,-3.14158,-1.0,0.4725,-233.73,-145.651,-655.396,0.026651,-7.06479,-3.14158,-1.0,2.00008
25%,147114.0,202371600.0,8.472775,-5.23719,-5.2811,-15.89865,3.77367,-1.28465,-1.52658,-1.0,11.06555,-4.79387,-4.604685,-22.06265,3.737825,-1.892965,-1.55606,-1.0,12.4452
50%,147929.0,390291700.0,21.7402,0.141661,0.100544,-0.31336,12.9729,-0.061352,0.035015,-1.0,25.2914,-0.035653,0.084512,-0.692463,11.6908,-0.136708,0.027453,-1.0,21.2831
75%,149011.0,633933200.0,50.05925,5.71892,5.652355,13.2344,20.02605,1.145405,1.56268,1.0,66.98335,4.818905,5.063075,19.0418,19.6017,1.7693,1.59704,1.0,39.02505
max,149182.0,1791247000.0,850.602,134.539,147.467,760.096,265.578,2.62297,3.14142,1.0,948.375,227.33,166.283,935.558,281.654,3.06055,3.14129,1.0,109.999
