# Searching for Higgs Boson Decay Modes with Deep Learning

### Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import seaborn as sns
import tensorflow as tf
import plotly.express as px
from sklearn.model_selection import train_test_split, cross_val_score
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

### Data Exploration

In [None]:
# Load the training CSV into a DataFrame and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook on another machine.
training_csv = r"C:\Users\Snehal Thakur\Documents\Trainings\TMLC\DL\Project3.HiggsBosonEventDetection\training.csv"
df = pd.read_csv(training_csv)
df.head()

In [None]:
# Dimensions of the dataset as a (rows, columns) tuple.
df.shape

In [None]:
# Names of all columns in the dataset.
df.columns

In [None]:
# Summary statistics for every column; include="all" also covers non-numeric columns.
df.describe(include="all")

In [None]:
# Print each column's dtype and non-null count, plus overall memory usage.
df.info()

### Data Preprocessing 

In [None]:
# Count missing (NaN) values in each column.
df.isna().sum()

In [None]:
# Count rows that are exact duplicates of an earlier row.
df.duplicated().sum()

### Univariate Analysis 

In [None]:
# Show one interactive Plotly histogram per column of the raw data.
for column_name in df.columns:
    px.histogram(df, x=column_name, width=850, height=400).show()

In [None]:
# Replace every -999.0 entry with 0, in place, across the whole DataFrame.
# NOTE(review): -999.0 looks like a placeholder for undefined/missing
# measurements rather than a genuine outlier — confirm against the dataset
# documentation, and check that 0 is a sensible substitute for every
# affected column.
df.replace(-999.0, 0,inplace=True)

### Univariate Analysis after removing outliers 

In [None]:
# Redraw the per-column Plotly histograms now that -999.0 has been replaced.
for column_name in df.columns:
    px.histogram(df, x=column_name, width=850, height=400).show()

### Bivariate Analysis 

In [None]:
# Numeric columns to compare against the categorical target "Label".
nums = [
    'EventId', 'DER_mass_MMC', 'DER_mass_transverse_met_lep',
    'DER_mass_vis', 'DER_pt_h', 'DER_deltaeta_jet_jet', 'DER_mass_jet_jet',
    'DER_prodeta_jet_jet', 'DER_deltar_tau_lep', 'DER_pt_tot', 'DER_sum_pt',
    'DER_pt_ratio_lep_tau', 'DER_met_phi_centrality',
    'DER_lep_eta_centrality', 'PRI_tau_pt', 'PRI_tau_eta', 'PRI_tau_phi',
    'PRI_lep_pt', 'PRI_lep_eta', 'PRI_lep_phi', 'PRI_met', 'PRI_met_phi',
    'PRI_met_sumet', 'PRI_jet_num', 'PRI_jet_leading_pt',
    'PRI_jet_leading_eta', 'PRI_jet_leading_phi', 'PRI_jet_subleading_pt',
    'PRI_jet_subleading_eta', 'PRI_jet_subleading_phi', 'PRI_jet_all_pt',
    'Weight',
]

# For each numeric column, overlay the distribution of rows labelled "s" and
# rows labelled "b" on a single figure.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# consider migrating to histplot/kdeplot once the seaborn version is confirmed.
for col in nums:
    plt.figure(figsize=(12, 5))
    for label_value in ('s', 'b'):
        sns.distplot(df.loc[df.Label == label_value, col])
    plt.legend(['Label="s"', 'Label="b"'])
    plt.show()

In [None]:
# Inspect the class balance of the target column "Label" with a bar chart
# (left) and a pie chart (right).

# Compute the counts once so the pie's wedge sizes and wedge labels come from
# the same object. value_counts() orders by frequency while unique() orders by
# first appearance, so pairing the two can attach the wrong label to a wedge.
label_counts = df["Label"].value_counts()

plt.figure(figsize=(14, 7))

# Left panel: absolute number of rows per label.
ax1 = plt.subplot(1, 2, 1)
cp = sns.countplot(x=df["Label"])
ax1.set_xlabel(" ")
ax1.set_ylabel(" ")
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
sns.despine(top=True, right=True)

# Right panel: share of each label as a percentage.
ax2 = plt.subplot(1, 2, 2)
plt.pie(
    label_counts,
    labels=label_counts.index.tolist(),
    autopct='%1.2f%%',
    pctdistance=0.8,
    shadow=True,
    radius=1.3,
    textprops={'fontsize': 14},
)
plt.xlabel('Composition of "Label"', fontsize=15, labelpad=20)
plt.subplots_adjust(wspace=0.4)
plt.show()

### Correlation Analysis 

In [None]:
# Pairwise correlation between the numeric columns, drawn as an annotated heatmap.
# The string-valued "Label" column is excluded explicitly: calling corr() on a
# frame that still contains it raises in pandas >= 2.0, where non-numeric
# columns are no longer dropped silently.
cor = df.select_dtypes(include="number").corr()
plt.figure(figsize=(18, 10))
sns.heatmap(cor, annot=True, cmap='coolwarm')
plt.show()

### Prepare the test and training data sets

In [None]:
# Feature matrix: every column of df except the target "Label".
# NOTE(review): this keeps 'EventId' and 'Weight' as model inputs. EventId is
# presumably a row identifier and Weight a per-event weight that may leak
# label information — confirm against the dataset documentation. Dropping
# them also requires updating the network's input dimension.
X = df.drop("Label",axis=1)
X

In [None]:
# Binary target array: 0 where Label is "s", 1 for every other label.
is_s = df["Label"] == "s"
y = np.where(is_s, 0, 1)
y

In [None]:
# Hold out 33% of the rows as the test set; the seed is fixed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Carve a validation set (33% of the remaining training rows) out of the
# training data, again with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only,
# so no statistics from validation or test data influence it, and is then
# applied to all three splits.
# The validation split must be transformed too: it is passed to fit() as
# validation_data, and a model trained on scaled inputs would otherwise be
# validated on raw, unscaled values.
# (StandardScaler is already imported at the top of the notebook.)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [None]:
# Model creation: a fully connected (dense) feed-forward binary classifier.
# This is not a convolutional network — there are no convolution layers.
# Each hidden layer uses ReLU, a max-norm weight constraint of 3 and is
# followed by 50% dropout; the single sigmoid output unit yields a probability.
classifier = Sequential()
# Derive the input width from the training data instead of hard-coding it, so
# the model stays valid if the feature set changes.
classifier.add(Dense(32, activation='relu', input_dim=X_train.shape[1], kernel_constraint=maxnorm(3)))
classifier.add(Dropout(0.5))
classifier.add(Dense(30, activation='relu', kernel_constraint=maxnorm(3)))
classifier.add(Dropout(0.5))
classifier.add(Dense(10, activation='relu', kernel_constraint=maxnorm(3)))
classifier.add(Dropout(0.5))
classifier.add(Dense(1, activation='sigmoid'))

# Pass the configured optimizer object to compile(). Passing the string
# 'adam' builds a default Adam instance and silently ignores the learning
# rate chosen here.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
classifier.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
classifier.summary()

In [None]:
# !pip install pydot

In [None]:
# !pip install graphviz

In [None]:
# from keras.utils.vis_utils import plot_model
# plot_model(classifier, to_file='/tmp/model.png', show_shapes=True,)

In [None]:
# Model training (the model was already compiled when it was built).
# Early stopping watches the *training* loss and halts once it has not
# improved for 3 consecutive epochs.
# NOTE(review): with validation data available, monitoring 'val_loss' is
# usually the better stopping signal — consider switching.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
history = classifier.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=250,
    validation_data=(X_val, y_val),
    callbacks=[callback],
)

### Model Evaluation

In [None]:
# Per-epoch metrics recorded during training.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Two stacked panels: accuracy on top, loss below.
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatically chosen lower bound but pin the upper bound at 1.
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 1.0])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')

plt.show()


### Making the prediction and evaluating the model 

In [None]:
# Predict on the test set, then threshold the sigmoid outputs at 0.5 to get
# boolean class predictions.
y_prob = classifier.predict(X_test)
y_pred = y_prob > 0.5
y_pred

In [None]:
# Compute the loss and accuracy (the metrics set at compile time) on the test set.
classifier.evaluate(X_test, y_test, verbose = 1) # Model evaluation

In [None]:
# Build and print the confusion matrix of true vs. predicted test labels.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(y_test, y_pred)
print(cm)

In [None]:
# Tick labels for the confusion matrix. The order has to follow the numeric
# encoding used when y was built ("s" -> 0, everything else -> 1), assuming
# the matrix rows/columns are in ascending class order — TODO confirm.
labels = ["s", "b"]

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)

# Render the matrix with a blue colour map.
disp.plot(cmap=plt.cm.Blues)
plt.show()