In [None]:
# Environment setup for Google Colab: enable widgets, fetch the data files and
# clone the helper code. The `!` lines are shell commands run by IPython.

# needed for figures to appear in colab
from google.colab import output
output.enable_custom_widget_manager()

# remove the sample_data directory from the working directory
!rm -rf sample_data

# download reduced_electrons.parquet
!gdown https://drive.google.com/uc?id=1BVTkqniSfwJsVsUnNhM_FjMueXfotr2f

# download reduced_muons.parquet
!gdown https://drive.google.com/uc?id=1nJpyojI8CAwaq5P_ZcqgeKhfBEVCQn9n

# download pre-prepared analysis code
# (any earlier clone is deleted first, so re-running this cell clones a fresh copy)
!rm -rf IceCube_MasterClass_at_Harvard2024
!git clone "https://github.com/kcarloni/IceCube_MasterClass_at_Harvard2024";

# Import some standard python libraries
import sys
import numpy as np
import matplotlib.pyplot as plt

# Import some custom libraries for this example
sys.path.insert(0, "./IceCube_MasterClass_at_Harvard2024/")
from src.ml_tools import *

In [None]:
# Build the helper object that will guide this ML example from the two
# downloaded files (muon file first, electron file second).
#
# The instance is deliberately stored under the name `MLHelper`, because the
# following cells call `MLHelper.<method>(...)`. That name originally refers to
# the class itself, so calling `MLHelper(...)` a second time would call the
# instance instead. Importing the class under a separate alias keeps this cell
# safe to re-run without restarting the kernel.
from src.ml_tools import MLHelper as _MLHelperClass

MLHelper = _MLHelperClass("reduced_muons.parquet",
                          "reduced_electrons.parquet")

In [None]:
# Settings to experiment with: change these values and re-run the cells below.
width = 10 # how "wide" the network is, which will determine the number of parameters
N_train = 1000 # how many training examples to use. Pick a number between 1 and 5000

# Make the network (its size is controlled by `width`)
MLHelper.MakeNetwork(width=width)

# Make the training data (`N_train` examples)
MLHelper.MakeTrainingDataset(N_train=N_train)

In [None]:
# Train for two epochs; the result is indexed by epoch number and each entry
# is the list of loss values recorded during that epoch.
loss_dict = MLHelper.train(num_epochs=2)

# Draw one curve per epoch on a shared "training step" axis.
for epoch, losses in loss_dict.items():
    n_steps = len(losses)
    first_step = epoch * n_steps
    if epoch > 0:
        # Start one step early with the last loss of the previous epoch so
        # that consecutive curves join up without a gap.
        steps = first_step - 1 + np.arange(n_steps + 1)
        values = np.array([loss_dict[epoch - 1][-1]] + losses)
    else:
        steps = first_step + np.arange(n_steps)
        values = np.array(losses)
    plt.plot(steps, values)

plt.semilogy()  # logarithmic y-axis
plt.ylabel("BCE Loss", fontsize=14)
plt.xlabel("Training Step", fontsize=14)
plt.show()

In [None]:
# Choose which event to display; change this number to look at a different one.
event_no = 1
# Plot the chosen event, with both the network's prediction and the true label
# revealed.
# NOTE(review): the keyword is spelled `reveal_network_predition` (no "c");
# presumably this matches the signature in src.ml_tools -- confirm before renaming.
MLHelper.plot_event(event_no,
                    reveal_network_predition=True,
                    reveal_true_label=True)

In [None]:
# Save network scores on the test data, together with the matching true labels.
pred_label_test = []
true_label_test = []
# The loop variables are deliberately NOT called `input` / `output`: those names
# would shadow Python's built-in input() and the `google.colab.output` module
# imported in the setup cell.
for batch_inputs, batch_targets in MLHelper.test_dataloader:
    # detach() is needed before the network result can be converted to NumPy
    batch_scores = MLHelper.net(batch_inputs).detach().numpy()
    # the first column of the target is used as the true label
    true_label_test.extend(batch_targets[:, 0])
    pred_label_test.extend(batch_scores)

# Convert the accumulated per-event values to float arrays for plotting.
pred_test = np.array(pred_label_test, dtype=float)
true_test = np.array(true_label_test, dtype=float)

In [None]:
# Compare the network's electron score for the two true classes in the test data.
bins = np.linspace(0, 1, 10)  # 10 equally spaced bin edges spanning [0, 1]
fig = plt.figure(figsize=(8, 6))

# Plot network score distributions: one semi-transparent histogram per true
# label, so the region where the two classes overlap stays visible.
for true_value, legend_text, colour in (
    (0, "Test data: muons", "dodgerblue"),
    (1, "Test data: electrons", "orangered"),
):
    selected_scores = pred_test[true_test == true_value]
    plt.hist(selected_scores, bins=bins, alpha=0.5, label=legend_text, color=colour)

plt.xlabel("Electron score", fontsize=14)
plt.ylabel("Number of events", fontsize=14)
plt.semilogy()  # logarithmic y-axis
plt.legend(fontsize=14)
plt.show()