In [1]:
import sys
COMP_NAME = "icecube-neutrinos-in-deep-ice"
sys.path.append(f"/home/anjum/kaggle/{COMP_NAME}/")

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import pairwise_distances
from torchmetrics.functional import pairwise_euclidean_distance


from src.config import INPUT_PATH, OUTPUT_PATH

In [2]:
sensors = pd.read_csv(INPUT_PATH / "sensor_geometry.csv")

In [3]:
batch = pd.read_parquet(INPUT_PATH / "train" / "batch_1.parquet")
event = batch.loc[24]
event = pd.merge(event, sensors, on="sensor_id").sort_values(by="time").reset_index(drop=True)
event.head(50)

Unnamed: 0,sensor_id,time,charge,auxiliary,x,y,z
0,3918,5928,1.325,True,303.41,335.64,206.58
1,4157,6115,1.175,True,-145.45,374.24,212.73
2,3520,6492,0.925,True,505.27,257.88,-174.6
3,5041,6665,0.225,True,-9.68,-79.5,181.0
4,2948,8054,1.575,True,576.37,170.92,357.88
5,860,8124,0.675,True,-290.66,-307.38,163.61
6,2440,8284,1.625,True,-526.63,-15.6,-178.17
7,1743,8478,0.775,True,500.43,-58.45,450.79
8,3609,8572,1.025,True,-313.6,237.44,348.01
9,5057,8680,3.975,True,-9.68,-79.5,-205.47


In [4]:
q_max = event["charge"].argmax()
q_max

9

In [5]:
xyz = event[["x", "y", "z"]].to_numpy()

dists = np.sqrt(np.sum((xyz - xyz[q_max])**2, -1))
dists

array([663.4393493 , 631.82649557, 616.40224188, 386.47      ,
       850.60449176, 516.81631282, 521.59927387, 831.4640715 ,
       706.51145808,   0.        ,   0.        , 641.11609042,
        14.02      , 756.33640915, 314.92722794, 740.02096815,
       492.33157628, 346.96730956, 467.67821694, 339.78994614,
       482.28639324, 468.07933195, 490.83218456, 504.69131774,
       383.67103865, 504.69131774, 490.83218456, 504.69131774,
       504.69131774, 525.3806226 , 561.86769386, 536.49942311,
       314.21283678, 520.62046723, 489.11336815, 463.7457603 ,
       547.33972028, 276.44750587, 160.54435711, 776.56323355,
       616.40224188, 384.57179993, 867.33842974, 577.34753676,
       642.79362979, 627.94187828, 706.51145808, 464.50506273,
         0.        , 634.32806481, 756.33640915, 532.13815133,
       288.63707073, 525.3806226 , 249.86970385, 346.96730956,
       529.58332999, 518.72505048, 359.18377065, 359.18377065,
       441.68008083])

In [6]:
delta_t = np.abs(event["time"].to_numpy() - event["time"].iloc[q_max])
delta_t

array([ 2752,  2565,  2188,  2015,   626,   556,   396,   202,   108,
           0,    43,    67,  1188,  1296,  1579,  1962,  2168,  2293,
        2343,  2405,  2537,  2736,  3521,  3526,  3533,  3547,  3659,
        3697,  3756,  3891,  3927,  4043,  4093,  4093,  4138,  4401,
        4582,  5321,  5525,  5603,  5843,  5920,  6529,  6663,  6922,
        6954,  6973,  7242,  7287,  7335,  7603,  8088,  8204,  8492,
        8516,  9075,  9132,  9373,  9415,  9422, 10351])

In [7]:
c_ice = 0.228  # m/ns
t_delay = 20  # ns

In [8]:
scattered = dists / c_ice >= delta_t  + t_delay

print(np.sum(scattered), np.sum(scattered) / len(scattered))
scattered

11 0.18032786885245902


array([ True,  True,  True, False,  True,  True,  True,  True,  True,
       False, False,  True, False,  True, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False])

# Convert to PyTorch

In [9]:
data = torch.load(INPUT_PATH / "train_events" / "batch_1" / "event_24.pt")

t, indices = torch.sort(data.x[:, 3])
data.x = data.x[indices]
    
data

Data(x=[61, 9], y=[2], n_pulses=61)

In [10]:
q_max_idx = torch.argmax(data.x[:, 4])
q_max_idx

tensor(9)

In [11]:
xyz = data.x[:, :3]

dists = (xyz - xyz[q_max_idx]).pow(2).sum(-1).pow(0.5) * 500
dists

tensor([663.4394, 631.8265, 616.4022, 386.4700, 850.6045, 516.8163, 521.5993,
        831.4641, 706.5115,   0.0000,   0.0000, 641.1161,  14.0200, 756.3364,
        314.9272, 740.0210, 492.3316, 346.9673, 467.6782, 339.7899, 482.2864,
        468.0793, 490.8322, 504.6914, 383.6710, 504.6914, 490.8322, 504.6914,
        504.6914, 525.3806, 561.8677, 536.4995, 520.6205, 314.2128, 489.1134,
        463.7458, 547.3397, 276.4475, 160.5444, 776.5632, 616.4022, 384.5718,
        867.3384, 577.3475, 642.7936, 627.9419, 706.5115, 464.5050,   0.0000,
        634.3280, 756.3364, 532.1382, 288.6371, 525.3806, 249.8697, 346.9673,
        529.5833, 518.7251, 359.1837, 359.1837, 441.6801])

In [12]:
delta_t = (torch.abs(t - t[q_max_idx])) * 3e4
delta_t

tensor([ 2752.0000,  2565.0000,  2188.0000,  2014.9999,   626.0001,   556.0000,
          396.0000,   202.0000,   108.0000,     0.0000,    42.9999,    67.0000,
         1188.0000,  1296.0000,  1579.0000,  1962.0001,  2168.0000,  2293.0000,
         2342.9998,  2405.0000,  2537.0000,  2736.0000,  3521.0000,  3526.0000,
         3533.0000,  3547.0000,  3659.0000,  3697.0000,  3756.0000,  3891.0002,
         3926.9998,  4043.0000,  4093.0000,  4093.0000,  4138.0005,  4401.0000,
         4582.0000,  5321.0000,  5525.0000,  5603.0000,  5843.0000,  5920.0000,
         6529.0000,  6663.0000,  6922.0000,  6954.0000,  6973.0000,  7242.0000,
         7287.0000,  7335.0000,  7603.0005,  8088.0000,  8203.9990,  8492.0000,
         8516.0000,  9075.0000,  9132.0000,  9373.0000,  9415.0000,  9422.0000,
        10351.0000])

In [13]:
scattered = dists / c_ice >= delta_t  + t_delay

print(torch.sum(scattered), torch.sum(scattered) / len(scattered))
scattered

tensor(11) tensor(0.1803)


tensor([ True,  True,  True, False,  True,  True,  True,  True,  True, False,
        False,  True, False,  True, False,  True, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False])

In [14]:
data.x[~scattered].shape

torch.Size([50, 9])

In [15]:
len(data.x)

61

In [16]:
data = torch.load(INPUT_PATH / "train_events" / "batch_1" / "event_24.pt")

C_ICE = 0.228  # m/ns
T_DELAY = 0  # ns


t, indices = torch.sort(data.x[:, 3])
data.x = data.x[indices]

# Calculate the scattering flag
q_max_idx = torch.argmax(data.x[:, 4])
xyz = data.x[:, :3]
dists = (xyz - xyz[q_max_idx]).pow(2).sum(-1).pow(0.5) * 500
delta_t = (torch.abs(t - t[q_max_idx])) * 3e4
scattered = dists / C_ICE >= delta_t + T_DELAY

# Remove hits flagged as scattered light
data.x = data.x[~scattered]
t = t[~scattered]

data

Data(x=[49, 9], y=[2], n_pulses=61)

In [17]:
t.shape

torch.Size([49])

In [18]:
scattered = 2 * scattered.to(torch.float32) - 1
scattered

tensor([ 1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1.,
        -1.,  1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1.])