In [None]:
# !pip install numpy --upgrade
# !pip install scipy --upgrade
# !pip install --user --force-reinstall numpy==1.20.0

In [None]:

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

def generate_dummy_data(num_batches, events_per_batch, pulses_per_event, sensors):
    """Generate random event metadata and pulse tables for testing the pipeline.

    Args:
        num_batches: Number of batches to simulate.
        events_per_batch: Number of events in each batch.
        pulses_per_event: Number of pulse rows generated for every event.
        sensors: Exclusive upper bound for the random ``sensor_id`` values.

    Returns:
        A ``(meta_df, data_df)`` tuple of DataFrames.

        ``meta_df`` has one row per event with columns ``batch_id``,
        ``event_id``, ``first_pulse_index``, ``last_pulse_index``,
        ``azimuth`` (uniform in [0, 2*pi)) and ``zenith`` (uniform in [0, pi)).

        ``data_df`` has one row per pulse with columns ``event_id``, ``time``
        (integer in [0, 10000)), ``sensor_id`` (integer in [0, sensors)),
        ``charge`` (normal, mean 1, std 0.1) and ``auxiliary`` (random bool).
    """
    total_events = num_batches * events_per_batch
    total_pulses = total_events * pulses_per_event

    # Event IDs must be unique across batches: the pulse table is keyed by
    # event_id alone, so restarting the numbering in every batch would merge
    # the pulses of unrelated events.
    event_ids = np.arange(total_events)

    # Pulse indices are positions within a batch, so they are derived from the
    # event's position inside its batch rather than from the global event ID.
    index_in_batch = np.tile(np.arange(events_per_batch), num_batches)

    meta_df = pd.DataFrame({
        'batch_id': np.repeat(np.arange(num_batches), events_per_batch),
        'event_id': event_ids,
        'first_pulse_index': index_in_batch * pulses_per_event,
        'last_pulse_index': (index_in_batch + 1) * pulses_per_event - 1,
        'azimuth': np.random.uniform(0, 2 * np.pi, size=total_events),
        'zenith': np.random.uniform(0, np.pi, size=total_events),
    })

    # Draw every pulse attribute in one vectorized call instead of looping
    # over each pulse in Python.
    data_df = pd.DataFrame({
        'event_id': np.repeat(event_ids, pulses_per_event),
        'time': np.random.randint(0, 10000, size=total_pulses),
        'sensor_id': np.random.randint(0, sensors, size=total_pulses),
        'charge': np.random.normal(1, 0.1, size=total_pulses),
        'auxiliary': np.random.choice([True, False], size=total_pulses),
    })

    return meta_df, data_df

# Parameters for dummy data generation; all four are passed straight to
# generate_dummy_data below.
num_batches = 10
events_per_batch = 100
pulses_per_event = 50  # reused further down as max_pulses
sensors = 5160  # sensor_id values are drawn from [0, sensors)

# Build the synthetic metadata and pulse tables and persist them as parquet files.
train_meta, train_data = generate_dummy_data(num_batches, events_per_batch, pulses_per_event, sensors)
train_meta.to_parquet('train_meta.parquet')
train_data.to_parquet('train_data.parquet')

# Load the data back from disk; the in-memory frames are replaced by what was written.
train_meta = pd.read_parquet('train_meta.parquet')
train_data = pd.read_parquet('train_data.parquet')

# Normalize the sensor IDs and times by dividing each column by its own maximum.
# The maxima are not kept, so this exact scale cannot be re-applied to other data later.
train_data['sensor_id'] = train_data['sensor_id'] / train_data['sensor_id'].max()
train_data['time'] = train_data['time'] / train_data['time'].max()

# Combine the data into one dataset for each event
def combine_event_data(event_id, train_data, max_pulses):
    """Return one event's pulses as a fixed-size float32 feature array.

    Args:
        event_id: Value matched against the ``event_id`` column.
        train_data: Pulse DataFrame containing at least the columns
            ``event_id``, ``time``, ``sensor_id``, ``charge`` and ``auxiliary``.
        max_pulses: Number of rows in the returned array.

    Returns:
        A ``(max_pulses, 4)`` float32 array whose columns are ``time``,
        ``sensor_id``, ``charge`` and ``auxiliary`` (booleans become 0.0/1.0).
        Events with fewer pulses are zero-padded at the end; events with more
        pulses keep only their first ``max_pulses`` rows in table order. An
        event with no pulses yields an all-zero array.
    """
    feature_columns = ['time', 'sensor_id', 'charge', 'auxiliary']
    event_rows = train_data.loc[train_data['event_id'] == event_id, feature_columns]

    # Truncate so every event has the same length; otherwise stacking the
    # per-event arrays fails as soon as one event exceeds max_pulses.
    features = np.array(event_rows.values, dtype=np.float32)[:max_pulses]

    if len(features) < max_pulses:
        # Pad in NumPy rather than concatenating DataFrames, which avoids
        # dtype mixing between the boolean column and the float zero rows.
        padded = np.zeros((max_pulses, len(feature_columns)), dtype=np.float32)
        padded[:len(features)] = features
        return padded
    return features

# Sequence length handed to combine_event_data as its max_pulses argument.
max_pulses = pulses_per_event
# Attach to each metadata row the float32 feature array built from that event's pulses.
# combine_event_data filters the whole of train_data on every call, so this step
# scans the pulse table once per event.
train_meta['event_data'] = train_meta['event_id'].apply(lambda x: combine_event_data(x, train_data, max_pulses))

class NeutrinoModel(nn.Module):
    """LSTM regressor that maps a pulse sequence to two outputs (azimuth, zenith).

    The sequence is passed through a stacked LSTM, the output at the final
    time step is fed through a ReLU-activated linear layer, and a last linear
    layer produces the two predictions.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden state size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        fc_size: Width of the intermediate fully connected layer.

    The defaults reproduce the original fixed architecture, and the submodule
    names (``lstm``, ``fc1``, ``fc2``) are unchanged, so state_dicts saved from
    a default-constructed model remain loadable.
    """

    def __init__(self, input_size=4, hidden_size=64, num_layers=2, fc_size=32):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc1 = nn.Linear(hidden_size, fc_size)
        self.fc2 = nn.Linear(fc_size, 2)  # Predicting azimuth and zenith

    def forward(self, x):
        """Run the model on a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``; the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape ``(batch, 2)``.
        """
        h_lstm, _ = self.lstm(x)
        # Only the top layer's output at the last time step is used as the
        # summary of the whole sequence.
        h_lstm = h_lstm[:, -1, :]
        x = torch.relu(self.fc1(h_lstm))
        return self.fc2(x)

# Initialize the model, loss function, and optimizer
model = NeutrinoModel()
# Mean squared error over both outputs.
# NOTE(review): MSE treats azimuth as a linear quantity, so angles just above 0 and
# just below 2*pi are penalized as far apart — confirm this is acceptable.
criterion = nn.MSELoss()
# Adam over all model parameters with a fixed learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Assemble model inputs and regression targets: X stacks the per-event feature
# arrays along a new leading axis, y holds the two target angles for each event.
X = np.stack(train_meta['event_data'].to_numpy())
y = train_meta[['azimuth', 'zenith']].to_numpy()

# Hold out 20% of the events for validation; the fixed seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Turn all four NumPy arrays into float32 PyTorch tensors in one pass.
X_train, y_train, X_val, y_val = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_val, y_val)
)

# Training the model
num_epochs = 50
batch_size = 32
num_train = X_train.size(0)

for epoch in range(num_epochs):
    model.train()
    # Fresh random sample order every epoch.
    permutation = torch.randperm(num_train)
    running_loss = 0.0

    for i in range(0, num_train, batch_size):
        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X_train[indices], y_train[indices]

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so the shorter final batch does not
        # skew the epoch average.
        running_loss += loss.item() * batch_x.size(0)

    # Report the mean loss over the whole epoch instead of the last
    # mini-batch only, so it is comparable to the full-set validation loss.
    # An empty training set yields NaN rather than an undefined variable.
    train_loss = running_loss / num_train if num_train else float('nan')

    # Validation is done in one forward pass over the full validation set,
    # in eval mode and without gradient tracking.
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)

    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss}, Validation Loss: {val_loss.item()}')

# Save the model: only the parameter state_dict is written, not the module object itself.
torch.save(model.state_dict(), 'neutrino_model.pth')

# Load the test data
# Nothing in the visible code writes these two files, so they must already exist on disk.
test_meta = pd.read_parquet('test_meta.parquet')
test_data = pd.read_parquet('test_data.parquet')

# Preprocess the test data similarly
# NOTE(review): each column is scaled by the test set's own maximum, not by the maximum
# used for the training data, so the two sets may end up on different scales — confirm
# whether the training maxima should be stored and reused here.
test_data['sensor_id'] = test_data['sensor_id'] / test_data['sensor_id'].max()
test_data['time'] = test_data['time'] / test_data['time'].max()
# Build one feature array per test event, using the same max_pulses as for training.
test_meta['event_data'] = test_meta['event_id'].apply(lambda x: combine_event_data(x, test_data, max_pulses))

# Prepare the test data
# np.stack requires every per-event array to have the same shape.
X_test = np.stack(test_meta['event_data'].values)
X_test = torch.tensor(X_test, dtype=torch.float32)

# Load the model
# The weights are read back from the file written after training and loaded into
# the existing model object; eval() switches the module to inference mode.
model.load_state_dict(torch.load('neutrino_model.pth'))
model.eval()

# Make predictions
# The whole test set goes through the model in a single forward pass without gradients.
with torch.no_grad():
    predictions = model(X_test)

# Save predictions to a CSV file
# Column 0 of the output is written as azimuth and column 1 as zenith; the raw
# values are written as-is, with no wrapping or clipping to a valid angle range.
submission = pd.DataFrame({'event_id': test_meta['event_id'], 'azimuth': predictions[:, 0].numpy(), 'zenith': predictions[:, 1].numpy()})
submission.to_csv('submission.csv', index=False)


# epoch loss and epoch validation loss
