## Put all imports necessary here

In [41]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import matplotlib.pyplot as plt

## Load the Data

In [42]:
# Load the training data
train_file = np.load('train.npz')
train_data = train_file['data']
print("train_data's shape", train_data.shape)

# Load the testing data
test_file = np.load('test_input.npz')
test_data = test_file['data']
print("test_data's shape", test_data.shape)


train_data's shape (10000, 50, 110, 6)
test_data's shape (2100, 50, 50, 6)


## Data Preprocessing

In [43]:
def enhanced_normalize(data):
    """Normalize all features separately"""
    means = data[..., :4].mean(axis=(0,1,2))  # x,y,vx,vy
    stds = data[..., :4].std(axis=(0,1,2))
    
    # Normalize first 4 features
    norm_data = data.copy()
    norm_data[..., :4] = (data[..., :4] - means) / stds
    
    # Handle heading (wrap around for angles)
    heading_mean = np.arctan2(np.sin(data[..., 4]).mean(), np.cos(data[..., 4]).mean())
    norm_data[..., 4] = (data[..., 4] - heading_mean) / np.pi  # Normalize to [-1,1]
    
    return norm_data, (means, stds, heading_mean)

# Normalize training data
train_norm, norm_stats = enhanced_normalize(train_data)
test_norm, _ = enhanced_normalize(test_data)

# 2. Data Preparation
def prepare_data(data, is_train=True):
    X = data[:, 0, :50, :]  # (N, 50, 6)
    if is_train:
        Y = data[:, 0, 50:, :2]  # (N, 60, 2)
    else:
        Y = None
    
    # Reshape for LSTM
    X = np.swapaxes(X, 0, 1)  # (50, N, 6)
    if Y is not None:
        Y = np.swapaxes(Y, 0, 1)  # (60, N, 2)
    
    return torch.FloatTensor(X), torch.FloatTensor(Y) if Y is not None else None

X_train, Y_train = prepare_data(train_norm)
X_test, _ = prepare_data(test_norm, is_train=False)


## Define Models

In [44]:
class EnhancedLSTM(nn.Module):
    def __init__(self, input_size=6, hidden_size=256):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=3,
            dropout=0.3,
            batch_first=False
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )
        
    def forward(self, x, pred_steps=60):
        # Process history
        _, (h, c) = self.lstm(x)
        
        # Initialize with better features
        last_state = torch.cat([
            x[-1:, :, :2],  # Last position
            x[-1:, :, 2:4],  # Last velocity
            x[-1:, :, 4:]   # Last heading
        ], dim=-1)
        
        predictions = []
        for _ in range(pred_steps):
            out, (h, c) = self.lstm(last_state, (h, c))
            pred = self.fc(out)
            predictions.append(pred)
            
            # Update with predicted position and maintained dynamics
            new_state = torch.cat([
                pred,
                last_state[:, :, 2:4],  # Keep same velocity
                last_state[:, :, 4:]    # Keep same heading
            ], dim=-1)
            last_state = new_state
            
        return torch.cat(predictions, dim=0)

model = EnhancedLSTM()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
criterion = nn.MSELoss()

## Do Training Loop

In [45]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Convert to PyTorch tensors
train_dataset = TensorDataset(X_train.permute(1,0,2), Y_train.permute(1,0,2))
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

for epoch in range(30):
    model.train()
    epoch_loss = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.permute(1,0,2).to(device), batch_y.permute(1,0,2).to(device)
        
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        
        epoch_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {epoch_loss/len(train_loader):.4f}")

    # Early stop to hopefully stop overfitting
    if epoch_loss/len(train_loader) < 0.005:
        break


Epoch 1, Loss: 1.4999
Epoch 2, Loss: 0.9747
Epoch 3, Loss: 0.6345
Epoch 4, Loss: 0.4982
Epoch 5, Loss: 0.1059
Epoch 6, Loss: 0.0312
Epoch 7, Loss: 0.0131
Epoch 8, Loss: 0.0181
Epoch 9, Loss: 0.0089
Epoch 10, Loss: 0.0081
Epoch 11, Loss: 0.0065
Epoch 12, Loss: 0.0123
Epoch 13, Loss: 0.0057
Epoch 14, Loss: 0.0056
Epoch 15, Loss: 0.0069
Epoch 16, Loss: 0.0045


## Predict and Create Submission

In [46]:
model.eval()
with torch.no_grad():
    test_preds = model(X_test.to(device)).cpu().numpy()  # (60, 2100, 2)

# Denormalize only positions
pos_mean, pos_std = norm_stats[0][:2], norm_stats[1][:2]
test_preds = np.swapaxes(test_preds, 0, 1)  # (2100, 60, 2)
test_preds = test_preds * pos_std + pos_mean

# Create submission
submission = test_preds.reshape(-1, 2)
submission_df = pd.DataFrame(submission, columns=['x','y'])
submission_df.index.name = 'index'
submission_df.to_csv('enhanced_lstm_submission.csv')