In [23]:
import pandas as pd
from pathlib import Path

part_path = Path('part-1')
processed_path = Path(f'{part_path}/processed')

df_train = pd.read_csv(f"{part_path}/train.csv")
df_test = pd.read_csv(f"{part_path}/test.csv")


In [24]:
df_train = pd.read_csv(f"{processed_path}/train-embeddings-only.csv")
df_test = pd.read_csv(f"{processed_path}/test-embeddings-only.csv")

In [18]:
import numpy as np
from ast import literal_eval
def normalize_l2(x):
    x = np.array(x)
    if x.ndim == 1:
        norm = np.linalg.norm(x)
        if norm == 0:
            return x
        return x / norm
    else:
        norm = np.linalg.norm(x, 2, axis=1, keepdims=True)
        return np.where(norm == 0, x, x / norm)
    
def process_embeddings(embedding_series):
    return np.array([literal_eval(emb) for emb in embedding_series])

In [25]:
embeddings_a = process_embeddings(df_train.embedding)
embeddings_b = process_embeddings(df_train.embedding_b)
train_features = np.hstack([
    embeddings_a,
    embeddings_b
])

In [26]:
embeddings_a = process_embeddings(df_test.embedding)
embeddings_b = process_embeddings(df_test.embedding_b)
test_features = np.hstack([
    embeddings_a,
    embeddings_b
])

In [27]:
X = train_features
y = df_train["weight_a"].to_numpy()

In [32]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import math

class FundingDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y).reshape(-1, 1)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class FundingNet(nn.Module):
    def __init__(self, input_size):
        super(FundingNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 1)
        )
    
    def forward(self, x):
        return self.model(x)

# Split data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

# Create datasets and dataloaders
train_dataset = FundingDataset(X_train, y_train)
val_dataset = FundingDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# Setup training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FundingNet(input_size=X.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
best_val_loss = float('inf')
best_model = None

for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    train_loss = train_loss / len(train_loader)
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            val_loss += criterion(outputs, y_batch).item()
    
    val_loss = val_loss / len(val_loader)
    
    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model.state_dict().copy()
    
    if (epoch + 1) % 5 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, '
              f'Val Loss: {val_loss:.4f}')

# Load best model
model.load_state_dict(best_model)
print(f"Best validation MSE: {best_val_loss:.4f}")
print(f"Best validation RMSE: {math.sqrt(best_val_loss):.4f}")

Epoch [5/100], Train Loss: 0.0225, Val Loss: 0.0219
Epoch [10/100], Train Loss: 0.0144, Val Loss: 0.0204
Epoch [15/100], Train Loss: 0.0113, Val Loss: 0.0165
Epoch [20/100], Train Loss: 0.0110, Val Loss: 0.0140
Epoch [25/100], Train Loss: 0.0096, Val Loss: 0.0154
Epoch [30/100], Train Loss: 0.0081, Val Loss: 0.0148
Epoch [35/100], Train Loss: 0.0071, Val Loss: 0.0153
Epoch [40/100], Train Loss: 0.0061, Val Loss: 0.0149
Epoch [45/100], Train Loss: 0.0059, Val Loss: 0.0139
Epoch [50/100], Train Loss: 0.0056, Val Loss: 0.0142
Epoch [55/100], Train Loss: 0.0054, Val Loss: 0.0142
Epoch [60/100], Train Loss: 0.0045, Val Loss: 0.0146
Epoch [65/100], Train Loss: 0.0038, Val Loss: 0.0139
Epoch [70/100], Train Loss: 0.0043, Val Loss: 0.0147
Epoch [75/100], Train Loss: 0.0043, Val Loss: 0.0143
Epoch [80/100], Train Loss: 0.0031, Val Loss: 0.0144
Epoch [85/100], Train Loss: 0.0037, Val Loss: 0.0136
Epoch [90/100], Train Loss: 0.0035, Val Loss: 0.0137
Epoch [95/100], Train Loss: 0.0033, Val Loss: 0

In [28]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

class FundingDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y).reshape(-1, 1)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class TransformerRegressor(nn.Module):
    def __init__(self, input_size, d_model=128, nhead=4, num_layers=2, dropout=0.1):
        super(TransformerRegressor, self).__init__()
        
        # Project input to d_model dimensions
        self.input_projection = nn.Linear(input_size, d_model)
        
        # Positional encoding is not needed since our input is not sequential
        
        # Transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        )
        
        # Final prediction layers
        self.regression_head = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, 1)
        )
        
    def forward(self, x):
        # Project input to d_model dimensions and add batch dimension for transformer
        x = self.input_projection(x)
        x = x.unsqueeze(1)  # Add sequence dimension
        
        # Pass through transformer
        x = self.transformer_encoder(x)
        
        # Take the output of the first (and only) token
        x = x.squeeze(1)
        
        # Final prediction
        x = self.regression_head(x)
        return x

# Split data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Create datasets and dataloaders
train_dataset = FundingDataset(X_train, y_train)
val_dataset = FundingDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

# Setup training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TransformerRegressor(
    input_size=X.shape[1],
    d_model=1576,
    nhead=4,
    num_layers=2,
    dropout=0.1
).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min', 
    factor=0.5, 
    patience=5, 
    verbose=True
)

# Training loop
num_epochs = 100
best_val_loss = float('inf')
best_model = None
patience = 10
patience_counter = 0

for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        
        # Gradient clipping to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        train_loss += loss.item()
    
    train_loss = train_loss / len(train_loader)
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            val_loss += criterion(outputs, y_batch).item()
    
    val_loss = val_loss / len(val_loader)
    
    # Learning rate scheduling
    scheduler.step(val_loss)
    
    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model.state_dict().copy()
        patience_counter = 0
    else:
        patience_counter += 1
    
    # Early stopping
    if patience_counter >= patience:
        print(f"Early stopping triggered at epoch {epoch+1}")
        break
    
    if (epoch + 1) % 5 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, '
              f'Val Loss: {val_loss:.4f}')

# Load best model
model.load_state_dict(best_model)
print(f"Best validation MSE: {best_val_loss:.4f}")



Epoch [5/100], Train Loss: 0.1278, Val Loss: 0.1282
Epoch [10/100], Train Loss: 0.1271, Val Loss: 0.1455
Epoch [15/100], Train Loss: 0.1272, Val Loss: 0.1270
Epoch [20/100], Train Loss: 0.1259, Val Loss: 0.1261
Early stopping triggered at epoch 23
Best validation MSE: 0.1258
