In [31]:
import numpy as np
import torch
import json
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

event_info = json.load(open("2025_fit_params.json", "r"))

In [123]:
# define the datasets

def time_to_seconds(time_str):
    parts = time_str.split(':')
    seconds = float(parts[-1])
    for i, part in enumerate(reversed(parts[:-1])):
        seconds += int(part) * 60 ** (i+1)
    if '.' not in time_str:
        return seconds - 0.5
    else:
        return seconds - 0.005

points_table = json.load(open("2025_points_table.json", "r"))
points = np.array(points_table["Points"], dtype=float) + 0.5

def get_data(gender, event):
    times = np.array(points_table[gender][event]['Scores'])
    times = np.array([time_to_seconds(t) if t != "-" else np.nan for t in times])
    # curve fitting
    x_data = np.log(times[~np.isnan(times)])
    y_data = points[~np.isnan(times)]
    attributes = np.ones((x_data.shape[0],4))
    attributes[:,0] = np.log(points_table[gender][event]['Distance'])
    attributes[:,1] = np.log(times[~np.isnan(times)] / points_table[gender][event]['Distance'])
    if gender != 'Women':
        attributes[:,2] = 0
    if points_table[gender][event]['Surface'] != 'road':
        attributes[:,3] = 0
    # if points_table[gender][event]['Surface'] != 'shorttrack':
        # attributes[:,4] = 0
    # if points_table[gender][event]['Category'] != 'Racewalk':
        # attributes[:,5] = 0
    x_data = np.concatenate([x_data.reshape(-1,1), attributes], axis=1)
    return x_data, y_data

fit_params = {"Men": {},
              "Women": {}}

xx = []
yy = []
for gender in fit_params.keys():
    for event in points_table[gender].keys():
        # if points_table[gender][event]['Category'] in ['Sprints', 'Middle Distance', 'Long Distance']:
        if points_table[gender][event]['Category'] in ['Racewalk']:
        # if points_table[gender][event]['Category'] in ['Middle Distance', 'Long Distance', 'Racewalk']:
            data = get_data(gender, event)
            xx.append(data[0])
            yy.append(data[1])
xx = np.vstack(xx)
yy = np.concatenate(yy)
xx.shape, yy.shape

((43503, 5), (43503,))

In [174]:
# Scale features
scaler_X = StandardScaler()
X_numeric = xx[:,:3]
X_binary = xx[:,3:]
X_numeric_scaled = scaler_X.fit_transform(X_numeric)
X_scaled = np.concatenate([X_numeric_scaled, X_binary], axis=1)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(yy.reshape(-1, 1))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.05, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [185]:
# define the neural network

class PointsNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(5, 128),  # 5 inputs
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64,16),
            nn.ReLU(),
            nn.Linear(16, 1)   # 1 output: points (scaled)
        )

    def forward(self, x):
        return self.model(x)

model = PointsNet()

In [187]:
# train model

criterion = nn.MSELoss()
# criterion = nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

n_epochs = 500
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()

    y_pred = model(X_train_tensor)
    loss = criterion(y_pred, y_train_tensor)
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")

Epoch 0 | Loss: 1.0264
Epoch 100 | Loss: 0.0008
Epoch 200 | Loss: 0.0004
Epoch 300 | Loss: 0.0022
Epoch 400 | Loss: 0.0002


In [188]:
# Example:
time = '20:42.38'
distance = 5
time = '13:22.02'
distance = 3
# time = '6:41.42'
# distance = 1.5
gender = 0
road = True
shorttrack = False
racewalk = False

def prepare_event(time_str, distance):
    parts = time_str.split(':')
    seconds = float(parts[-1])
    for i, part in enumerate(reversed(parts[:-1])):
        seconds += int(part) * 60 ** (i+1)
    return np.log(seconds), np.log(distance), np.log(seconds/distance)

t, d, p = prepare_event(time, distance)

# new_sample = np.array([[t, d, p, gender, road, shorttrack, racewalk]])  # time, distance, gender, road, shorttrack
new_sample = np.array([[t, d, p, gender, road]])
new_sample_scaled = np.concatenate([scaler_X.transform(new_sample[:,:3]),new_sample[:,3:]], axis=1)
new_sample_tensor = torch.tensor(new_sample_scaled, dtype=torch.float32)

model.eval()
with torch.no_grad():
    prediction_scaled = model(new_sample_tensor)
    prediction_points = scaler_y.inverse_transform(prediction_scaled.numpy())

print(f"Predicted points: {prediction_points[0][0]:.2f}")

Predicted points: 847.25


Train an ensemble

In [None]:
def train_single_model(X_train_tensor, y_train_tensor, n_epochs=1500, lr=0.001):
    model = PointsNet()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(n_epochs):
        model.train()
        optimizer.zero_grad()
        y_pred = model(X_train_tensor)
        loss = criterion(y_pred, y_train_tensor)
        loss.backward()
        optimizer.step()

    return model

In [189]:
ensemble_size = 5
ensemble_models = []
lrs = np.linspace(0.001, 0.01, ensemble_size)

for i in range(ensemble_size):
    model = train_single_model(X_train_tensor, y_train_tensor)
    ensemble_models.append(model)


In [190]:
# residuals
# Step 1: Get ensemble predictions
ensemble_preds = np.mean([model(X_train_tensor).detach().numpy() for model in ensemble_models], axis=0)

# Step 2: Fit residual model
residuals = y_train_tensor.numpy() - ensemble_preds
residual_model = train_single_model(X_train_tensor, torch.tensor(residuals, dtype=torch.float32))

In [191]:
def predict_with_ensemble(models, sample_tensor, residuals=True):
    preds = []
    for model in models:
        model.eval()
        with torch.no_grad():
            pred = model(sample_tensor)
            preds.append(pred.numpy())
            pred_res = 0
            if residuals:
                pred_res = residual_model(sample_tensor).numpy()
    avg_pred_scaled = np.mean(preds, axis=0) + pred_res
    return scaler_y.inverse_transform(avg_pred_scaled.reshape(-1, 1))


In [197]:
time = '20:42.38'
distance = 5
time = '13:22.02'
distance = 3
time = '6:41.42'
distance = 1.5
time = '5:24'
distance = 1.609
gender = 0
road = False

def prepare_event(time_str, distance):
    parts = time_str.split(':')
    seconds = float(parts[-1])
    for i, part in enumerate(reversed(parts[:-1])):
        seconds += int(part) * 60 ** (i+1)
    return np.log(seconds), np.log(distance), np.log(seconds/distance)

t, d, p = prepare_event(time, distance)

# Construct the input as before
new_sample = np.array([[t, d, p, gender, road]])  # shape: (1, 5)
new_sample_scaled = np.concatenate([
    scaler_X.transform(new_sample[:, :3]),  # only scale t, d, p
    new_sample[:, 3:]                      # leave binary inputs as-is
], axis=1)
new_sample_tensor = torch.tensor(new_sample_scaled, dtype=torch.float32)

# Predict using ensemble
final_prediction = predict_with_ensemble(ensemble_models, new_sample_tensor, residuals=True)
print(f"Ensemble predicted points: {final_prediction[0][0]:.2f}")


Ensemble predicted points: 1307.87
