## DATA PREPROCESSING

In [59]:
import pandas as pd
# DROP NANS
# Load the raw dataset, discard every row that contains at least one missing
# value, and persist the cleaned copy for the per-map split further down.
file_name = 'dataset_ultimate_plus.csv'
df = pd.read_csv(file_name)

# axis=0 / how='any' are the pandas defaults, spelled out for the reader.
df_cleaned = df.dropna(axis=0, how='any')
df_cleaned.to_csv('cleaned_dataset_ultimate_plus.csv', index=False)

# Report how many rows were discarded.
print(f"Removed: {len(df) - len(df_cleaned)}")



In [1]:
import pandas as pd
import os

# CONFIGURATION
input_file = 'cleaned_dataset_ultimate_plus.csv'
# Exactly one target_map assignment should be active; the others are kept
# commented out so the map can be switched quickly.
# target_map = 'de_mirage'
# target_map = 'de_dust2'
# target_map = 'de_ancient'
# target_map = 'de_nuke'
# target_map = 'de_inferno'
# target_map = 'de_train'
# target_map = 'de_overpass'
target_map = 'de_anubis'

# Both output files are named after the selected map.
train_ready_file = f'output_{target_map}.csv'
metadata_file = f'meta_{target_map}.csv'

if os.path.exists(input_file):
    # Keep only the rows of the selected map and expose the label column
    # under the name 'target'.
    df = pd.read_csv(input_file)
    on_target_map = df['map'] == target_map
    df = df[on_target_map].copy()
    df = df.rename(columns={'winner': 'target'})

    # Metadata file: identifiers, final score and label (whichever of these
    # columns are present), written in the same row order as the training file.
    meta_columns = [
        name
        for name in ('match_id', 'team1_score', 'team2_score', 'target')
        if name in df.columns
    ]
    df_meta = df.loc[:, meta_columns].copy()
    df_meta.to_csv(metadata_file, index=False)

    # Training file: everything except identifiers, scores and the map name.
    # errors='ignore' skips any of these columns that are absent.
    cols_to_drop = ['match_id', 'team1_score', 'team2_score', 'map']
    df_train = df.drop(columns=cols_to_drop, errors='ignore')
    df_train.to_csv(train_ready_file, index=False)

    print(f"1. Train (X, y): '{train_ready_file}' (Columns: {len(df_train.columns)})")
    print(f"2. Meta (Score, ID): '{metadata_file}'")
    print(f"Total matches: {len(df)}")
else:
    print(f"File {input_file} not found!")

1. Train (X, y): 'output_de_anubis.csv' (Columns: 441)
2. Meta (Score, ID): 'meta_de_anubis.csv'
Total matches: 142408


## MODEL

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchinfo import summary
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np


class FaceitPredictor(nn.Module):
    """Two-team match predictor built from a shared per-player encoder.

    Every player feature vector is embedded by ``phi``. The embeddings of each
    team are summed (so the result does not depend on player order within a
    team) and scored by ``rho``. The output is ``sigmoid(score_1 - score_2)``,
    so swapping the two teams maps an output ``p`` to ``1 - p``.

    Args:
        input_dim: Number of features per player.
        hidden_dim: Width of the hidden layers in ``phi`` and ``rho``.
        team_size: Players per team. The first ``team_size`` players along
            dim 1 form team 1 and the remaining ``team_size`` form team 2.
            Defaults to 5, which matches the previously hard-coded split.

    Raises:
        ValueError: If ``team_size`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim=32, team_size=5):
        super().__init__()
        if team_size < 1:
            raise ValueError(f"team_size must be >= 1, got {team_size}")
        # Plain int attribute: not a parameter or buffer, so state_dict keys
        # are unchanged.
        self.team_size = team_size

        # Per-player encoder, applied independently to every player.
        self.phi = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )
        # Team scorer, shared by both teams; outputs one raw value per team.
        self.rho = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, x):
        """Return the model output for a batch of matches.

        Args:
            x: Tensor of shape ``(batch, 2 * team_size, input_dim)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in (0, 1).

        Raises:
            ValueError: If ``x`` is not 3-D or does not hold exactly
                ``2 * team_size`` players. Without this check a wrong player
                count would be split into uneven teams without any error.
        """
        expected_players = 2 * self.team_size
        if x.dim() != 3 or x.shape[1] != expected_players:
            raise ValueError(
                f"expected input of shape (batch, {expected_players}, features), "
                f"got {tuple(x.shape)}"
            )

        player_embeddings = self.phi(x)

        # Sum over the player axis of each team.
        t1_sum = player_embeddings[:, :self.team_size, :].sum(dim=1)
        t2_sum = player_embeddings[:, self.team_size:, :].sum(dim=1)

        t1_strength = self.rho(t1_sum)
        t2_strength = self.rho(t2_sum)

        return torch.sigmoid(t1_strength - t2_strength)

## TRAIN MODEL

In [3]:
# LOAD DATA
# Read the per-map training and metadata files named after target_map.
train_csv, meta_csv = f'output_{target_map}.csv', f'meta_{target_map}.csv'
df_train = pd.read_csv(train_csv)
df_meta = pd.read_csv(meta_csv)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(f"Device: {device}")


def calculate_weights(df):
    """Compute one sample weight per row from the final score margin.

    The 'team1_score' and 'team2_score' columns of ``df`` are cast to int and
    the absolute margin ``m`` is mapped to ``0.2 + 0.8 * (m / 13.0)``: a tie
    gets weight 0.2 and a 13-round margin gets weight 1.0.

    Returns:
        NumPy array with one weight per row of ``df``.
    """
    team1_rounds = df['team1_score'].astype(int)
    team2_rounds = df['team2_score'].astype(int)

    margin = (team1_rounds - team2_rounds).abs()

    return (0.2 + 0.8 * (margin / 13.0)).values


# Seed PyTorch so weight initialisation, dropout masks and DataLoader shuffling
# repeat from run to run (GPU kernels may still add small nondeterminism).
torch.manual_seed(42)

# The sample weights are derived from df_meta, so its rows must line up
# one-to-one with the rows of df_train.
if len(df_meta) != len(df_train):
    raise ValueError(
        f"df_train has {len(df_train)} rows but df_meta has {len(df_meta)}; "
        "regenerate both files from the same preprocessing run"
    )

X = df_train.drop(columns=['target']).values
y = df_train['target'].values
weights = calculate_weights(df_meta)

num_players = 10
# The flat feature row is reshaped to (players, features) below; fail early
# with a clear message instead of an opaque .view() error.
if X.shape[1] % num_players != 0:
    raise ValueError(
        f"{X.shape[1]} feature columns cannot be split evenly across {num_players} players"
    )
num_features_per_player = X.shape[1] // num_players

# Split row positions BEFORE scaling so the scaler never sees test rows.
# The partition depends only on the row count, test_size and random_state,
# so any other split made with the same three values selects the same rows.
train_idx, test_idx = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to every row.
# Fitting on all rows would leak test-set statistics into training and into
# the exported scaler parameters.
scaler = StandardScaler()
scaler.fit(X[train_idx])
X_scaled = scaler.transform(X)

X_tensor = torch.tensor(X_scaled, dtype=torch.float32).view(-1, num_players, num_features_per_player)
y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)
weights_tensor = torch.tensor(weights, dtype=torch.float32).view(-1, 1)

train_rows = torch.as_tensor(train_idx, dtype=torch.long)
test_rows = torch.as_tensor(test_idx, dtype=torch.long)

X_train, X_test = X_tensor[train_rows], X_tensor[test_rows]
y_train, y_test = y_tensor[train_rows], y_tensor[test_rows]
w_train, w_test = weights_tensor[train_rows], weights_tensor[test_rows]

train_loader = DataLoader(TensorDataset(X_train, y_train, w_train), batch_size=256, shuffle=True)

model = FaceitPredictor(num_features_per_player).to(device)
# reduction='none' keeps one loss value per sample so it can be multiplied by
# the per-match weight before averaging.
criterion = nn.BCELoss(reduction='none').to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
summary(model, input_size=(32, num_players, num_features_per_player), device=device, verbose=1)

for epoch in range(30):
    model.train()
    total_loss = 0

    for batch_X, batch_y, batch_w in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        batch_w = batch_w.to(device)

        optimizer.zero_grad()
        output = model(batch_X)

        # Per-sample BCE scaled by the score-margin weight, then averaged.
        raw_loss = criterion(output, batch_y)
        weighted_loss = raw_loss * batch_w

        loss = weighted_loss.mean()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    print(f"Epoch {epoch + 1}, Weighted Loss: {total_loss / len(train_loader):.4f}")


Device: cuda
Layer (type:depth-idx)                   Output Shape              Param #
FaceitPredictor                          [32, 1]                   --
├─Sequential: 1-1                        [32, 10, 32]              --
│    └─Linear: 2-1                       [32, 10, 32]              1,440
│    └─ReLU: 2-2                         [32, 10, 32]              --
│    └─Dropout: 2-3                      [32, 10, 32]              --
│    └─Linear: 2-4                       [32, 10, 32]              1,056
│    └─ReLU: 2-5                         [32, 10, 32]              --
├─Sequential: 1-2                        [32, 1]                   --
│    └─Linear: 2-6                       [32, 32]                  1,056
│    └─ReLU: 2-7                         [32, 32]                  --
│    └─Dropout: 2-8                      [32, 32]                  --
│    └─Linear: 2-9                       [32, 1]                   33
├─Sequential: 1-3                        [32, 1]               

In [4]:
def save_detailed_results(model, X_tensor, indices, original_df, filename, device, batch_size=8192):
    """Run the model on one split and write per-match predictions to a CSV.

    Args:
        model: Module whose output for a batch flattens to one probability per
            row. It is switched to eval mode and left in that mode.
        X_tensor: Input tensor for the split; its first dimension indexes rows.
        indices: Positional row indices into ``original_df``, one per row of
            ``X_tensor`` and in the same order.
        original_df: DataFrame providing 'match_id', 'team1_score',
            'team2_score' and 'target' for the selected rows.
        filename: Destination CSV path.
        device: Device on which inference runs.
        batch_size: Number of rows per forward pass. Inference is chunked so
            a large split is never moved to the device in a single piece.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``indices`` and
            ``X_tensor`` differ in length.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    if len(indices) != len(X_tensor):
        raise ValueError(
            f"got {len(indices)} indices for {len(X_tensor)} input rows; "
            "they must describe the same split"
        )

    model.eval()
    chunks = []
    with torch.no_grad():
        for start in range(0, len(X_tensor), batch_size):
            batch = X_tensor[start:start + batch_size].to(device)
            chunks.append(model(batch).cpu())
    # An empty split produces no chunks; fall back to an empty tensor.
    probs = (torch.cat(chunks) if chunks else torch.empty(0)).numpy().flatten()

    results_df = original_df.iloc[indices].copy()

    results_df['predicted_prob_t1'] = probs
    # The predicted winner flag is 1 when the probability exceeds 0.5.
    results_df['predicted_winner'] = (probs > 0.5).astype(int)

    results_df = results_df.rename(columns={'target': 'real_winner_t1'})

    final_cols = [
        'match_id',
        'team1_score',
        'team2_score',
        'real_winner_t1',
        'predicted_prob_t1',
        'predicted_winner'
    ]

    results_df[final_cols].to_csv(filename, index=False)
    print(
        f"File {filename} saved. Accuracy: {(results_df['real_winner_t1'] == results_df['predicted_winner']).mean():.4f}")


# Recover which df_meta rows belong to each split. The call uses the same
# test_size and random_state as the split that produced X_train / X_test, and
# df_meta is expected to have as many rows as the tensors that were split, so
# the positions line up with those tensors.
df_indices = np.arange(len(df_meta))
train_idx, test_idx = train_test_split(df_indices, test_size=0.2, random_state=42)

# One results file per split: train first, then test.
for split_X, split_idx, results_file in (
    (X_train, train_idx, f'results_train_{target_map}.csv'),
    (X_test, test_idx, f'results_test_{target_map}.csv'),
):
    save_detailed_results(model, split_X, split_idx, df_meta, results_file, device)

File results_train_de_anubis.csv saved. Accuracy: 0.6138
File results_test_de_anubis.csv saved. Accuracy: 0.6001


## EXPORT MODEL

In [5]:
# Export in eval mode so the dropout layers are inactive during export.
model.eval()

# A single example input; only the batch axis is declared dynamic below.
dummy_input = torch.randn(1, 10, num_features_per_player).to(device)
onnx_path = f"model_{target_map}.onnx"
batch_axis = {0: 'batch_size'}

torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    export_params=True,
    opset_version=18,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': dict(batch_axis), 'output': dict(batch_axis)}
)

  torch.onnx.export(


[torch.onnx] Obtain model graph for `FaceitPredictor([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `FaceitPredictor([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 2 of general pattern rewrite rules.


ONNXProgram(
    model=
        <
            ir_version=10,
            opset_imports={'': 18},
            producer_name='pytorch',
            producer_version='2.9.1+cu130',
            domain=None,
            model_version=None,
        >
        graph(
            name=main_graph,
            inputs=(
                %"input"<FLOAT,[s77,10,44]>
            ),
            outputs=(
                %"output"<FLOAT,[s77,1]>
            ),
            initializers=(
                %"phi.0.bias"<FLOAT,[32]>{TorchTensor(...)},
                %"phi.3.bias"<FLOAT,[32]>{TorchTensor(...)},
                %"rho.0.weight"<FLOAT,[32,32]>{TorchTensor(...)},
                %"rho.0.bias"<FLOAT,[32]>{TorchTensor(...)},
                %"rho.3.weight"<FLOAT,[1,32]>{TorchTensor(...)},
                %"rho.3.bias"<FLOAT,[1]>{TorchTensor<FLOAT,[1]>(Parameter containing: tensor([4.9055e-41], device='cuda:0', requires_grad=True), name='rho.3.bias')},
                %"val_0"<FLOAT,[44,32]>{Tensor

In [6]:
# Persist the fitted scaler's per-feature mean and scale so the same
# standardisation can be re-applied outside this notebook.
scaler_file = f'scaler_params_{target_map}.npz'
np.savez(scaler_file, mean=scaler.mean_, scale=scaler.scale_)