In [33]:
# Импорт необходимых библиотек
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

In [34]:
# from google.colab import drive
# drive.mount('/content/drive')
# data = pd.read_csv("/content/drive/My Drive/game_ds/high_diamond_ranked_10min.csv", sep = ",")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [35]:
# Load the match dataset from a CSV file in the working directory
data = pd.read_csv("high_diamond_ranked_10min.csv", sep=",")

# Анализ и предобработка

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame
data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) per column
data.describe()

In [None]:
# Number of missing values in each column
data.isna().sum()

In [None]:
# Per-column histograms; plt.show() renders the figure so the cell does not
# echo the array of Axes objects returned by DataFrame.hist
data.hist(figsize=(20, 20))
plt.show()

In [None]:
# Share of each blueWins outcome, in percent
data["blueWins"].value_counts(normalize=True).mul(100)

In [None]:
# Correlation analysis: pairwise correlations between all columns, drawn as an annotated heatmap
correlation_matrix = data.corr()
plt.figure(figsize=(25, 20))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm', cbar=True)
plt.title('Корреляционная матрица')
# Render explicitly so the cell does not echo the Text object returned by plt.title
plt.show()

In [None]:
# Scatter of the blue team's experience lead against its gold lead, coloured by outcome
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    data['blueExperienceDiff'],
    data['blueGoldDiff'],
    c=data['blueWins'],
    cmap='RdYlBu',
    alpha=0.6,
)

# Colour bar, axis labels, title and grid
fig.colorbar(scatter, ax=ax, label='blueWins (0 = Red win, 1 = Blue win)')
ax.set_xlabel('Разница в опыте (blueExperienceDiff)')
ax.set_ylabel('Разница в золоте (blueGoldDiff)')
ax.set_title('Зависимость победы Blue от разницы опыта и золота')
ax.grid(True)
plt.show()

In [None]:
# Data preprocessing: separate the feature matrix from the binary target
dropped_columns = ['gameId', 'blueWins', 'blueGoldDiff', 'blueExperienceDiff']
X = data.drop(columns=dropped_columns)
# Target: 1 when the first (blue) team won, 0 otherwise
y = data['blueWins'].eq(1).astype(int)

In [None]:
# Hold out 20% of the rows for testing, then take 25% of the remainder
# (0.25 of 0.8 = 20% of all rows) for validation
X_tset, X_test, y_tset, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_tset, y_tset, test_size=0.25, random_state=42
)

# Feature scaling: statistics are fitted on the training split only and then
# applied unchanged to the test and validation splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

In [None]:
# Convert every split to float32 tensors; targets are reshaped into (n, 1) columns
# so they line up with the single-output models
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).reshape(-1, 1)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32).reshape(-1, 1)
y_val_tensor = torch.tensor(y_val.to_numpy(), dtype=torch.float32).reshape(-1, 1)


# Модели, обучение, тестирование

In [None]:
class Net(nn.Module):
    """Smallest baseline: one hidden layer of 16 ReLU units, sigmoid output."""

    def __init__(self, input_dim):
        super().__init__()
        hidden_units = 16
        layers = [
            nn.Linear(input_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),  # output is a probability in (0, 1)
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one value per row."""
        return self.model(x)

class Net1(nn.Module):
    """Wider baseline: one hidden layer of 64 ReLU units, sigmoid output."""

    def __init__(self, input_dim):
        super().__init__()
        hidden_units = 64
        layers = [
            nn.Linear(input_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),  # output is a probability in (0, 1)
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one value per row."""
        return self.model(x)

class Net2(nn.Module):
    """Two hidden ReLU layers (128 and 64 units) followed by a sigmoid output."""

    def __init__(self, input_dim):
        super().__init__()
        widths = (input_dim, 128, 64)
        layers = []
        # One Linear + ReLU pair per consecutive pair of widths, built in order
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Sigmoid())  # output is a probability in (0, 1)
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one value per row."""
        return self.model(x)

class Net3(nn.Module):
    """Three hidden ReLU layers (256, 128 and 64 units) followed by a sigmoid output."""

    def __init__(self, input_dim):
        super().__init__()
        widths = (input_dim, 256, 128, 64)
        layers = []
        # One Linear + ReLU pair per consecutive pair of widths, built in order
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Sigmoid())  # output is a probability in (0, 1)
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one value per row."""
        return self.model(x)

class ImprovedNet(nn.Module):
    """One hidden layer of 32 units with batch norm, ReLU and dropout.

    The last layer is a plain ``nn.Linear`` with no sigmoid, so the forward
    pass returns raw scores rather than probabilities.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_units = 32
        layers = [
            nn.Linear(input_dim, hidden_units),
            nn.BatchNorm1d(hidden_units),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_units, 1),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one raw score per row."""
        return self.model(x)

class ImprovedNetV2(nn.Module):
    """Two hidden stages (32 then 16 units), each Linear -> BatchNorm -> LeakyReLU -> Dropout.

    The last layer is a plain ``nn.Linear`` with no sigmoid, so the forward
    pass returns raw scores rather than probabilities.
    """

    def __init__(self, input_dim):
        super().__init__()
        layers = []
        fan_in = input_dim
        # Both hidden stages share the same layout; only the width differs
        for fan_out in (32, 16):
            layers.extend([
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.LeakyReLU(),
                nn.Dropout(0.3),
            ])
            fan_in = fan_out
        layers.append(nn.Linear(fan_in, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one raw score per row."""
        return self.model(x)

In [None]:
def train(model_class, X_train, y_train, X_val, y_val, X_test, y_test, epochs=50, patience=3):
    """Train a probability-output binary classifier with full-batch Adam and early stopping.

    Args:
        model_class: Zero-argument callable returning a fresh ``nn.Module``. Its
            outputs are fed to ``nn.BCELoss``, so they must already be probabilities.
        X_train, y_train: Training features and targets (tensors).
        X_val, y_val: Validation tensors; the validation loss drives early stopping.
        X_test: Test features, used only for the final hard predictions.
        y_test: Not used by this function; kept so existing call sites keep working.
        epochs: Maximum number of full-batch optimisation steps.
        patience: Number of consecutive epochs without a new best validation loss
            after which training stops.

    Returns:
        Tuple ``(model, train_losses, val_losses, train_accs, val_accs, seconds, y_pred)``:
        the model restored to the weights with the lowest validation loss, the
        per-epoch histories, the wall-clock training time in seconds and the 0/1
        test predictions as a NumPy int array.
    """
    model = model_class()
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    best_val_loss = float("inf")
    best_weights = None
    trigger = 0
    train_losses, val_losses, train_accs, val_accs = [], [], [], []

    start_time = time.time()

    for _ in range(epochs):
        # One optimisation step on the whole training set
        model.train()
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_output = model(X_val)
            val_loss = criterion(val_output, y_val)
            # `output` was produced with autograd enabled, so detach it before
            # converting to NumPy
            train_acc = accuracy_score(y_train.numpy(), (output.detach().numpy() >= 0.5).astype(int))
            val_acc = accuracy_score(y_val.numpy(), (val_output.numpy() >= 0.5).astype(int))

        train_losses.append(loss.item())
        val_losses.append(val_loss.item())
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        if val_loss.item() < best_val_loss:
            best_val_loss = val_loss.item()
            # state_dict() returns references to the live parameters, which the
            # optimizer keeps updating in place; clone them so the snapshot
            # really stays at the best epoch.
            best_weights = {key: value.detach().clone() for key, value in model.state_dict().items()}
            trigger = 0
        else:
            trigger += 1
            if trigger >= patience:
                break

    end_time = time.time()
    # No snapshot exists when epochs == 0 or the validation loss never beat
    # +inf (e.g. NaN); keep the current weights in that case.
    if best_weights is not None:
        model.load_state_dict(best_weights)

    # Evaluation on the test set
    model.eval()
    with torch.no_grad():
        y_pred = (model(X_test) >= 0.5).int().numpy()

    return model, train_losses, val_losses, train_accs, val_accs, end_time - start_time, y_pred


In [None]:
# Train and evaluate every baseline architecture
models = [Net, Net1, Net2, Net3]
names = ["Net", "Net1", "Net2", "Net3"]
input_dim = X_train.shape[1]

# Per-model metric histories, keyed by architecture name
all_train_accs = {}
all_val_accs = {}
all_train_losses = {}
all_val_losses = {}

for name, model_class in zip(names, models):
    # Re-seed before each run so weight initialisation is reproducible and does
    # not depend on the order in which the architectures are trained
    # (same seed value as the data split).
    torch.manual_seed(42)

    # train() expects a zero-argument factory, so bind the input size here
    model_init = lambda: model_class(input_dim)
    model, train_losses, val_losses, train_accs, val_accs, train_time, y_pred = train(
        model_init, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor
    )

    # Keep the training histories for the comparison plots
    all_train_accs[name] = train_accs
    all_val_accs[name] = val_accs
    all_train_losses[name] = train_losses
    all_val_losses[name] = val_losses

    print(f"Архитектура: {name}")
    print(f"Время обучения: {train_time:.2f} сек")
    # Validation accuracy of the last epoch that was run
    print(f"Валидационная точность: {val_accs[-1]:.4f}")

    # Predicted probabilities and hard labels on the test set
    # (train() only returns the labels, ROC AUC needs the probabilities)
    model.eval()
    with torch.no_grad():
        test_output = model(X_test_tensor)
        y_pred = (test_output >= 0.5).int().numpy()
        y_proba = test_output.detach().numpy()

    # ROC AUC on the test set
    roc_auc = roc_auc_score(y_test_tensor.numpy(), y_proba)
    print(f"ROC AUC (тест): {roc_auc:.4f}")

    print("Отчёт по метрикам (тестовая выборка):")
    print(classification_report(y_test, y_pred, target_names=["Победа красных (0)", "Победа синих(1)"]))

    # Learning curves: accuracy on the left, loss on the right
    plt.figure(figsize=(12, 5))
    plt.suptitle(f"Модель: {name}")

    plt.subplot(1, 2, 1)
    plt.plot(train_accs, label="Train Acc")
    plt.plot(val_accs, label="Val Acc")
    plt.title("Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_losses, label="Train Loss")
    plt.plot(val_losses, label="Val Loss")
    plt.title("Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Confusion matrix on the test set
    cm = confusion_matrix(y_test, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="RdYlBu", values_format="d")
    plt.title(f"Матрица ошибок: {name}")
    plt.show()



In [None]:
# Combined validation curves of all trained models: accuracy first, then loss
for title, ylabel, histories in (
    ("Accuracy всех моделей", "Accuracy", all_val_accs),
    ("Loss всех моделей", "Loss", all_val_losses),
):
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(title)
    # One line per architecture
    for name, curve in histories.items():
        ax.plot(curve, label=name)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)
    plt.show()


# Улучшение модели Net


In [None]:
def train_improved(model_class, X_train, y_train, X_val, y_val, X_test, y_test, epochs=50, patience=3):
    """Train a logit-output binary classifier with weight decay and early stopping.

    Unlike ``train``-style loops built on ``nn.BCELoss``, this one uses
    ``nn.BCEWithLogitsLoss``, so the model must return raw scores; a sigmoid is
    applied here whenever probabilities are needed.

    Args:
        model_class: Zero-argument callable returning a fresh ``nn.Module`` that
            outputs raw scores (logits).
        X_train, y_train: Training features and targets (tensors).
        X_val, y_val: Validation tensors; the validation loss drives early stopping.
        X_test: Test features, used only for the final predictions.
        y_test: Not used by this function; kept so existing call sites keep working.
        epochs: Maximum number of full-batch optimisation steps.
        patience: Number of consecutive epochs without a new best validation loss
            after which training stops.

    Returns:
        Tuple ``(model, train_losses, val_losses, train_accs, val_accs, seconds,
        y_pred, y_proba)``: the model restored to the state with the lowest
        validation loss, the per-epoch histories, the wall-clock training time in
        seconds, the 0/1 test predictions and the test probabilities (NumPy arrays).
    """
    model = model_class()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

    best_val_loss = float("inf")
    best_weights = None
    trigger = 0
    train_losses, val_losses, train_accs, val_accs = [], [], [], []

    start_time = time.time()

    for _ in range(epochs):
        # One optimisation step on the whole training set
        model.train()
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_output = model(X_val)
            val_loss = criterion(val_output, y_val)

            # The model emits logits; convert to probabilities before thresholding
            train_preds = torch.sigmoid(output)
            val_preds = torch.sigmoid(val_output)

            train_acc = accuracy_score(y_train.numpy(), (train_preds.numpy() >= 0.5).astype(int))
            val_acc = accuracy_score(y_val.numpy(), (val_preds.numpy() >= 0.5).astype(int))

        train_losses.append(loss.item())
        val_losses.append(val_loss.item())
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        if val_loss.item() < best_val_loss:
            best_val_loss = val_loss.item()
            # state_dict() returns references to the live parameters and buffers,
            # which later training steps modify in place; clone them so the
            # snapshot really stays at the best epoch.
            best_weights = {key: value.detach().clone() for key, value in model.state_dict().items()}
            trigger = 0
        else:
            trigger += 1
            if trigger >= patience:
                break

    end_time = time.time()
    # No snapshot exists when epochs == 0 or the validation loss never beat
    # +inf (e.g. NaN); keep the current weights in that case.
    if best_weights is not None:
        model.load_state_dict(best_weights)

    # Evaluation on the test set
    model.eval()
    with torch.no_grad():
        test_output = torch.sigmoid(model(X_test))
        y_pred = (test_output >= 0.5).int().numpy()
        y_proba = test_output.detach().numpy()

    return model, train_losses, val_losses, train_accs, val_accs, end_time - start_time, y_pred, y_proba


In [None]:
# Train and evaluate the regularised architectures
models = [ImprovedNet, ImprovedNetV2]
names = ["ImprovedNet", "ImprovedNetV2"]

# Per-model metric histories, keyed by architecture name
all_train_accs = {}
all_val_accs = {}
all_train_losses = {}
all_val_losses = {}

for name, model_class in zip(names, models):
    # Re-seed before each run so weight initialisation and dropout masks are
    # reproducible and do not depend on the order in which the architectures
    # are trained (same seed value as the data split).
    torch.manual_seed(42)

    # train_improved() expects a zero-argument factory, so bind the input size here
    model_init = lambda: model_class(input_dim)
    model, train_losses, val_losses, train_accs, val_accs, train_time, y_pred, y_proba = train_improved(
        model_init, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor
    )

    # Keep the training histories
    all_train_accs[name] = train_accs
    all_val_accs[name] = val_accs
    all_train_losses[name] = train_losses
    all_val_losses[name] = val_losses

    print(f"Архитектура: {name}")
    print(f"Время обучения: {train_time:.2f} сек")
    # Validation accuracy of the last epoch that was run
    print(f"Валидационная точность: {val_accs[-1]:.4f}")

    # ROC AUC on the test set, from the probabilities returned by train_improved()
    roc_auc = roc_auc_score(y_test_tensor.numpy(), y_proba)
    print(f"ROC AUC (тест): {roc_auc:.4f}")

    print("Отчёт по метрикам (тестовая выборка):")
    print(classification_report(y_test_tensor.numpy(), y_pred, target_names=["Победа красных (0)", "Победа синих(1)"]))

    # Learning curves: accuracy on the left, loss on the right
    plt.figure(figsize=(12, 5))
    plt.suptitle(f"Модель: {name}")

    plt.subplot(1, 2, 1)
    plt.plot(train_accs, label="Train Acc")
    plt.plot(val_accs, label="Val Acc")
    plt.title("Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_losses, label="Train Loss")
    plt.plot(val_losses, label="Val Loss")
    plt.title("Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Confusion matrix on the test set
    cm = confusion_matrix(y_test_tensor.numpy(), y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="RdYlBu", values_format="d")
    plt.title(f"Матрица ошибок: {name}")
    plt.show()
