In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; every data and
# model path used later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [80]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
from xgboost import XGBClassifier
import os

In [107]:
# Input CSVs: for every dataset prefix there is a base export and a second
# export with a "2" suffix. The comprehension yields them in the order
# bgs, bgs2, gil, gil2, sm, sm2, tk, tk2, wg, wg2.
files = [
    f'/content/drive/MyDrive/Итоговая работа/data/wow{prefix}{part}.csv'
    for prefix in ('bgs', 'gil', 'sm', 'tk', 'wg')
    for part in ('', '2')
]

In [108]:
# Read every CSV listed in `files` into its own frame. A file that cannot be
# read is reported and skipped so the remaining files can still be used.
df_list = []
for f in files:
    try:
        df_temp = pd.read_csv(f)
        df_list.append(df_temp)
        print(f"Загружено: {f} → {len(df_temp)} строк")
    except Exception as e:
        print(f"Ошибка при загрузке {f}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that points at the real cause instead.
if not df_list:
    raise ValueError("Не удалось загрузить ни одного файла из списка files")

# A partial load would otherwise go unnoticed further down the notebook and
# silently change the training data, so make it visible here.
if len(df_list) < len(files):
    print(f"Внимание: загружено только {len(df_list)} из {len(files)} файлов")

# Stack all loaded frames into one table with a fresh 0..n-1 index.
df = pd.concat(df_list, ignore_index=True)
print(f"\nИтого: {len(df)} строк")

Загружено: /content/drive/MyDrive/Итоговая работа/data/wowbgs.csv → 3726 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowbgs2.csv → 5383 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowgil.csv → 657 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowgil2.csv → 817 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowsm.csv → 536 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowsm2.csv → 734 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowtk.csv → 494 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowtk2.csv → 789 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowwg.csv → 687 строк
Загружено: /content/drive/MyDrive/Итоговая работа/data/wowwg2.csv → 827 строк

Итого: 14650 строк


In [109]:
# Target column: missing 'Win' entries are replaced by 0 and the result is
# cast to int.
df['win'] = df['Win'].where(df['Win'].notna(), 0).astype(int)

In [110]:
# Per-row efficiency score: KB*10 + HK*5 + DD/1000 + HD/1000, divided by
# D + 1 so that rows with D == 0 do not divide by zero.
# The terms are accumulated left to right, in the same order as the formula.
df['efficiency'] = (
    df['KB'].mul(10)
    .add(df['HK'].mul(5))
    .add(df['DD'].div(1000))
    .add(df['HD'].div(1000))
    .div(df['D'].add(1))
)

In [111]:
# Missing 'BE' entries are treated as 0; the column is then stored as int.
df['BE'] = df['BE'].mask(df['BE'].isna(), 0).astype(int)

In [112]:
# Any efficiency value that came out as NaN (a missing input column value
# propagates through the formula) is replaced by 0.
df['efficiency'] = df['efficiency'].where(df['efficiency'].notna(), other=0)

In [113]:
# Lookup table with one row per (Faction, Class, Rol) combination holding the
# mean efficiency of that group in the column 'avg_efficiency'.
# NOTE(review): the means are taken over all rows of df, i.e. before the
# train/test split performed later in the notebook, so held-out rows
# contribute to this feature.
efficiency_map = (
    df.groupby(['Faction', 'Class', 'Rol'])['efficiency']
    .mean()
    .rename('avg_efficiency')
    .reset_index()
)

In [114]:
# Attach the group-level mean efficiency to every row.
# Dropping a previously merged 'avg_efficiency' first keeps this cell safe to
# re-run: without it a second run would produce 'avg_efficiency_x' /
# 'avg_efficiency_y' and the fillna line below would raise KeyError.
df = (
    df.drop(columns=['avg_efficiency'], errors='ignore')
    .merge(efficiency_map, on=['Faction', 'Class', 'Rol'], how='left')
)
# Rows with no matching group in efficiency_map get NaN from the left join;
# treat those as 0.
df['avg_efficiency'] = df['avg_efficiency'].fillna(0)

In [115]:
# Model inputs: three categorical columns, the BE column and the group-level
# mean efficiency. The target is the 'win' column.
feature_cols = [
    'Faction',
    'Class',
    'Rol',
    'BE',
    'avg_efficiency',
]
X = df.loc[:, feature_cols]
y = df['win']

In [116]:
# One-hot encode the categorical columns; drop_first=True omits the first
# level of each so the remaining indicator columns are not redundant.
X_encoded = pd.get_dummies(
    data=X,
    columns=['Faction', 'Class', 'Rol'],
    drop_first=True,
)

In [117]:
# Hold out 20% of the rows for evaluation. The split is stratified on the
# target and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [118]:
# Baseline model: a 100-tree random forest using all CPU cores, fitted on the
# training split and scored by accuracy on the held-out split.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
rf_acc = accuracy_score(y_true=y_test, y_pred=rf_pred)
print("Random Forest Accuracy:", rf_acc)

Random Forest Accuracy: 0.6395904436860068


In [119]:
# Gradient-boosted model with 100 estimators and log-loss as the evaluation
# metric, fitted and scored on the same split as the random forest.
xgb_model = XGBClassifier(
    n_estimators=100,
    random_state=42,
    eval_metric='logloss',
).fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)
xgb_acc = accuracy_score(y_true=y_test, y_pred=xgb_pred)
print("XGBoost Accuracy:", xgb_acc)

XGBoost Accuracy: 0.6395904436860068


In [120]:
# Per-class precision / recall / F1 of the XGBoost predictions on the test split.
print(classification_report(y_true=y_test, y_pred=xgb_pred))

              precision    recall  f1-score   support

           0       0.63      0.64      0.64      1445
           1       0.65      0.64      0.64      1485

    accuracy                           0.64      2930
   macro avg       0.64      0.64      0.64      2930
weighted avg       0.64      0.64      0.64      2930



In [77]:
# Persist everything needed to score new rows: the fitted XGBoost model, the
# exact column order of the encoded feature matrix, and the group-level
# efficiency lookup table.
# NOTE(review): this cell's saved execution count is lower than that of the
# training cells, so re-run it after training to be sure the pickles match
# the model evaluated above.
model_dir = '/content/drive/MyDrive/Итоговая работа/model'
# joblib.dump does not create missing folders; make sure the target exists.
os.makedirs(model_dir, exist_ok=True)
joblib.dump(xgb_model, f'{model_dir}/battlegrounds_model.pkl')
joblib.dump(X_encoded.columns.tolist(), f'{model_dir}/model_features.pkl')
joblib.dump(efficiency_map, f'{model_dir}/efficiency_map.pkl')

['/content/drive/MyDrive/Итоговая работа/model/efficiency_map.pkl']

In [54]:
# Auxiliary lookup tables stored next to the model.

# Mean of 'win' per (Faction, Rol) and mean efficiency per (Faction, Class).
win_rate_fr = df.groupby(['Faction', 'Rol'])['win'].mean()
class_eff = df.groupby(['Faction', 'Class'])['efficiency'].mean()

# Share of rows with Rol == 'heal' inside each 'Code' group, then the mean of
# those shares across all groups.
# NOTE(review): grouping is by 'Code' only — confirm that one Code corresponds
# to the unit the heal ratio is meant to describe.
df['is_heal'] = df['Rol'].eq('heal').astype(int)
team_stats = (
    df.groupby('Code')
    .agg(total=('Rol', 'size'), healers=('is_heal', 'sum'))
    .reset_index()
)
team_stats['heal_ratio'] = team_stats['healers'].div(team_stats['total'])
avg_heal_ratio = team_stats['heal_ratio'].mean()

# Series.to_dict() on a two-level index yields tuple keys, e.g. (faction, role).
joblib.dump(win_rate_fr.to_dict(), '/content/drive/MyDrive/Итоговая работа/model/win_rate_by_faction_role.pkl')
joblib.dump(class_eff.to_dict(), '/content/drive/MyDrive/Итоговая работа/model/class_efficiency.pkl')
joblib.dump({'avg_heal_ratio': avg_heal_ratio}, '/content/drive/MyDrive/Итоговая работа/model/heal_ratio_stats.pkl')

['/content/drive/MyDrive/Итоговая работа/model/heal_ratio_stats.pkl']