In [2]:
# Attach Google Drive to the Colab runtime; the CSV inputs read later
# are addressed through this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import numpy as np
import joblib
from xgboost import XGBClassifier

In [3]:
# Input locations on the mounted Drive, kept in one place so they are easy to change.
WOWAH_CSV_PATH = '/content/drive/MyDrive/Итоговый Проект/wowah_data.csv'
ZONES_CSV_PATH = '/content/drive/MyDrive/Итоговый Проект/zones.csv'
ROW_LIMIT = 3000000  # read at most this many data rows of wowah_data.csv

# `df` holds the (truncated) main table; `zones_df` is the per-zone lookup table.
df = pd.read_csv(WOWAH_CSV_PATH, nrows=ROW_LIMIT)
zones_df = pd.read_csv(ZONES_CSV_PATH)

In [4]:
# Remove leading/trailing whitespace from the header names so columns can be
# addressed by their plain names in the rest of the notebook.
df.columns = [name.strip() for name in df.columns]

# Show the cleaned header as the cell output.
list(df.columns)

['char', 'level', 'race', 'charclass', 'zone', 'guild', 'timestamp']

In [5]:
# Binary target: 1 when the row's level equals 70, otherwise 0.
df['is_max_level'] = df['level'].eq(70).astype(int)

# Class balance of the target, expressed as proportions.
target_balance = df['is_max_level'].value_counts(normalize=True)
print(target_balance)

is_max_level
1    0.651131
0    0.348869
Name: proportion, dtype: float64


In [6]:
# Binary feature: 1 when `guild` is anything other than -1, otherwise 0.
# NOTE(review): -1 looks like the "no guild" sentinel in this dataset — confirm.
df['has_guild'] = df['guild'].ne(-1).astype(int)

In [7]:
# Build whitespace-stripped copies of the zone-name columns in both frames so
# they can serve as matching join keys.
for frame, raw_col, clean_col in (
    (df, 'zone', 'zone_clean'),
    (zones_df, 'Zone_Name', 'Zone_Name_clean'),
):
    frame[clean_col] = frame[raw_col].str.strip()

In [8]:
# Remove zone-attribute columns that are already on `df` (e.g. left over from a
# previous merge) so that merging them again does not create suffixed duplicates.
stale_zone_cols = [
    name
    for name in ('Type', 'Min_rec_level', 'Max_rec_level', 'Controlled')
    if name in df.columns
]
if stale_zone_cols:
    df = df.drop(columns=stale_zone_cols)

In [9]:
# Attach zone attributes to every row of `df` by matching on the
# whitespace-stripped zone name.
zone_attribute_cols = ['Type', 'Min_rec_level', 'Max_rec_level', 'Controlled']

# Keep one lookup row per zone name: a repeated name in the zones table would
# otherwise duplicate every matching row of `df` during the merge.
zone_lookup = zones_df[['Zone_Name_clean'] + zone_attribute_cols].drop_duplicates(
    subset='Zone_Name_clean'
)

df = df.merge(
    zone_lookup,
    left_on='zone_clean',
    right_on='Zone_Name_clean',
    how='left',              # keep every row of df; unmatched zones get NaN attributes
    validate='many_to_one',  # raise if the lookup keys are ever not unique
)

# The join keys are helper columns only; drop them without mutating in place.
df = df.drop(columns=['zone_clean', 'Zone_Name_clean'])

In [10]:
# Quick look at the first rows of the merged frame, limited to the raw zone,
# a few zone attributes and the two engineered flags.
preview_cols = ['zone', 'Type', 'Min_rec_level', 'Controlled', 'has_guild', 'is_max_level']
df.head()[preview_cols]

Unnamed: 0,zone,Type,Min_rec_level,Controlled,has_guild,is_max_level
0,Orgrimmar,City,1.0,Horde,1,0
1,Durotar,Zone,1.0,Horde,0,0
2,Ghostlands,Zone,10.0,Horde,0,0
3,Ghostlands,Zone,10.0,Horde,0,0
4,Hellfire Peninsula,Zone,58.0,Contested,0,0


In [11]:
# Model inputs (character attributes, zone attributes, guild flag) and the
# binary target column.
feature_columns = [
    'race',
    'charclass',
    'Type',
    'Controlled',
    'Min_rec_level',
    'has_guild',
]
target_column = 'is_max_level'

# X: feature frame, y: target series.
X, y = df[feature_columns], df[target_column]

In [12]:
# One-hot encode the categorical features; drop_first=True omits the first
# level of each column. The remaining columns are passed through unchanged.
categorical_columns = ['race', 'charclass', 'Type', 'Controlled']
X_encoded = pd.get_dummies(
    X,
    columns=categorical_columns,
    drop_first=True,
)

In [13]:
# 80/20 row-level split, stratified on the target so both parts keep the same
# class proportions; the fixed random_state makes the split repeatable.
# NOTE(review): the raw data has a `char` column — if one character appears in
# many rows, a row-level split puts that character in both train and test.
# Confirm whether a group-wise split is wanted.
split_parts = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts

In [14]:
# Train a gradient-boosted tree classifier and report its accuracy on the
# held-out test part.
xgb_params = dict(n_estimators=100, random_state=42, eval_metric='logloss')
xgb_model = XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)

xgb_pred = xgb_model.predict(X_test)
xgb_acc = accuracy_score(y_true=y_test, y_pred=xgb_pred)
print("XGBoost Accuracy:", xgb_acc)

XGBoost Accuracy: 0.8592383333333333


In [15]:
# Train a random forest on the same split (n_jobs=-1 uses all available cores)
# and report its accuracy on the held-out test part.
rf_params = dict(n_estimators=100, random_state=42, n_jobs=-1)
rf_model = RandomForestClassifier(**rf_params)
rf_model.fit(X_train, y_train)

rf_pred = rf_model.predict(X_test)
rf_acc = accuracy_score(y_true=y_test, y_pred=rf_pred)
print("Random Forest Точность:", rf_acc)

Random Forest Точность: 0.859825


In [16]:
# Per-class precision / recall / F1 of the random forest on the test part.
rf_report = classification_report(y_true=y_test, y_pred=rf_pred)
print(rf_report)

              precision    recall  f1-score   support

           0       0.83      0.75      0.79    209322
           1       0.87      0.92      0.90    390678

    accuracy                           0.86    600000
   macro avg       0.85      0.83      0.84    600000
weighted avg       0.86      0.86      0.86    600000



In [17]:
# Persist the trained random forest together with the exact encoded column
# order it was fitted on, so the same feature layout can be rebuilt later.
# NOTE(review): both paths are relative, so the files land in the current
# working directory rather than the Drive folder the inputs were read from —
# confirm this is intended.
MODEL_PATH = 'wow_max_level_model.pkl'
FEATURES_PATH = 'model_features.pkl'

joblib.dump(rf_model, MODEL_PATH)

encoded_feature_names = X_encoded.columns.tolist()
joblib.dump(encoded_feature_names, FEATURES_PATH)

['model_features.pkl']

In [18]:
# Distinct guild ids in order of first appearance.
pd.unique(df['guild'])

array([165,  -1, 243,  35, 103,  79,  19,  53, 174, 101, 204,   5, 167,
         9, 104, 216, 282, 228, 220, 291, 293, 273, 205, 251, 161, 266,
       235, 156, 259, 253, 108, 171,  62,  99, 281, 189, 289, 256, 186,
       269, 288,  23, 160, 277, 184, 252, 131, 226, 247,  65,  12, 241,
       107,  97,  96, 166,   8, 110, 244, 140, 290,  16,  90, 169, 150,
         4, 272, 236, 258,   3, 141, 239, 135, 117, 191, 255, 178, 173,
       271, 238, 297, 203, 168, 115, 198, 199, 157, 246, 276, 136, 298,
       105,  72,  92, 193, 254,  49, 215, 123, 213, 264, 147, 137, 218,
       121, 268, 206, 119,  93, 267, 146, 143, 240, 287, 155, 197, 195,
       116, 211, 262, 245, 200, 248,  45,  47, 176, 114,  60, 292, 250,
       265, 170, 294, 209, 299, 177, 231, 134, 300,  27, 133, 278, 296,
       132, 301, 285, 261, 302, 175,  81,  82, 208, 219, 257,  40, 180,
       275, 260, 303,  46, 124, 284, 270, 304, 295, 154, 305, 306, 307,
       308, 309, 106, 286, 310, 280, 311, 312, 313, 279, 314, 17