In [1]:
import sys

# setting path
sys.path.append('..')


In [2]:
# outside package
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import torchvision.models
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Read Data

In [3]:
heroes_data = pd.read_csv("../data/processed_data/heroes_data.csv", index_col=0)
heroes_data.head()

Unnamed: 0,id,name,win_rate,against_hero_1,against_hero_2,against_hero_3,against_hero_4,against_hero_5,against_hero_6,against_hero_7,...,against_hero_119,against_hero_120,against_hero_121,against_hero_123,against_hero_126,against_hero_128,against_hero_129,against_hero_135,against_hero_136,against_hero_137
0,1,Anti-Mage,0.4891,,0.566667,0.464789,0.447368,0.44186,0.608696,0.588235,...,0.545455,0.5,0.535433,0.381579,0.477273,0.460526,0.413793,0.428571,0.52,0.521739
1,2,Axe,0.485514,0.433333,,0.474576,0.576923,0.471264,0.507937,0.666667,...,0.473684,0.482143,0.474227,0.508475,0.486486,0.490196,0.544828,0.419355,0.471429,0.392857
2,3,Bane,0.552398,0.535211,0.525424,,0.605042,0.576087,0.495413,0.5625,...,0.516129,0.530928,0.592018,0.617925,0.616822,0.595745,0.583984,0.547945,0.490066,0.589744
3,4,Bloodseeker,0.496582,0.552632,0.423077,0.394958,,0.589147,0.480769,0.490909,...,0.48,0.515513,0.620321,0.537313,0.567164,0.50655,0.517647,0.517413,0.483871,0.387755
4,5,Crystal Maiden,0.496833,0.55814,0.528736,0.423913,0.410853,,0.491124,0.461538,...,0.519481,0.507519,0.550296,0.516807,0.4,0.482085,0.538462,0.492593,0.490141,0.514851


In [4]:
heroes_stats = pd.read_csv("../data/processed_data/heroes_stats.csv", index_col=0)
heroes_stats.head()

Unnamed: 0,id,localized_name,base_health,base_health_regen,base_mana,base_mana_regen,base_armor,base_mr,base_attack_min,base_attack_max,...,Durable,Escape,Initiator,Jungler,Nuker,Pusher,Support,agi,int,Melee
0,1,Anti-Mage,200,0.25,75,0.0,0.0,25,29,33,...,0,1,0,0,1,0,0,1,0,1
1,2,Axe,200,2.5,75,0.0,-1.0,25,27,31,...,1,0,1,1,0,0,0,0,0,1
2,3,Bane,200,0.25,75,0.0,1.0,25,33,39,...,1,0,0,0,1,0,1,0,1,0
3,4,Bloodseeker,200,0.25,75,0.0,2.0,25,35,41,...,0,0,1,1,1,0,0,1,0,1
4,5,Crystal Maiden,200,0.25,75,1.0,-1.0,25,28,34,...,0,0,0,1,1,0,1,0,1,0


In [5]:
# Load the per-match draft table (one row per match, one column per draft selection).
# NOTE(review): the file carries a .json extension but is parsed with read_csv; the
# tabular preview suggests the content is CSV — confirm and consider renaming the file.
data = pd.read_csv("../data/processed_data/data.json", index_col=0)
data.head()

Unnamed: 0,match_id,radiant_win,region,patch,first_pick_team,selection_0,selection_1,selection_2,selection_3,selection_4,...,selection_14,selection_15,selection_16,selection_17,selection_18,selection_19,selection_20,selection_21,selection_22,selection_23
0,5746455668,False,3,47,1,85,91,79,41,43,...,47,20,64,94,128,65,4,33,28,55
1,5795993534,True,3,47,0,65,88,32,111,13,...,38,51,12,4,47,54,46,11,55,114
2,5883814830,True,5,47,1,102,88,73,13,86,...,129,121,110,32,113,106,126,17,39,74
3,5860105982,True,17,47,0,38,88,9,13,111,...,102,42,126,11,70,29,12,60,104,18
4,5831784791,True,3,47,1,7,90,88,91,111,...,86,107,94,1,28,49,2,22,55,104


In [6]:
merged_data = pd.read_csv("../data/processed_data/merged_data.csv", index_col=0)
merged_data.head()

Unnamed: 0,match_id,radiant_win,region,patch,first_pick_team,selection_0_id,selection_0_base_health,selection_0_base_health_regen,selection_0_base_mana,selection_0_base_mana_regen,...,selection_23_Durable,selection_23_Escape,selection_23_Initiator,selection_23_Jungler,selection_23_Nuker,selection_23_Pusher,selection_23_Support,selection_23_agi,selection_23_int,selection_23_Melee
0,5746455668,False,3,47,1,85,200,0.25,75,0.25,...,0,1,1,1,0,0,0,0,1,1
1,6111186894,False,3,48,0,110,200,2.0,75,0.0,...,0,1,1,1,0,0,0,0,1,1
2,6054787486,False,5,48,0,46,200,0.25,75,0.0,...,0,1,1,1,0,0,0,0,1,1
3,5848322719,True,3,47,0,88,200,2.0,75,0.0,...,0,1,1,1,0,0,0,0,1,1
4,5807290727,True,3,47,0,129,200,0.25,75,0.5,...,0,1,1,1,0,0,0,0,1,1


# Prepare Data For Training

In [7]:
# Label: whether the radiant side won the match.
y = merged_data["radiant_win"]
# Features: every remaining column except the match identifier and the label itself.
X = merged_data.drop(columns=["match_id", "radiant_win"])

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
y_train.mean()

0.5258608862669777

In [10]:
y_test.mean()

0.5298957761930884

# Neural Network

In [11]:
# Pick the best available accelerator: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


Convert Data to Tensor

In [12]:
# Wrap each split as a (features, label) dataset of float32 tensors.
# The dtype is stated explicitly instead of relying on the legacy `torch.Tensor(...)`
# constructor, so the boolean labels become 0.0 / 1.0 as BCELoss expects.
# NOTE(review): features are fed to the network unscaled; the training log below shows
# saturated losses — consider standardizing with statistics from X_train.
training_data = TensorDataset(
    torch.tensor(X_train.to_numpy(dtype="float32")),
    torch.tensor(y_train.to_numpy(dtype="float32")),
)
test_data = TensorDataset(
    torch.tensor(X_test.to_numpy(dtype="float32")),
    torch.tensor(y_test.to_numpy(dtype="float32")),
)

Create Data Loader

In [13]:
# Mini-batches of 10 rows. Only the training loader is shuffled: shuffling the
# evaluation loader changes which rows land in the final partial batch, which makes the
# per-batch averaged test loss vary from run to run.
training_dataloader = DataLoader(training_data, batch_size=10, shuffle=True, num_workers=0)
test_dataloader = DataLoader(test_data, batch_size=10, shuffle=False, num_workers=0)

In [14]:
# Peek at one batch to sanity-check tensor shapes and dtypes.
# The loop variables get their own names so the feature frame `X` and label series `y`
# defined earlier in the notebook are not silently replaced by tensors.
for X_batch, y_batch in test_dataloader:
    print(f"Shape of X [N, C]: {X_batch.shape}")
    print(f"Shape of y: {y_batch.shape} {y_batch.dtype}")
    break

Shape of X [N, C]: torch.Size([10, 819])
Shape of y: torch.Size([10]) torch.float32


In [15]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier that outputs a win probability.

    The network is a stack of ``Linear -> ReLU`` blocks followed by a single-unit
    ``Linear`` layer and a ``Sigmoid``, so the output lies in ``[0, 1]`` and is meant to
    be used with ``nn.BCELoss``.

    Args:
        in_features: Number of input features per sample. Defaults to 819, the width
            of the feature matrix used in this notebook.
        hidden_sizes: Width of each hidden layer, in order. The default
            ``(2000, 512, 200)`` reproduces the original architecture and therefore
            the same ``state_dict`` keys, so previously saved checkpoints still load.
    """

    def __init__(self, in_features=819, hidden_sizes=(2000, 512, 200)):
        super().__init__()
        self.flatten = nn.Flatten()

        layers = []
        width = in_features
        for hidden in hidden_sizes:
            layers += [nn.Linear(width, hidden), nn.ReLU()]
            width = hidden
        # Single output unit squashed to a probability.
        layers += [nn.Linear(width, 1), nn.Sigmoid()]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of probabilities for the input batch ``x``."""
        x = self.flatten(x)
        # The stack ends in a Sigmoid, so these are probabilities rather than logits.
        probabilities = self.linear_relu_stack(x)
        return probabilities


model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=819, out_features=2000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=2000, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=200, bias=True)
    (5): ReLU()
    (6): Linear(in_features=200, out_features=1, bias=True)
    (7): Sigmoid()
  )
)


In [16]:
# Binary cross-entropy on probabilities; pairs with the Sigmoid that ends the model's layer stack.
loss_fn = nn.BCELoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [17]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Each batch is moved to the notebook-level ``device``, scored with ``loss_fn`` and
    used for a single optimizer step. The running loss is printed every 100 batches.
    """
    total_samples = len(dataloader.dataset)
    model.train()

    for batch_idx, (features, targets) in enumerate(dataloader):
        features = features.to(device)
        targets = targets.to(device)

        # Forward pass: flatten the (batch, 1) output so it lines up with the targets.
        batch_loss = loss_fn(model(features).flatten(), targets)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Periodic progress report.
        if batch_idx % 100 != 0:
            continue
        seen = batch_idx * len(features)
        print(f"loss: {batch_loss.item():>7f}  [{seen:>5d}/{total_samples:>5d}]")


In [18]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    # optimize calculation time
    with torch.no_grad():
        for X, y in dataloader:
            y = y
            X, y = X.to(device), y.to(device)
            pred = model(X).flatten()
            test_loss += loss_fn(pred, y).item()
            correct += (pred == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


In [19]:
# Alternate one training pass and one evaluation pass for a fixed number of epochs.
epochs = 10
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n-------------------------------")
    train(training_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 3.332207  [    0/ 7289]
loss: 40.000000  [ 1000/ 7289]
loss: 50.000000  [ 2000/ 7289]
loss: 80.000000  [ 3000/ 7289]
loss: 70.000000  [ 4000/ 7289]
loss: 60.000000  [ 5000/ 7289]
loss: 70.000000  [ 6000/ 7289]
loss: 40.000000  [ 7000/ 7289]
Test Error: 
 Accuracy: 53.0%, Avg loss: 47.085610 

Epoch 2
-------------------------------
loss: 40.000000  [    0/ 7289]
loss: 20.000000  [ 1000/ 7289]
loss: 70.000000  [ 2000/ 7289]
loss: 30.000000  [ 3000/ 7289]
loss: 60.000000  [ 4000/ 7289]
loss: 60.000000  [ 5000/ 7289]
loss: 60.000000  [ 6000/ 7289]
loss: 50.000000  [ 7000/ 7289]
Test Error: 
 Accuracy: 53.0%, Avg loss: 47.213115 

Epoch 3
-------------------------------
loss: 40.000000  [    0/ 7289]
loss: 50.000000  [ 1000/ 7289]
loss: 60.000000  [ 2000/ 7289]
loss: 80.000000  [ 3000/ 7289]
loss: 30.000000  [ 4000/ 7289]
loss: 40.000000  [ 5000/ 7289]
loss: 30.000000  [ 6000/ 7289]
loss: 50.000000  [ 7000/ 7289]
Test Error: 
 Accuracy: 53.0%, 

## Save Model

In [20]:
torch.save(model.state_dict(), "../models/draft_evaluation_model.pth")

In [21]:
# Restore the saved weights. `map_location` remaps the stored tensors onto the current
# `device`, so a checkpoint written on a CUDA machine also loads on a CPU-only one.
model.load_state_dict(torch.load('../models/draft_evaluation_model.pth', map_location=device))

<All keys matched successfully>

In [22]:
model.eval()

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=819, out_features=2000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=2000, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=200, bias=True)
    (5): ReLU()
    (6): Linear(in_features=200, out_features=1, bias=True)
    (7): Sigmoid()
  )
)

# 24 Models


In [None]:
# Score a single draft (row 3 of merged_data).
# The row mixes the boolean label with numeric columns, so it is converted to a float32
# array explicitly rather than handing a label-indexed Series to the legacy
# `torch.Tensor(...)` constructor. Gradients are not needed for inference.
single_draft = merged_data.iloc[3].drop(["match_id", "radiant_win"]).to_numpy(dtype="float32")
with torch.no_grad():
    single_draft_prob = model(torch.atleast_2d(torch.tensor(single_draft)).to(device))
single_draft_prob

In [None]:
test(test_dataloader, model, loss_fn)


In [None]:
y_test.mean()

In [None]:
# Print the predicted class (1 = radiant win) for the first 14 drafts.
# The model returns a single probability per row, so `argmax(1)` over that size-1
# dimension is always 0; the class is obtained by thresholding at 0.5 instead.
with torch.no_grad():
    for i in range(14):
        row_features = merged_data.iloc[i].drop(["match_id", "radiant_win"]).to_numpy(dtype="float32")
        row_prob = model(torch.atleast_2d(torch.tensor(row_features)).to(device))
        print((row_prob >= 0.5).long().flatten())


# Decision Trees


In [23]:
# Unconstrained decision tree as a simple baseline; the seed matches the one used for
# the train/test split so the reported score is reproducible across runs.
dt = DecisionTreeClassifier(random_state=42)

In [24]:
dt.fit(X_train, y_train)


In [25]:
dt.score(X_test, y_test)


0.5024684585847504

In [26]:
y_test.mean()


0.5298957761930884

# Random Forest

In [31]:
# 5-fold grid search over shallow random forests (tree count x maximum depth).
# The forest is seeded so that cross-validation scores, the selected parameters and the
# feature importances shown below are reproducible across runs.
# `criterion`, `min_samples_split` and `n_jobs` are single-value entries: they are
# fixed settings rather than searched hyperparameters.
rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid={
    "n_estimators": [50, 100],
    "max_depth": [1, 2, 3, 4],
    "criterion": ["entropy"],
    "min_samples_split": [10],
    "n_jobs": [-1]
}, cv=5, verbose=2, n_jobs=-1)


In [32]:
rf.fit(X_train, y_train)


Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [33]:
# Best cross-validated accuracy minus the share of radiant wins in the training labels.
# NOTE(review): that share equals the majority-class accuracy only while it is >= 0.5
# (it is about 0.526 here).
rf.best_score_ - y_train.mean()


0.001920986965974647

In [34]:
rf.best_estimator_


In [35]:
rf.best_params_


{'criterion': 'entropy',
 'max_depth': 4,
 'min_samples_split': 10,
 'n_estimators': 100,
 'n_jobs': -1}

In [36]:
# Held-out accuracy of the tuned forest minus the share of radiant wins in the test labels.
# NOTE(review): that share equals the majority-class accuracy only while it is >= 0.5
# (it is about 0.530 here).
rf.score(X_test, y_test) - y_test.mean()

0.0010970927043334466

In [46]:
# Rank the input columns by the best estimator's `feature_importances_` and show the top five.
feature_importances = pd.DataFrame(rf.best_estimator_.feature_importances_, index=X_train.columns)
feature_importances.sort_values(by=0, ascending=False).head()


Unnamed: 0,0
selection_19_agi_gain,0.010968
selection_2_base_str,0.009645
selection_17_id,0.009643
selection_23_id,0.009389
selection_22_base_attack_max,0.009345


In [None]:
# TODO: try XGBoost
# TODO: try shallower trees
# TODO: do exploratory data analysis


# XGBoost


In [14]:
# 5-fold grid search over XGBoost boosters, tree depth and number of boosting rounds.
# NOTE(review): only learning_rate=1 is tried and the held-out score below ends up under
# the positive-class rate — consider adding smaller learning rates to the grid.
# NOTE(review): presumably `max_depth` has no effect for the "gblinear" booster, which
# would make those candidates duplicates — confirm against the XGBoost parameter docs.
xgb = GridSearchCV(XGBClassifier(), param_grid={
    "booster": ["gbtree", "gblinear", "dart"],
    "objective": ["binary:logistic"],
    'max_depth': [1, 2, 3, 4],
    'n_estimators': [50, 100],
    'learning_rate': [1]
}, cv=5, verbose=2, n_jobs=-1)

In [15]:
xgb.fit(X_train, y_train)


Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [16]:
# Held-out accuracy of the tuned XGBoost model minus the share of radiant wins in the test labels.
# NOTE(review): that share equals the majority-class accuracy only while it is >= 0.5.
xgb.score(X_test, y_test) - y_test.mean()

-0.03620405924300607

# Data Analysis


In [27]:
# Keep only the outcome, the first-pick flag and the selection slots used for the lineups.
# The dropped slots are presumably the ban phases of the draft — TODO confirm.
dropped_selection_columns = [
    f"selection_{slot}" for slot in (0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 18, 19, 20, 21)
]
picked_heroes = data.drop(columns=["match_id", "region", "patch", *dropped_selection_columns])
picked_heroes.head()

Unnamed: 0,radiant_win,first_pick_team,selection_4,selection_5,selection_6,selection_7,selection_14,selection_15,selection_16,selection_17,selection_22,selection_23
0,False,1,43,107,51,10,47,20,64,94,28,55
1,True,0,13,30,78,86,38,51,12,4,55,114
2,True,1,86,103,29,91,129,121,110,32,39,74
3,True,0,111,26,107,129,102,42,126,11,104,18
4,True,1,111,13,65,102,86,107,94,1,55,104


In [33]:
# Matches in which team 0 had the first pick.
team_0_pick_first = picked_heroes.loc[picked_heroes["first_pick_team"] == 0]
team_0_pick_first.head()

Unnamed: 0,radiant_win,first_pick_team,selection_4,selection_5,selection_6,selection_7,selection_14,selection_15,selection_16,selection_17,selection_22,selection_23
1,True,0,13,30,78,86,38,51,12,4,55,114
3,True,0,111,26,107,129,102,42,126,11,104,18
5,False,0,28,21,41,44,57,79,69,74,5,13
7,False,0,120,13,21,129,26,108,73,46,6,106
10,False,0,126,85,47,74,19,31,129,12,7,44


In [34]:
# Matches in which team 1 had the first pick.
team_1_pick_first = picked_heroes.loc[picked_heroes["first_pick_team"] == 1]
team_1_pick_first.head()

Unnamed: 0,radiant_win,first_pick_team,selection_4,selection_5,selection_6,selection_7,selection_14,selection_15,selection_16,selection_17,selection_22,selection_23
0,False,1,43,107,51,10,47,20,64,94,28,55
2,True,1,86,103,29,91,129,121,110,32,39,74
4,True,1,111,13,65,102,86,107,94,1,55,104
6,False,1,88,86,58,85,104,12,89,22,16,39
8,False,1,86,129,95,15,100,103,111,60,25,45


In [41]:
# Selection slots taken by the side that picks first, and by the side that picks second.
first_pick_slots = (4, 7, 15, 16, 22)
second_pick_slots = (5, 6, 14, 17, 23)

# Build one record per match with both five-hero lineups and the outcome.
# When team 0 picks first the first-pick slots form the radiant lineup; when team 1
# picks first the same slots form the dire lineup.
team_drafts = []
for matches, first_side, second_side in (
    (team_0_pick_first, "radiant_lineup", "dire_lineup"),
    (team_1_pick_first, "dire_lineup", "radiant_lineup"),
):
    for _, match in matches.iterrows():
        team_drafts.append({
            first_side: [match[f"selection_{slot}"] for slot in first_pick_slots],
            second_side: [match[f"selection_{slot}"] for slot in second_pick_slots],
            "radiant_win": match["radiant_win"],
        })

In [43]:
# Turn the list of per-match records into a DataFrame.
# NOTE(review): this rebinds `team_drafts` from a list to a DataFrame; a distinct name
# would make the cell's input and output easier to tell apart.
team_drafts = pd.DataFrame(team_drafts)
team_drafts.head()

Unnamed: 0,radiant_lineup,dire_lineup,radiant_win
0,"[13, 86, 51, 12, 55]","[30, 78, 38, 4, 114]",True
1,"[111, 129, 42, 126, 104]","[26, 107, 102, 11, 18]",True
2,"[28, 44, 79, 69, 5]","[21, 41, 57, 74, 13]",False
3,"[120, 129, 108, 73, 6]","[13, 21, 26, 46, 106]",False
4,"[126, 74, 31, 129, 7]","[85, 47, 19, 12, 44]",False


In [21]:
# Anti-Mage: hero id 1         Medusa: hero id 94

In [62]:
# Hero ids for the matchup under study.
ANTI_MAGE_ID = 1
MEDUSA_ID = 94

# Boolean mask over team_drafts: True where Anti-Mage and Medusa are on opposite sides.
# Built in a single pass over the lineups instead of two row-wise `apply` calls.
# NOTE(review): the name `filter` shadows the Python builtin; it is kept because a later
# cell indexes team_drafts with it.
filter = pd.Series(
    [
        (ANTI_MAGE_ID in radiant and MEDUSA_ID in dire)
        or (MEDUSA_ID in radiant and ANTI_MAGE_ID in dire)
        for radiant, dire in zip(team_drafts["radiant_lineup"], team_drafts["dire_lineup"])
    ],
    index=team_drafts.index,
    dtype=bool,
)

In [65]:
filtered_data = team_drafts[filter]
filtered_data.head()

Unnamed: 0,radiant_lineup,dire_lineup,radiant_win
191,"[90, 1, 27, 129, 65]","[119, 94, 85, 38, 112]",True
228,"[129, 1, 27, 98, 88]","[85, 94, 100, 21, 69]",True
853,"[121, 86, 16, 69, 94]","[31, 30, 102, 114, 1]",False
1152,"[128, 1, 37, 26, 29]","[129, 94, 100, 106, 104]",False
1412,"[49, 68, 123, 94, 74]","[26, 102, 129, 1, 126]",False


In [66]:
# Count the filtered matches won by the side that fielded hero id 1.
am_wins = 0
for _, draft in filtered_data.iterrows():
    # Look at the lineup of whichever side won this match.
    winning_lineup = "radiant_lineup" if draft["radiant_win"] else "dire_lineup"
    am_wins += int(1 in draft[winning_lineup])


In [67]:
am_wins / len(filtered_data)


0.6097560975609756