In [5]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score

# Download data

In [6]:
players_feats = pd.read_csv('players_feats.csv')
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

#### Merge data

In [7]:
def train_merge(a, b):

    c = pd.merge(a, b, how='left', left_on=['team1_id', 'map_id'],right_on=['team_id','map_id'])
    c = pd.merge(c, b, how='left', left_on=['team2_id', 'map_id'],right_on=['team_id','map_id'])
    c = c.drop(['map_name_x', 'map_name_y', 'team_id_x', 'team_id_y', 'map_id'], axis=1)    
    return c

## Dataset of team statistic

In [8]:
team_feats = pd.DataFrame(columns = [
    'team_id',
    'map_name',
    'map_id',
    'total_kills',
    'headshots',
    'total_deaths',
    'damage_per_round',
    'grenade_damage_per_round',
    'maps_played',
    'rounds_played',
    'kills_per_round',
    'assists_per_round',
    'deaths_per_round',
    'kill_death_difference',
    'total_opening_kills',
    'total_opening_deaths',
    'team_win_percent_after_first_kill',
    'first_kill_in_won_rounds'])


team_feats['team_id'] = players_feats['team_id']
team_feats['map_name'] = players_feats['map_name']
team_feats['map_id'] = players_feats['map_id']

for i in team_feats.columns[3:]:
    s = 0
    for j in range(5):
        s += players_feats['p'+str(j+1)+'_'+i]
    if i in ['maps_played', 'team_win_percent_after_first_kill', 'rounds_played']:
        s = s/5
    team_feats[i] = s
    
for i in [
    'total_kills',
    'headshots',
    'total_deaths',
    'rounds_played',
    'kill_death_difference',
    'total_opening_kills',
    'total_opening_deaths',
    'first_kill_in_won_rounds']:
    team_feats[i] /= team_feats.maps_played

In [9]:
features = [
    'team_id',
    'map_name',
    'map_id',
    'total_kills',
#     'damage_per_round',
    'maps_played',
#     'kills_per_round',
    'kill_death_difference',
    'total_opening_kills',
    'total_opening_deaths',
    'team_win_percent_after_first_kill',
#     'first_kill_in_won_rounds'
]

In [10]:
train_with_teams_v2 = train_merge(train, team_feats.loc[:, features])

In [11]:
map_name_list = np.unique(train_with_teams_v2.map_name)

## Dataset of team difference statistic

In [12]:
n = int((len(train_with_teams_v2.columns) - 4) / 2)

train_team_diff = train_with_teams_v2.iloc[:, :3].copy()
train_team_diff = pd.concat([train_team_diff, train_with_teams_v2.iloc[:, len(train_with_teams_v2.columns)-n:].copy()], axis=1)
train_team_diff = pd.concat([train_team_diff, train_with_teams_v2.map_name], axis=1)

for i in range(n):
    train_team_diff.iloc[:, i+3] -= train_with_teams_v2.iloc[:, i+3]

In [34]:
N = 40

maps_models = {}

for map_name in map_name_list:
    print(map_name)
    maps_models[map_name] = [None, 0]
    for j in range(N):
        y = train_team_diff[train_team_diff.map_name==map_name].who_win.copy()
        X = train_team_diff[train_team_diff.map_name==map_name].drop(['who_win', 'team1_id', 'team2_id', 'map_name'], axis=1)

        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

        model = xgb.XGBRFClassifier(
                                    objective = "binary:logistic",
                                    colsample_bynode=1,
                                    learning_rate=0.01,
                                    max_depth=3,
                                    n_estimators=500,
                                    reg_lambda=1,
                                   )
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)

        fpr, tpr, treshholds = roc_curve(y_test, predictions)
        auc = roc_auc_score(y_test, predictions)

        # При таком подходе в ходе экспериментов некоторые из моделей показывали результат предсказаний ниже 50%. 
        # Однако в нашем случае бинарного результата, достаточно отразить данные, чтобы вернуть предсказания к нормальным результатам.
        
        if auc<0.5:
            model.fit(X_train, abs(y_train-1))
            predictions = model.predict(X_test)
            fpr, tpr, treshholds = roc_curve(y_test, predictions)
            auc = roc_auc_score(y_test, predictions)
        
        if maps_models[map_name][1] < auc:
            maps_models[map_name] = [model, auc]
    

    print('Best AUC score =', maps_models[map_name][1])

Ancient
Best AUC score = 0.8571428571428572
Dust2
Best AUC score = 0.7321428571428571
Inferno
Best AUC score = 0.7777777777777778
Mirage
Best AUC score = 0.7357142857142858
Nuke
Best AUC score = 0.7735042735042734
Overpass
Best AUC score = 0.8181818181818181
Vertigo
Best AUC score = 0.8125


## Test dataset

In [22]:
train_test = train_merge(test, team_feats.loc[:, features])

In [23]:
n = int((len(train_test.columns) - 4) / 2)

test_team_diff = train_test.iloc[:, :3].copy()
test_team_diff = pd.concat([test_team_diff, train_test.iloc[:, len(train_test.columns)-n:].copy()], axis=1)
test_team_diff = pd.concat([test_team_diff, train_test.map_name], axis=1)

for i in range(n):
    test_team_diff.iloc[:, i+3] -= train_test.iloc[:, i+3]

In [35]:
test_predict = np.array([0 for i in range(len(test))])

for map_name in map_name_list:
    X = test_team_diff[test_team_diff.map_name==map_name].drop(['index','team1_id', 'team2_id', 'map_name'], axis=1)

    scaler = StandardScaler()
    scaler.fit(X)
    X = scaler.transform(X)

    model = maps_models[map_name][0]
    predictions = model.predict(X)
    test_predict[test_team_diff.map_name==map_name] = predictions

test_to_save = test.copy()
test_to_save['predictions'] = test_predict
test_to_save.to_csv('test_1.csv', index=False)