In [8]:
import os

import joblib
import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

In [9]:
# application_id sent with every request to the papi.tanksblitz.ru API.
# A credential hard-coded in a notebook is exposed to everyone who can read the
# file, so the WOTB_APP_ID environment variable takes precedence; the literal is
# kept only as a backward-compatible fallback.
APP_ID = os.environ.get("WOTB_APP_ID", "3d0a5056c9a299b9f58bb53d62895c90")
# Account id whose per-tank statistics are requested from the API.
USER_ID = 10915463

In [10]:
# Restore the fitted predictor used in the final prediction cell.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only load artefacts from a trusted source.
with open("autogluon_model.joblib", mode="rb") as model_file:
    model = joblib.load(model_file)

In [11]:
# Restore the values that scale_data subtracts from the numeric features.
# NOTE(review): presumably the per-feature means computed at training time -- confirm.
with open("means.joblib", mode="rb") as means_file:
    means = joblib.load(means_file)

In [12]:
# Restore the values that scale_data divides the centred numeric features by.
# NOTE(review): presumably the per-feature standard deviations from training -- confirm.
with open("stds.joblib", mode="rb") as stds_file:
    stds = joblib.load(stds_file)

In [13]:
# Restore the ordered list of feature columns that is later passed to model.predict.
with open("columns.joblib", mode="rb") as columns_file:
    final_columns = joblib.load(columns_file)

In [14]:
# Request the vehicle encyclopedia (one entry per tank) from the public API.
url = f"https://papi.tanksblitz.ru/wotb/encyclopedia/vehicles/?application_id={APP_ID}"
# requests applies no timeout by default, so a stalled connection would block
# the notebook indefinitely; bound the wait explicitly.
response = requests.get(url, timeout=30)
# Surface HTTP-level failures here rather than as a confusing parsing error
# in the next cell.
response.raise_for_status()

In [8]:
def _get_nested(record, dotted_path):
    """Follow a dot-separated key path through nested dicts.

    Returns the value at the end of the path, or None as soon as any step is
    missing or is not a dict (a plain chained ``.get`` would raise
    AttributeError on the first None it meets).
    """
    node = record
    for key in dotted_path.split("."):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


# Fields kept for non-premium vehicles.
general_columns = ["tank_id", "nation", "tier", "type", "name"]
# Fields kept for premium vehicles: the general ones plus the dotted paths of
# the characteristics nested under "default_profile".
premium_columns = [
    "tank_id",
    "nation",
    "tier",
    "type",
    "name",
    "default_profile.firepower",
    "default_profile.hp",
    "default_profile.hull_hp",
    "default_profile.hull_weight",
    "default_profile.maneuverability",
    "default_profile.max_weight",
    "default_profile.protection",
    "default_profile.shot_efficiency",
    "default_profile.signal_range",
    "default_profile.speed_backward",
    "default_profile.speed_forward",
    "default_profile.weight",
    "default_profile.armor.hull.front",
    "default_profile.armor.hull.rear",
    "default_profile.armor.hull.sides",
    "default_profile.armor.turret.front",
    "default_profile.armor.turret.rear",
    "default_profile.armor.turret.sides",
    "default_profile.engine.fire_chance",
    "default_profile.engine.power",
    "default_profile.gun.aim_time",
    "default_profile.gun.caliber",
    "default_profile.gun.clip_capacity",
    "default_profile.gun.clip_reload_time",
    "default_profile.gun.dispersion",
    "default_profile.gun.fire_rate",
    "default_profile.gun.move_down_arc",
    "default_profile.gun.move_up_arc",
    "default_profile.gun.reload_time",
    "default_profile.gun.traverse_speed",
    "default_profile.suspension.load_limit",
    "default_profile.suspension.traverse_speed",
    "default_profile.turret.hp",
    "default_profile.turret.traverse_left_arc",
    "default_profile.turret.traverse_right_arc",
    "default_profile.turret.traverse_speed",
    "default_profile.turret.view_range",
]

general_data = []
premium_data = []

payload = response.json()
vehicles = payload.get("data")
if vehicles is None:
    # Without this check the failure would be an opaque
    # "'NoneType' object has no attribute 'values'".
    raise RuntimeError(f"Vehicle encyclopedia response has no 'data' section: {payload}")

# Split the encyclopedia by the "is_premium" flag: premium vehicles get the
# flattened characteristics, all others only the general fields.
for vehicle in vehicles.values():
    if vehicle.get("is_premium"):
        premium_data.append(
            {path: _get_nested(vehicle, path) for path in premium_columns}
        )
    else:
        general_data.append(
            {key: value for key, value in vehicle.items() if key in general_columns}
        )

In [9]:
# Tabulate the non-premium vehicles, one row per vehicle; `columns=` fixes the
# column order to that of general_columns.
general_df = pd.DataFrame(general_data, columns=general_columns)
general_df

Unnamed: 0,tank_id,nation,tier,type,name
0,1,ussr,5,mediumTank,Т-34
1,17,germany,5,mediumTank,Pz.Kpfw. IV Ausf. G
2,1569,usa,7,mediumTank,T20
3,8225,usa,8,AT-SPG,T28
4,1297,germany,7,mediumTank,Panther I
...,...,...,...,...,...
235,3681,japan,10,mediumTank,STB-1
236,7505,uk,3,mediumTank,Cruiser Mk. IV
237,6273,european,7,mediumTank,Konštrukta T-34/100
238,20049,uk,8,lightTank,FV301


In [10]:
# Tabulate the premium vehicles, one row per vehicle and one column per dotted
# path listed in premium_columns.
premium_df = pd.DataFrame(premium_data, columns=premium_columns)
premium_df

Unnamed: 0,tank_id,nation,tier,type,name,default_profile.firepower,default_profile.hp,default_profile.hull_hp,default_profile.hull_weight,default_profile.maneuverability,...,default_profile.gun.move_up_arc,default_profile.gun.reload_time,default_profile.gun.traverse_speed,default_profile.suspension.load_limit,default_profile.suspension.traverse_speed,default_profile.turret.hp,default_profile.turret.traverse_left_arc,default_profile.turret.traverse_right_arc,default_profile.turret.traverse_speed,default_profile.turret.view_range
0,3073,ussr,3,lightTank,Т-46,21,480,384,7800,72,...,16,9.00,43.75,17300,48,96,180,180,44,220
1,8049,other,5,mediumTank,Шип,30,650,496,12292,47,...,25,7.80,35.00,25000,33,154,180,180,40,240
2,63841,japan,5,mediumTank,Panzer IV Lightning,29,710,568,11270,48,...,20,8.04,35.00,28000,35,142,180,180,48,240
3,52513,usa,7,heavyTank,M6A2E1,47,1400,1120,36550,30,...,20,10.50,35.00,70500,25,280,180,180,25,250
4,6481,uk,4,lightTank,Covenanter,29,600,480,9510,50,...,20,10.00,43.75,20500,40,120,180,180,48,230
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278,4977,other,5,mediumTank,Мародёр,29,700,560,13145,50,...,12,8.00,40.25,37000,38,140,180,180,42,240
279,18753,france,8,AT-SPG,AMX Canon d'assaut 105,61,1150,920,20175,65,...,20,9.50,20.00,32000,37,230,10,10,26,240
280,15889,germany,6,mediumTank,VK 30.02 (M),40,1050,840,15950,52,...,18,6.16,35.00,45000,38,210,180,180,30,240
281,4705,japan,6,mediumTank,Firefly Lightning,44,960,768,18110,51,...,20,5.70,40.25,36100,42,192,180,180,39,240


In [11]:
# Premium vehicles excluded from the candidate list by tank_id.
# NOTE(review): the reason these four ids are excluded is not recorded here -- document it.
tank_drop_ids = [21793, 64769, 64273, 64801]
is_dropped = premium_df["tank_id"].isin(tank_drop_ids)
premium_df = premium_df.loc[~is_dropped]

In [12]:
# Counters copied from the "all" section of each per-tank stats entry.
user_tanks_columns = [
    "spotted", "hits", "frags", "max_xp", "wins", "losses",
    "capture_points", "battles", "damage_dealt", "damage_received",
    "max_frags", "shots", "frags8p", "xp", "win_and_survived",
    "survived_battles", "dropped_capture_points",
]
# Fields read from the top level of each per-tank stats entry.
tanks_columns = ["battle_life_time", "mark_of_mastery"]
# Full column layout of the user statistics frame: identifiers first, then the
# counters, then the top-level fields.
columns = ["user_id", "tank_id", *user_tanks_columns, *tanks_columns]

In [13]:
# Request the per-tank statistics of USER_ID.
url = f"https://papi.tanksblitz.ru/wotb/tanks/stats/?application_id={APP_ID}&account_id={USER_ID}"
# Bound the wait and fail fast on HTTP errors (requests has no default timeout).
stats_response = requests.get(url, timeout=30)
stats_response.raise_for_status()
# The payload is keyed by the account id as a string; the entry is None when
# the API has nothing for this account.
data = stats_response.json().get("data").get(f"{USER_ID}")

# Collect one flat record per tank and build the frame once: concatenating
# inside the loop is quadratic, and concatenating onto an empty DataFrame is
# deprecated in recent pandas.
records = []
if data is not None:
    for entry in data:
        record = {
            "user_id": USER_ID,
            "tank_id": entry.get("tank_id"),
            "battle_life_time": entry.get("battle_life_time"),
            "mark_of_mastery": entry.get("mark_of_mastery"),
        }
        # Counters live under "all"; keys not listed in `columns` are dropped
        # by the constructor below.
        record.update(entry.get("all"))
        records.append(record)

# Passing `columns=` keeps the expected columns even when there are no records,
# so later cells that access user_df.tank_id still work for an empty account.
user_df = pd.DataFrame(records, columns=columns)

In [14]:
# Display the per-tank statistics frame built for USER_ID.
user_df

Unnamed: 0,user_id,tank_id,spotted,hits,frags,max_xp,wins,losses,capture_points,battles,...,damage_received,max_frags,shots,frags8p,xp,win_and_survived,survived_battles,dropped_capture_points,battle_life_time,mark_of_mastery
0,10915463,6961,8,146,35,1689,15,7,0,22,...,16336,4,167,27,23985,12,12,23,5954,2
1,10915463,8257,8,160,15,439,8,11,0,20,...,4988,3,183,0,4305,4,4,0,2702,2
2,10915463,769,30,130,11,664,9,8,0,17,...,6627,2,138,0,4216,1,1,5,1653,2
3,10915463,9073,0,0,0,499,0,0,0,0,...,0,0,0,0,0,0,0,0,157,0
4,10915463,2849,78,469,92,1825,46,36,0,82,...,113304,4,540,77,71896,33,33,24,16757,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,10915463,4993,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
284,10915463,7745,6,46,5,629,2,0,0,2,...,256,3,59,0,980,2,2,0,382,3
285,10915463,801,65,410,50,1089,33,26,0,59,...,52265,4,481,0,29673,17,17,31,8896,3
286,10915463,385,111,492,53,2007,29,30,0,60,...,95780,4,572,53,56951,22,22,94,11865,4


In [15]:
# Keep only the user's rows for non-premium vehicles ...
is_general_tank = user_df["tank_id"].isin(general_df["tank_id"].to_numpy())
user_general_df = user_df.loc[is_general_tank]
# ... and of those only tanks with consistent counters, more than 10 battles
# and some damage dealt.
sanity_filter = "(battles >= wins + losses) & (battles > 10) & (damage_dealt > 0)"
user_general_df = user_general_df.query(sanity_filter)
user_general_df

Unnamed: 0,user_id,tank_id,spotted,hits,frags,max_xp,wins,losses,capture_points,battles,...,damage_received,max_frags,shots,frags8p,xp,win_and_survived,survived_battles,dropped_capture_points,battle_life_time,mark_of_mastery
2,10915463,769,30,130,11,664,9,8,0,17,...,6627,2,138,0,4216,1,1,5,1653,2
9,10915463,6721,73,313,38,1018,32,22,106,55,...,34031,3,362,0,25089,22,22,9,7900,4
10,10915463,3921,154,1274,90,1842,71,72,58,144,...,201621,5,1435,63,85451,36,36,122,21803,3
15,10915463,8785,23,496,44,1096,22,19,111,41,...,19262,4,587,0,17956,16,17,3,7551,3
16,10915463,2321,49,416,52,1600,27,17,48,44,...,38085,5,482,0,31577,21,21,35,7294,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,10915463,1809,18,259,50,1129,25,21,22,46,...,15306,4,298,0,15589,16,16,1,7955,3
278,10915463,7185,73,451,53,1278,34,27,0,61,...,44804,4,524,0,37868,20,20,1,8822,2
280,10915463,18433,34,67,8,1176,7,4,0,11,...,8749,2,74,0,7977,4,4,0,1354,2
285,10915463,801,65,410,50,1089,33,26,0,59,...,52265,4,481,0,29673,17,17,31,8896,3


In [16]:
# Columns that are identifiers, maxima or grades and therefore must not be
# turned into per-battle averages.
not_averaged = ["user_id", "tank_id", "max_xp", "battles", "max_frags", "mark_of_mastery"]
avg_cols = user_general_df.columns.drop(not_averaged)
# Convert cumulative counters to per-battle values (row-wise division by the
# tank's battle count) stored as float32.
# NOTE: this overwrites the counters in place, so re-running the cell divides again.
per_battle = user_general_df[avg_cols].div(user_general_df["battles"], axis=0)
user_general_df[avg_cols] = per_battle.astype("float32")
user_general_df

Unnamed: 0,user_id,tank_id,spotted,hits,frags,max_xp,wins,losses,capture_points,battles,...,damage_received,max_frags,shots,frags8p,xp,win_and_survived,survived_battles,dropped_capture_points,battle_life_time,mark_of_mastery
2,10915463,769,1.764706,7.647059,0.647059,664,0.529412,0.470588,0.000000,17,...,389.823517,2,8.117647,0.000000,248.000000,0.058824,0.058824,0.294118,97.235291,2
9,10915463,6721,1.327273,5.690909,0.690909,1018,0.581818,0.400000,1.927273,55,...,618.745483,3,6.581818,0.000000,456.163635,0.400000,0.400000,0.163636,143.636368,4
10,10915463,3921,1.069444,8.847222,0.625000,1842,0.493056,0.500000,0.402778,144,...,1400.145874,5,9.965278,0.437500,593.409729,0.250000,0.250000,0.847222,151.409729,3
15,10915463,8785,0.560976,12.097561,1.073171,1096,0.536585,0.463415,2.707317,41,...,469.804871,4,14.317073,0.000000,437.951233,0.390244,0.414634,0.073171,184.170731,3
16,10915463,2321,1.113636,9.454545,1.181818,1600,0.613636,0.386364,1.090909,44,...,865.568176,5,10.954545,0.000000,717.659119,0.477273,0.477273,0.795455,165.772720,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,10915463,1809,0.391304,5.630435,1.086957,1129,0.543478,0.456522,0.478261,46,...,332.739136,4,6.478261,0.000000,338.891296,0.347826,0.347826,0.021739,172.934784,3
278,10915463,7185,1.196721,7.393443,0.868852,1278,0.557377,0.442623,0.000000,61,...,734.491821,4,8.590164,0.000000,620.786865,0.327869,0.327869,0.016393,144.622955,2
280,10915463,18433,3.090909,6.090909,0.727273,1176,0.636364,0.363636,0.000000,11,...,795.363647,2,6.727273,0.000000,725.181824,0.363636,0.363636,0.000000,123.090912,2
285,10915463,801,1.101695,6.949152,0.847458,1089,0.559322,0.440678,0.000000,59,...,885.847473,4,8.152542,0.000000,502.932190,0.288136,0.288136,0.525424,150.779663,3


In [17]:
# tank_ids of the premium vehicles that already appear in the user's statistics.
owned_premium = user_df["tank_id"].isin(premium_df["tank_id"].to_numpy())
already_has = user_df.loc[owned_premium, "tank_id"].to_numpy()
# Candidates for prediction: premium vehicles the user does not have, tier 5 and above.
not_owned = ~premium_df["tank_id"].isin(already_has)
predict_prem_df = premium_df.loc[not_owned].query("tier >= 5")
predict_prem_df

Unnamed: 0,tank_id,nation,tier,type,name,default_profile.firepower,default_profile.hp,default_profile.hull_hp,default_profile.hull_weight,default_profile.maneuverability,...,default_profile.gun.move_up_arc,default_profile.gun.reload_time,default_profile.gun.traverse_speed,default_profile.suspension.load_limit,default_profile.suspension.traverse_speed,default_profile.turret.hp,default_profile.turret.traverse_left_arc,default_profile.turret.traverse_right_arc,default_profile.turret.traverse_speed,default_profile.turret.view_range
1,8049,other,5,mediumTank,Шип,30,650,496,12292,47,...,25,7.80,35.00,25000,33,154,180,180,40,240
2,63841,japan,5,mediumTank,Panzer IV Lightning,29,710,568,11270,48,...,20,8.04,35.00,28000,35,142,180,180,48,240
3,52513,usa,7,heavyTank,M6A2E1,47,1400,1120,36550,30,...,20,10.50,35.00,70500,25,280,180,180,25,250
6,63585,japan,6,heavyTank,Tiger Kuromorimine SP,48,1150,920,25350,30,...,17,7.30,35.00,61000,27,230,180,180,20,240
13,21281,usa,6,mediumTank,Рудольф,44,980,736,19600,52,...,25,5.70,40.25,38500,44,244,180,180,39,240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,817,china,8,heavyTank,WZ-111,45,1800,1440,18845,30,...,23,14.00,26.25,48000,30,360,180,180,26,240
279,18753,france,8,AT-SPG,AMX Canon d'assaut 105,61,1150,920,20175,65,...,20,9.50,20.00,32000,37,230,10,10,26,240
280,15889,germany,6,mediumTank,VK 30.02 (M),40,1050,840,15950,52,...,18,6.16,35.00,45000,38,210,180,180,30,240
281,4705,japan,6,mediumTank,Firefly Lightning,44,960,768,18110,51,...,20,5.70,40.25,36100,42,192,180,180,39,240


In [18]:
# Remove the two columns that are not carried forward into the feature frame.
# NOTE: raises KeyError if the cell is re-run, because the columns are then already gone.
unused_premium_columns = ["name", "default_profile.signal_range"]
predict_prem_df = predict_prem_df.drop(columns=unused_premium_columns)

In [19]:
# Reshape to a single wide row per user: every (statistic, tank_id) pair
# becomes its own column.
wide_stats = user_general_df.pivot(index="user_id", columns="tank_id")
# Flatten the two-level column labels into "<statistic>_<tank_id>", skipping
# empty or missing levels.
wide_stats.columns = [
    "_".join(str(level) for level in labels if level not in (None, ""))
    for labels in wide_stats.columns
]
# Turn user_id back into a regular column.
user_general_df = wide_stats.reset_index()
user_general_df

Unnamed: 0,user_id,spotted_1,spotted_17,spotted_257,spotted_385,spotted_529,spotted_769,spotted_801,spotted_849,spotted_1025,...,mark_of_mastery_11793,mark_of_mastery_13841,mark_of_mastery_14097,mark_of_mastery_14145,mark_of_mastery_14865,mark_of_mastery_15649,mark_of_mastery_16641,mark_of_mastery_18001,mark_of_mastery_18433,mark_of_mastery_58641
0,10915463,1.322581,1.377049,0.977273,1.85,1.2,1.764706,1.101695,0.852941,1.916667,...,3,3,3,3,3,4,3,3,2,4


In [20]:
# Reorder the candidate frame: id first, then every vehicle characteristic,
# then the three descriptive fields.
profile_cols = [name for name in predict_prem_df.columns if "default_profile" in name]
prem_cols = ["tank_id", *profile_cols, "nation", "tier", "type"]
predict_prem_df = predict_prem_df[prem_cols]
predict_prem_df

Unnamed: 0,tank_id,default_profile.firepower,default_profile.hp,default_profile.hull_hp,default_profile.hull_weight,default_profile.maneuverability,default_profile.max_weight,default_profile.protection,default_profile.shot_efficiency,default_profile.speed_backward,...,default_profile.suspension.load_limit,default_profile.suspension.traverse_speed,default_profile.turret.hp,default_profile.turret.traverse_left_arc,default_profile.turret.traverse_right_arc,default_profile.turret.traverse_speed,default_profile.turret.view_range,nation,tier,type
1,8049,30,650,496,12292,47,25000,0,45,20,...,25000,33,154,180,180,40,240,other,5,mediumTank
2,63841,29,710,568,11270,48,28000,0,43,18,...,28000,35,142,180,180,48,240,japan,5,mediumTank
3,52513,47,1400,1120,36550,30,70500,0,58,10,...,70500,25,280,180,180,25,250,usa,7,heavyTank
6,63585,48,1150,920,25350,30,61000,0,51,12,...,61000,27,230,180,180,20,240,japan,6,heavyTank
13,21281,44,980,736,19600,52,38500,0,51,20,...,38500,44,244,180,180,39,240,usa,6,mediumTank
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,817,45,1800,1440,18845,30,48000,0,64,17,...,48000,30,360,180,180,26,240,china,8,heavyTank
279,18753,61,1150,920,20175,65,32000,0,83,20,...,32000,37,230,10,10,26,240,france,8,AT-SPG
280,15889,40,1050,840,15950,52,45000,0,59,20,...,45000,38,210,180,180,30,240,germany,6,mediumTank
281,4705,44,960,768,18110,51,36100,0,65,18,...,36100,42,192,180,180,39,240,japan,6,mediumTank


In [21]:
# Pair the user's wide statistics row with every candidate premium vehicle
# (cartesian product), giving one feature row per candidate.
X = pd.merge(user_general_df, predict_prem_df, how="cross")

In [22]:
# Columns the model expects that are absent from X (order is irrelevant here).
needed_columns = list(set(final_columns) - set(X.columns))

In [23]:
# Append the absent model columns; the placeholder frame has no rows, so the
# new columns hold only missing values.
# NOTE: re-running this cell appends the same columns a second time.
missing_feature_frame = pd.DataFrame(columns=needed_columns)
X = pd.concat([X, missing_feature_frame], axis=1)

In [24]:
# Keep the two identifiers followed by the model's columns, in the model's order.
X = X[["user_id", "tank_id", *final_columns]]

In [25]:
def scale_data(data, means, stds):
    """Standardise ``data`` with the supplied statistics.

    Args:
        data: Object exposing ``to_numpy(dtype=...)`` (e.g. a DataFrame); it is
            converted to a float32 array first.
        means: Values subtracted from the array (broadcast by NumPy).
        stds: Values the centred array is divided by (broadcast by NumPy).
            No guard is applied for zero entries.

    Returns:
        A NumPy array holding ``(data - means) / stds``.
    """
    as_float = data.to_numpy(dtype=np.float32)
    centred = as_float - means
    return centred / stds

In [26]:
# Categorical features: every per-tank mastery column plus the candidate
# vehicle's nation, tier and type.
cat_cols = [col for col in X.columns if "mark_of_mastery" in col] + [
    "nation",
    "tier",
    "type",
]
# Build the exclusion set once: the previous form rebuilt the list and scanned
# it linearly for every column, which is quadratic in the number of columns.
non_numeric_cols = {"user_id", "tank_id", *cat_cols}
# Everything that is neither an identifier nor categorical is scaled as numeric.
num_cols = [col for col in X.columns if col not in non_numeric_cols]

In [27]:
# Standardise the numeric features with the loaded statistics.
# NOTE(review): assumes means/stds are ordered like num_cols -- confirm against training.
# NOTE: overwrites the columns in place, so re-running the cell scales them again.
X[num_cols] = scale_data(X[num_cols], means, stds)

In [28]:
# Cast the categorical features to pandas' category dtype.
X[cat_cols] = X[cat_cols].astype("category")

In [45]:
# Score every candidate row and store the prediction rounded to an integer.
# NOTE(review): relies on model.predict returning an object with .round()/.astype
# (e.g. a pandas Series or NumPy array) -- confirm for the loaded model.
X["preds"] = model.predict(X[final_columns]).round().astype(int)

In [46]:
# Rank the candidates by predicted value, highest first, and keep the top three.
ranked_candidates = X[["tank_id", "preds"]].sort_values(by="preds", ascending=False)
res = ranked_candidates.head(3)