# Modelo de predicciones

Importamos las librerías que utilizaremos

In [45]:
import numpy as np
import pandas as pd

Cargamos los datos extraídos previamente con web scraping y la clase ``HltvScraper``.

In [46]:
# Folder that holds the CSV files; each file name is appended to it verbatim.
data = "WebScraping/Data/"

# One frame per scraped dataset: matches, player statistics, per-map team statistics.
df_matches = pd.read_csv(f"{data}matches_played_by_team.csv")
df_players = pd.read_csv(f"{data}players_stats_by_team.csv")
df_teams = pd.read_csv(f"{data}teams_stats_by_map.csv")

Definimos la siguiente función, que nos ayudará a limpiar los datos.

In [47]:
def dividir_columna(df: pd.DataFrame, column: str, sep: str, new_names: list) -> pd.DataFrame:
    """Split a string column into several columns, in place.

    The values of ``column`` are split on ``sep``; the pieces replace
    ``column`` at its original position and are named after ``new_names``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to modify. It is mutated and also returned.
    column : str
        Name of the string column to split. It is removed on success.
    sep : str
        Separator handed to ``Series.str.split``.
    new_names : list
        Names for the new columns, one per piece, left to right.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object, with ``column`` replaced by the new columns.

    Raises
    ------
    KeyError
        If ``column`` is not a column of ``df``.
    ValueError
        If the split does not yield exactly ``len(new_names)`` pieces, or if
        ``new_names`` contains repeated names or names already used by other
        columns of ``df``.

    All validation happens before ``df`` is touched, so a failed call leaves
    the frame exactly as it was.
    """
    position = df.columns.get_loc(column)

    # Split without removing the column yet: nothing is mutated until every check passes.
    pieces = df[column].str.split(sep, expand=True)

    if pieces.shape[1] != len(new_names):
        raise ValueError(
            f"splitting {column!r} on {sep!r} produced {pieces.shape[1]} column(s), "
            f"but {len(new_names)} name(s) were given"
        )

    # `column` itself is about to be removed, so reusing its name is allowed.
    clashes = [name for name in new_names if name != column and name in df.columns]
    if clashes or len(set(new_names)) != len(new_names):
        raise ValueError(
            f"new column names must be unique and unused in the frame: {list(new_names)!r}"
        )

    pieces.columns = new_names

    df.pop(column)
    for offset, name in enumerate(new_names):
        df.insert(position + offset, name, pieces[name])

    return df

Comenzamos limpiando ``df_matches``

In [48]:
# Preview the first rows of the matches table before cleaning it.
df_matches.head()

Unnamed: 0,Date,Event,Opponent,Map,Result,W/L,Group,Team
0,19/11/24,Perfect World Shanghai Major 2024 Europe RMR A,SAW,Ancient,13 - 2,W,europa_1,Natus Vincere
1,19/11/24,Perfect World Shanghai Major 2024 Europe RMR A,SAW,Nuke,13 - 10,W,europa_1,Natus Vincere
2,18/11/24,Perfect World Shanghai Major 2024 Europe RMR A,MOUZ,Inferno,7 - 13,L,europa_1,Natus Vincere
3,18/11/24,Perfect World Shanghai Major 2024 Europe RMR A,MOUZ,Dust2,8 - 13,L,europa_1,Natus Vincere
4,18/11/24,Perfect World Shanghai Major 2024 Europe RMR A,MOUZ,Mirage,13 - 4,W,europa_1,Natus Vincere


In [49]:
# Check each column's dtype to see which ones need converting.
df_matches.dtypes

Date        object
Event       object
Opponent    object
Map         object
Result      object
W/L         object
Group       object
Team        object
dtype: object

In [50]:
# Split "Result" (e.g. "13 - 2") into the two round counts. The guard keeps the
# cell re-runnable: after the first run "Result" no longer exists in the frame.
if "Result" in df_matches.columns:
    df_matches = dividir_columna(df_matches, "Result", " - ", ["Rounds won", "Rounds lost"])

columns_to_int = ["Rounds won", "Rounds lost"]
columns_to_category = ["Map", "W/L", "Group", "Team"]

# The split yields strings, so the round counts still have to become integers.
for col in columns_to_int:
    df_matches[col] = df_matches[col].astype(int)

for col in columns_to_category:
    df_matches[col] = df_matches[col].astype("category")

# Dates are day/month/two-digit-year strings (e.g. "19/11/24"). State the format
# explicitly: without it, values whose day is 12 or lower (e.g. "05/11/24") can
# be read month-first and silently end up on the wrong date.
df_matches["Date"] = pd.to_datetime(df_matches["Date"], format="%d/%m/%y")

In [51]:
# Count missing values per column after the type conversions.
df_matches.isna().sum()

Date           0
Event          0
Opponent       0
Map            0
Rounds won     0
Rounds lost    0
W/L            0
Group          0
Team           0
dtype: int64

In [52]:
# Rows that appear more than once (every occurrence is kept); an empty frame means none.
df_dups = df_matches.loc[df_matches.duplicated(keep=False)]
df_dups

Unnamed: 0,Date,Event,Opponent,Map,Rounds won,Rounds lost,W/L,Group,Team


Continuamos con ``df_players``

In [53]:
# Preview the first rows of the per-player statistics.
df_players.head()

Unnamed: 0,Kills,Deaths,Kill / Death,Kill / Round,Rounds with kills,Kill - Death difference,Total opening kills,Total opening deaths,Opening kill ratio,Opening kill rating,...,5 kill rounds,Rifle kills,Sniper kills,SMG kills,Pistol kills,Grenade,Other,Player,Group,Team
0,2058,2488,0.83,0.53,1561,-430,297,393,0.76,0.88,...,1,1366,19,273,331,70,21,aleksib,europa_1,Natus Vincere
1,2715,2588,1.05,0.7,1829,127,475,447,1.06,1.09,...,1,2160,14,73,449,20,14,im,europa_1,Natus Vincere
2,2838,2507,1.13,0.73,1924,331,453,371,1.22,1.09,...,1,2126,22,213,456,23,12,b1t,europa_1,Natus Vincere
3,2780,2499,1.11,0.72,1866,281,430,367,1.17,1.06,...,3,2121,23,145,460,27,27,jl,europa_1,Natus Vincere
4,2791,2256,1.24,0.72,1914,535,404,237,1.7,1.09,...,4,1114,1144,43,474,21,17,w0nderful,europa_1,Natus Vincere


In [54]:
# Check each column's dtype to see which ones need converting.
df_players.dtypes

Kills                                  int64
Deaths                                 int64
Kill / Death                         float64
Kill / Round                         float64
Rounds with kills                      int64
Kill - Death difference                int64
Total opening kills                    int64
Total opening deaths                   int64
Opening kill ratio                   float64
Opening kill rating                  float64
Team win percent after first kill     object
First kill in won rounds              object
0 kill rounds                          int64
1 kill rounds                          int64
2 kill rounds                          int64
3 kill rounds                          int64
4 kill rounds                          int64
5 kill rounds                          int64
Rifle kills                            int64
Sniper kills                           int64
SMG kills                              int64
Pistol kills                           int64
Grenade   

In [55]:
columns_to_float = ["Team win percent after first kill", "First kill in won rounds"]
columns_to_category = ["Player", "Group", "Team"]

# Drop the "%" sign from the percentage columns and store them as floats, then
# turn the identifying columns into categoricals, all in a single chained pass.
df_players = df_players.assign(
    **{name: df_players[name].str.strip("%").astype(float) for name in columns_to_float}
).astype({name: "category" for name in columns_to_category})

In [56]:
# Count missing values per column after the type conversions.
df_players.isna().sum()

Kills                                0
Deaths                               0
Kill / Death                         0
Kill / Round                         0
Rounds with kills                    0
Kill - Death difference              0
Total opening kills                  0
Total opening deaths                 0
Opening kill ratio                   0
Opening kill rating                  0
Team win percent after first kill    0
First kill in won rounds             0
0 kill rounds                        0
1 kill rounds                        0
2 kill rounds                        0
3 kill rounds                        0
4 kill rounds                        0
5 kill rounds                        0
Rifle kills                          0
Sniper kills                         0
SMG kills                            0
Pistol kills                         0
Grenade                              0
Other                                0
Player                               0
Group                    

In [57]:
# Compare players on their statistics only: the last three columns are left out
# of the check, and every occurrence of a repeated row is kept.
df_dups = df_players.iloc[:, :-3].loc[lambda stats: stats.duplicated(keep=False)]
df_dups


Unnamed: 0,Kills,Deaths,Kill / Death,Kill / Round,Rounds with kills,Kill - Death difference,Total opening kills,Total opening deaths,Opening kill ratio,Opening kill rating,...,2 kill rounds,3 kill rounds,4 kill rounds,5 kill rounds,Rifle kills,Sniper kills,SMG kills,Pistol kills,Grenade,Other


Por último, revisamos ``df_teams``

In [58]:
# Preview the first rows of the per-map team statistics.
df_teams.head()

Unnamed: 0,Times played,Wins / draws / losses,Total rounds played,Rounds won,Win percent,Pistol rounds,Pistol rounds won,Pistol round win percent,CT round win percent,T round win percent,Group,Team,Map Name
0,34,22 / 0 / 12,724,390,64.7%,68,38,55.9%,55.1%,52.6%,europa_1,Natus Vincere,Ancient
1,0,0 / 0 / 0,0,0,0.0%,0,0,0.0%,0.0%,0.0%,europa_1,Vitality,Ancient
2,23,14 / 0 / 9,513,280,60.9%,46,28,60.9%,56.4%,52.9%,europa_1,MOUZ,Ancient
3,33,21 / 0 / 12,752,409,63.6%,66,40,60.6%,53.8%,54.9%,europa_1,FaZe,Ancient
4,22,8 / 0 / 14,479,224,36.4%,44,23,52.3%,50.4%,42.3%,europa_1,Falcons,Ancient


In [59]:
# Break the combined "wins / draws / losses" text into one column per outcome.
df_teams = dividir_columna(
    df=df_teams,
    column="Wins / draws / losses",
    sep=" / ",
    new_names=["Wins", "Draws", "Losses"],
)

In [60]:
# Look for repeated statistics while ignoring the last three columns; every
# occurrence of a repeated row is kept.
df_dups = df_teams.iloc[:, :-3].loc[lambda stats: stats.duplicated(keep=False)]
df_dups

Unnamed: 0,Times played,Wins,Draws,Losses,Total rounds played,Rounds won,Win percent,Pistol rounds,Pistol rounds won,Pistol round win percent,CT round win percent,T round win percent
1,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
58,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
77,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
103,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
183,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
185,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
194,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
213,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
222,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%
238,0,0,0,0,0,0,0.0%,0,0,0.0%,0.0%,0.0%


In [61]:
# Remove the rows currently held in `df_dups`. errors="ignore" keeps the cell
# re-runnable: on a second run those labels are already gone, and a plain drop
# would raise KeyError.
df_teams = df_teams.drop(index=df_dups.index, errors="ignore")

In [62]:
# Show a single row to confirm the Wins / Draws / Losses columns are in place.
df_teams.head(1)

Unnamed: 0,Times played,Wins,Draws,Losses,Total rounds played,Rounds won,Win percent,Pistol rounds,Pistol rounds won,Pistol round win percent,CT round win percent,T round win percent,Group,Team,Map Name
0,34,22,0,12,724,390,64.7%,68,38,55.9%,55.1%,52.6%,europa_1,Natus Vincere,Ancient


In [63]:
# Check each column's dtype to see which ones still need converting.
df_teams.dtypes

Times played                 int64
Wins                        object
Draws                       object
Losses                      object
Total rounds played          int64
Rounds won                   int64
Win percent                 object
Pistol rounds                int64
Pistol rounds won            int64
Pistol round win percent    object
CT round win percent        object
T round win percent         object
Group                       object
Team                        object
Map Name                    object
dtype: object

In [64]:
columns_to_int = ["Wins", "Draws", "Losses"]
columns_to_float = ["Win percent", "Pistol round win percent", "CT round win percent", "T round win percent"]
columns_to_category = ["Group", "Team", "Map Name"]

# Percentages lose their "%" sign and become floats first; the integer and
# categorical conversions are then applied together through one dtype mapping.
df_teams = df_teams.assign(
    **{name: df_teams[name].str.strip("%").astype(float) for name in columns_to_float}
).astype(
    {
        **dict.fromkeys(columns_to_int, int),
        **dict.fromkeys(columns_to_category, "category"),
    }
)