# Análise Exploratória de um Dataset de partidas de League of Legends

## 0.1) Import das Bibliotecas

In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## 1) Coleta e Limpeza dos Dados

In [2]:
data = pd.read_csv('high_diamond_ranked_10min.csv')
data.head()

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,...,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
0,4519157822,0,28,2,1,9,6,11,0,0,...,0,16567,6.8,17047,197,55,-643,8,19.7,1656.7
1,4523371949,0,12,1,0,5,5,5,0,0,...,1,17620,6.8,17438,240,52,2908,1173,24.0,1762.0
2,4521474530,0,15,0,0,7,11,4,1,1,...,0,17285,6.8,17254,203,28,1172,1033,20.3,1728.5
3,4524384067,0,43,1,0,4,5,5,1,0,...,0,16478,7.0,17961,235,47,1321,7,23.5,1647.8
4,4436033771,0,75,4,0,6,6,6,0,0,...,0,17404,7.0,18313,225,67,1004,-230,22.5,1740.4


In [3]:
data.shape

(9879, 40)

### Tipos dos dados

In [12]:
# Imprimindo os tipo de todas as colunas do dataframe
for col in data.columns:
    if 'red' in col:
        print(f'\033[1;31m{col}: {data[col].dtype}', end=" | ")
    elif 'blue' in col:
        print(f'\033[1;36m{col}: {data[col].dtype}', end=" | ")
    
    print(f'{col}: {data[col].dtype}', end=" | ")

gameId: int64 | [1;36mblueWins: int64 | blueWins: int64 | [1;36mblueWardsPlaced: int64 | blueWardsPlaced: int64 | [1;36mblueWardsDestroyed: int64 | blueWardsDestroyed: int64 | [1;36mblueFirstBlood: int64 | blueFirstBlood: int64 | [1;36mblueKills: int64 | blueKills: int64 | [1;36mblueDeaths: int64 | blueDeaths: int64 | [1;36mblueAssists: int64 | blueAssists: int64 | [1;36mblueEliteMonsters: int64 | blueEliteMonsters: int64 | [1;36mblueDragons: int64 | blueDragons: int64 | [1;36mblueHeralds: int64 | blueHeralds: int64 | [1;36mblueTowersDestroyed: int64 | blueTowersDestroyed: int64 | [1;36mblueTotalGold: int64 | blueTotalGold: int64 | [1;36mblueAvgLevel: float64 | blueAvgLevel: float64 | [1;36mblueTotalExperience: int64 | blueTotalExperience: int64 | [1;36mblueTotalMinionsKilled: int64 | blueTotalMinionsKilled: int64 | [1;36mblueTotalJungleMinionsKilled: int64 | blueTotalJungleMinionsKilled: int64 | [1;36mblueGoldDiff: int64 | blueGoldDiff: int64 | [1;36mblueExperienceDi

In [13]:
data.sample(1)

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,...,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
6676,4479605659,0,12,2,1,3,2,4,0,0,...,0,15127,7.2,18722,240,52,-1180,-1124,24.0,1512.7


In [14]:
data.dtypes.unique()

array([dtype('int64'), dtype('float64')], dtype=object)

### Estatísticas de Resumo

In [51]:
df_blue = data.loc[:, data.columns.map(lambda x: True if "blue" in x or x == "gameId" or x == "blueWins" else False)]
df_red = data.loc[:, data.columns.map(lambda x: True if "red" in x or x == "gameId" or x == "blueWins" else False)]

In [52]:
df_resume_stats_red = df_red.describe()
df_resume_stats_blue = df_blue.describe()

In [53]:
plus_resume_stats_red = pd.DataFrame(data=[df_red.skew(), df_red.kurtosis()], columns=df_red.kurtosis().index, index=["skew", "kurtosis"])
plus_resume_stats_blue = pd.DataFrame(data=[df_blue.skew(), df_blue.kurtosis()], columns=df_blue.kurtosis().index, index=["skew", "kurtosis"])

In [54]:
df_describe_red = pd.concat([df_resume_stats_red, plus_resume_stats_red])
df_describe_blue = pd.concat([df_resume_stats_blue, plus_resume_stats_blue])


Estatística de resumo do time de lado VERMELHO

In [55]:
df_describe_red

Unnamed: 0,gameId,blueWins,redWardsPlaced,redWardsDestroyed,redFirstBlood,redKills,redDeaths,redAssists,redEliteMonsters,redDragons,redHeralds,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
count,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0
mean,4500084044.85,0.5,22.37,2.72,0.5,6.14,6.18,6.66,0.57,0.41,0.16,0.04,16489.04,6.93,17961.73,217.35,51.31,-14.41,33.62,21.73,1648.9
std,27573278.49,0.5,18.46,2.14,0.5,2.93,3.01,4.06,0.63,0.49,0.37,0.22,1490.89,0.31,1198.58,21.91,10.03,2453.35,1920.37,2.19,149.09
min,4295358071.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11212.0,4.8,10465.0,107.0,4.0,-11467.0,-8348.0,10.7,1121.2
25%,4483301169.0,0.0,14.0,1.0,0.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,15427.5,6.8,17209.5,203.0,44.0,-1596.0,-1212.0,20.3,1542.75
50%,4510920346.0,0.0,16.0,2.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,16378.0,7.0,17974.0,218.0,51.0,-14.0,28.0,21.8,1637.8
75%,4521733208.5,1.0,20.0,4.0,1.0,8.0,8.0,9.0,1.0,1.0,0.0,0.0,17418.5,7.2,18764.5,233.0,57.0,1585.5,1290.5,23.3,1741.85
max,4527990640.0,1.0,276.0,24.0,1.0,22.0,22.0,28.0,2.0,1.0,1.0,2.0,22732.0,8.2,22269.0,289.0,92.0,10830.0,9333.0,28.9,2273.2
skew,-1.46,0.0,4.56,2.95,0.02,0.51,0.54,0.82,0.62,0.35,1.85,5.34,0.41,-0.4,-0.28,-0.29,0.23,-0.03,-0.02,-0.29,0.41
kurtosis,3.33,-2.0,30.47,18.24,-2.0,0.21,0.26,0.79,-0.57,-1.88,1.44,30.56,0.22,1.24,0.82,0.23,0.42,0.3,0.36,0.23,0.22


Estatística de resumo do time de lado AZUL

In [56]:
df_describe_blue

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueHeralds,blueTowersDestroyed,blueTotalGold,blueAvgLevel,blueTotalExperience,blueTotalMinionsKilled,blueTotalJungleMinionsKilled,blueGoldDiff,blueExperienceDiff,blueCSPerMin,blueGoldPerMin
count,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0
mean,4500084044.85,0.5,22.29,2.82,0.5,6.18,6.14,6.65,0.55,0.36,0.19,0.05,16503.46,6.92,17928.11,216.7,50.51,14.41,-33.62,21.67,1650.35
std,27573278.49,0.5,18.02,2.17,0.5,3.01,2.93,4.06,0.63,0.48,0.39,0.24,1535.45,0.31,1200.52,21.86,9.9,2453.35,1920.37,2.19,153.54
min,4295358071.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10730.0,4.6,10098.0,90.0,0.0,-10830.0,-9333.0,9.0,1073.0
25%,4483301169.0,0.0,14.0,1.0,0.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,15415.5,6.8,17168.0,202.0,44.0,-1585.5,-1290.5,20.2,1541.55
50%,4510920346.0,0.0,16.0,3.0,1.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,16398.0,7.0,17951.0,218.0,50.0,14.0,-28.0,21.8,1639.8
75%,4521733208.5,1.0,20.0,4.0,1.0,8.0,8.0,9.0,1.0,1.0,0.0,0.0,17459.0,7.2,18724.0,232.0,56.0,1596.0,1212.0,23.2,1745.9
max,4527990640.0,1.0,250.0,27.0,1.0,22.0,22.0,29.0,2.0,1.0,1.0,4.0,23701.0,8.0,22224.0,283.0,92.0,11467.0,8348.0,28.3,2370.1
skew,-1.46,0.0,4.14,2.85,-0.02,0.54,0.51,0.89,0.69,0.57,1.6,5.59,0.47,-0.34,-0.25,-0.27,0.12,0.03,0.02,-0.27,0.47
kurtosis,3.33,-2.0,23.44,17.2,-2.0,0.26,0.21,1.16,-0.5,-1.67,0.55,39.86,0.48,1.12,0.68,0.17,0.39,0.3,0.36,0.17,0.48


### Limpeza dos Dados

#### Dados nulos

In [19]:
data.isna().sum().sum()

0

#### Dados duplicados

In [22]:
data.duplicated(subset='gameId').sum()

0

## Análise Exploratória de Dados

### Análise Univariada

In [57]:
sns.histplot(df_red)

### Análise Bivariada

### Análise Multi-variada

### Validação de Hipóteses

### Importância das Features