# Bem Vindo!
Agora sim, Vamos começar o KNN com a nossa tabela da premier league!

In [2411]:
import numpy as np
import pandas as pd


In [2412]:
#Primeiro vamos carregar o dataframe que criamos no notebook anterior
df_final_com_pontos = pd.read_csv('df_final_premier_league.csv')

In [2413]:
#Vamos agora verificar o tipo dos dados 
df_final_com_pontos.dtypes

club_id            float64
season               int64
name                object
last_position      float64
own_goals_1        float64
                    ...   
total_points_9     float64
total_points_10    float64
gols_pro           float64
gols_sofridos      float64
saldo_gols         float64
Length: 107, dtype: object

In [2414]:
#Vamos começar retirando o nome dos times  pois agora ele não é mais necessários
df_final_com_pontos = df_final_com_pontos.drop(columns=['name'])

In [2415]:
#Vamos também retirar o código do time, pois ele pode acabar enviesando o modelo, induzindo-o a pensaar que um time tende a repetir o desempenho 
df_final_com_pontos = df_final_com_pontos.drop(columns=['club_id'])

Começando com o KNN:

In [2416]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report


In [2417]:
# Vamos definir X e Y. Y será a coluna last_position
X = df_final_com_pontos.drop(columns=['last_position'])


In [2418]:
#Vamos separar nosso x test e x train de acordo com as temporadas.
X_train = X[X['season'] != 2024]
X_test = X[X['season'] == 2024]

# Agora vamos separar y para ser apenas a coluna last_position
y_train = df_final_com_pontos[df_final_com_pontos['season'] != 2024]['last_position']
y_test = df_final_com_pontos[df_final_com_pontos['season'] == 2024]['last_position']

In [2419]:
#Usando um scaler para normalizar os dados
scaler = MinMaxScaler(feature_range=(0,1))

In [2420]:
#Aplicando a normalização nos dados de treino e teste
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)


In [2421]:
knn = KNeighborsClassifier(n_neighbors=10)


In [2422]:
#Vamos treinar o modelo com os dados de treino
knn.fit(X_train, y_train)

In [2423]:
#Achando as predições
y_pred = knn.predict(X_test)

In [2424]:
# SOLUÇÃO CORRIGIDA: Garantir posições únicas sem criar novas duplicatas
def corrigir_posicoes_duplicadas_v2(y_pred, X_test_original, criterios_desempate):
    """
    Versão corrigida que garante posições únicas de 1 a 20
    """
    import pandas as pd
    
    # Criar DataFrame com predições e dados originais
    df_pred = pd.DataFrame(X_test_original)
    df_pred['posicao_prevista'] = y_pred
    df_pred['indice_original'] = range(len(y_pred))
    
    print(f"=== DIAGNÓSTICO INICIAL ===")
    print(f"Predições originais: {sorted(y_pred)}")
    
    # Contar duplicatas
    from collections import Counter
    contagem = Counter(y_pred)
    duplicadas = {pos: count for pos, count in contagem.items() if count > 1}
    print(f"Posições duplicadas: {duplicadas}")
    
    # ESTRATÉGIA: Ordenar todos os times e redistribuir posições sequenciais
    # Ordenar por: 1) posição prevista, 2) critérios de desempate
    df_ordenado = df_pred.sort_values(
        ['posicao_prevista'] + criterios_desempate,
        ascending=[True] + [False] * len(criterios_desempate)
    )
    
    print(f"\nOrdem final dos times:")
    for i, (_, row) in enumerate(df_ordenado.iterrows()):
        nova_pos = i + 1
        pos_original = row['posicao_prevista']
        criterio_valor = row[criterios_desempate[0]] if criterios_desempate else 'N/A'
        print(f"  Time {row['indice_original']}: pos {pos_original} → {nova_pos} (critério: {criterio_valor})")
    
    # Criar array de posições corrigidas
    y_pred_corrigido = np.zeros(len(y_pred), dtype=int)
    for nova_posicao, (_, row) in enumerate(df_ordenado.iterrows(), 1):
        idx_original = int(row['indice_original'])  # CORREÇÃO: converter para int
        y_pred_corrigido[idx_original] = nova_posicao
    
    # VERIFICAÇÃO FINAL
    print(f"\n=== VERIFICAÇÃO FINAL ===")
    print(f"Posições finais: {sorted(y_pred_corrigido)}")
    print(f"Posições únicas: {len(np.unique(y_pred_corrigido))}/20")
    print(f"Range: {min(y_pred_corrigido)} a {max(y_pred_corrigido)}")
    
    # Verificar se todas as posições 1-20 estão presentes
    posicoes_esperadas = set(range(1, 21))
    posicoes_reais = set(y_pred_corrigido)
    
    if posicoes_reais == posicoes_esperadas:
        print("✅ PERFEITO: Todas as posições 1-20 estão únicas!")
    else:
        print("❌ ERRO: Ainda há problemas!")
        print(f"Faltando: {posicoes_esperadas - posicoes_reais}")
        print(f"Extras: {posicoes_reais - posicoes_esperadas}")
    
    return y_pred_corrigido

# Primeiro, vamos verificar as predições originais
print("=== DIAGNÓSTICO DAS PREDIÇÕES ===")
print(f"Predições originais: {sorted(y_pred)}")

from collections import Counter
contagem = Counter(y_pred)
duplicadas = {pos: count for pos, count in contagem.items() if count > 1}
print(f"Posições duplicadas: {duplicadas}")
print(f"Posições únicas: {len(np.unique(y_pred))}/20")

# Recuperar dados originais (antes da normalização) para usar como critério de desempate
# Assumindo que temos uma coluna de pontos ou gols
X_test_original = X[X['season'] == 2024]

print(f"\nColunas disponíveis para desempate: {list(X_test_original.columns)}")

# DEBUG: Verificar tipos de dados
print(f"\nDEBUG:")
print(f"Tipo de y_pred: {type(y_pred)}")
print(f"Tipo de X_test_original: {type(X_test_original)}")
print(f"Shape de X_test_original: {X_test_original.shape}")

=== DIAGNÓSTICO DAS PREDIÇÕES ===
Predições originais: [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 5.0, 5.0, 8.0, 9.0, 10.0, 11.0, 11.0, 12.0, 14.0, 15.0, 17.0, 18.0]
Posições duplicadas: {1.0: 4, 2.0: 2, 5.0: 2, 11.0: 2, 4.0: 2}
Posições únicas: 13/20

Colunas disponíveis para desempate: ['season', 'own_goals_1', 'own_position_1', 'hosting_1', 'is_win_1', 'home_club_formation_1', 'aggregate_1', 'opponent_goals_1', 'opponent_position_1', 'away_club_formation_1', 'own_goals_2', 'own_position_2', 'hosting_2', 'is_win_2', 'home_club_formation_2', 'aggregate_2', 'opponent_goals_2', 'opponent_position_2', 'away_club_formation_2', 'own_goals_3', 'own_position_3', 'hosting_3', 'is_win_3', 'home_club_formation_3', 'aggregate_3', 'opponent_goals_3', 'opponent_position_3', 'away_club_formation_3', 'own_goals_4', 'own_position_4', 'hosting_4', 'is_win_4', 'home_club_formation_4', 'aggregate_4', 'opponent_goals_4', 'opponent_position_4', 'away_club_formation_4', 'own_goals_5', 'own_position_5', 'host

In [2425]:
# Aplicar a correção CORRIGIDA usando pontos como critério principal

# Definir critérios de desempate (ordem de prioridade)
criterios_desempate = ['total_points_10']  # Adicione outras colunas se disponível

# Aplicar correção com a versão corrigida
y_pred_corrigido = corrigir_posicoes_duplicadas_v2(
    y_pred, 
    X_test_original, 
    criterios_desempate
)

print("\n=== RESULTADO FINAL ===")
print(f"Predições corrigidas: {sorted(y_pred_corrigido)}")
print(f"Posições únicas: {len(np.unique(y_pred_corrigido))}/20")
print(f"Range: {min(y_pred_corrigido)} a {max(y_pred_corrigido)}")

# Verificação dupla para garantir
posicoes_esperadas = set(range(1, 21))
posicoes_obtidas = set(y_pred_corrigido)

if posicoes_obtidas == posicoes_esperadas:
    print("✅ SUCESSO: Todas as posições são únicas e corretas!")
else:
    print("❌ AINDA HÁ PROBLEMAS:")
    print(f"  Faltando: {posicoes_esperadas - posicoes_obtidas}")
    print(f"  Duplicadas: {posicoes_obtidas - posicoes_esperadas}")

# Comparar predições originais vs corrigidas
print(f"\nComparação:")
print(f"Original:   {y_pred}")
print(f"Corrigido:  {y_pred_corrigido}")

# Calcular nova acurácia
from sklearn.metrics import accuracy_score
nova_acuracia = accuracy_score(y_test, y_pred_corrigido)
acuracia_original = accuracy_score(y_test, y_pred)

print(f"\nAcurácia original: {acuracia_original:.3f}")
print(f"Acurácia corrigida: {nova_acuracia:.3f}")
print(f"Diferença: {nova_acuracia - acuracia_original:+.3f}")

# Mostrar quais posições mudaram
mudancas = sum(1 for i in range(len(y_pred)) if y_pred[i] != y_pred_corrigido[i])
print(f"Posições alteradas: {mudancas}/{len(y_pred)}")

=== DIAGNÓSTICO INICIAL ===
Predições originais: [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 5.0, 5.0, 8.0, 9.0, 10.0, 11.0, 11.0, 12.0, 14.0, 15.0, 17.0, 18.0]
Posições duplicadas: {1.0: 4, 2.0: 2, 5.0: 2, 11.0: 2, 4.0: 2}

Ordem final dos times:
  Time 11.0: pos 1.0 → 1 (critério: 25.0)
  Time 4.0: pos 1.0 → 2 (critério: 23.0)
  Time 0.0: pos 1.0 → 3 (critério: 18.0)
  Time 16.0: pos 1.0 → 4 (critério: 18.0)
  Time 2.0: pos 2.0 → 5 (critério: 16.0)
  Time 12.0: pos 2.0 → 6 (critério: 15.0)
  Time 6.0: pos 4.0 → 7 (critério: 19.0)
  Time 9.0: pos 4.0 → 8 (critério: 16.0)
  Time 15.0: pos 5.0 → 9 (critério: 18.0)
  Time 3.0: pos 5.0 → 10 (critério: 15.0)
  Time 1.0: pos 8.0 → 11 (critério: 13.0)
  Time 14.0: pos 9.0 → 12 (critério: 15.0)
  Time 7.0: pos 10.0 → 13 (critério: 10.0)
  Time 17.0: pos 11.0 → 14 (critério: 4.0)
  Time 5.0: pos 11.0 → 15 (critério: 3.0)
  Time 18.0: pos 12.0 → 16 (critério: 12.0)
  Time 19.0: pos 14.0 → 17 (critério: 9.0)
  Time 8.0: pos 15.0 → 18 (critério: 5.0

Como já era de se esprar, prever posições exatas de um time de futebol é algo muito difícil. Logo, vamos ajustar a coluna last position para a métrica:

Campeão (1 posição)

Top 6: 2-6 colocado

Top 12: 7 - 12 colocado

Intermediários: 13 - 17 colocado

Rebaixados: 18 - 20 colocados

In [2426]:
print(y_pred)

[ 1.  8.  2.  5.  1. 11.  4. 10. 15.  4. 17.  1.  2. 18.  9.  5.  1. 11.
 12. 14.]


In [2427]:
#Vamos transformar a coluna last_position, oponnent_position_x e own_position_x em categorias
mapeamento_posicoes = {
    1:1,
    2:2,
    3:2,
    4:2,
    5:2,
    6:2,
    7:3,
    8:3,
    9:3,
    10:3,
    11:3,
    12:3,
    13:4,
    14:4,
    15:4,
    16:4,
    17:4,
    18:5,
    19:5,
    20:5
}
# Aplicando o mapeamento
#Criando uma cópia do DataFrame para evitar SettingWithCopyWarning
df_novo_final = pd.read_csv('df_final_premier_league.csv')
df_novo_final['last_position'] = df_novo_final['last_position'].map(mapeamento_posicoes)
df_novo_final.head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,total_points_4,total_points_5,total_points_6,total_points_7,total_points_8,total_points_9,total_points_10,gols_pro,gols_sofridos,saldo_gols
0,180.0,2012,Southampton Football Club,4,2.0,13.0,0,0,0.0,23,...,0.0,3.0,3.0,4.0,4.0,4.0,4.0,14.0,28.0,-14.0
1,512.0,2012,Stoke City,4,1.0,9.0,0,2,0.0,1,...,4.0,4.0,7.0,8.0,8.0,9.0,9.0,8.0,10.0,-2.0
2,931.0,2012,Fulham Football Club,3,5.0,1.0,1,1,0.0,10,...,6.0,9.0,9.0,10.0,13.0,14.0,15.0,21.0,16.0,5.0
3,405.0,2012,Aston Villa Football Club,4,0.0,15.0,0,0,0.0,6,...,4.0,4.0,5.0,5.0,5.0,6.0,9.0,8.0,14.0,-6.0
4,379.0,2012,West Ham United Football Club,3,1.0,8.0,1,1,0.0,6,...,7.0,8.0,11.0,11.0,14.0,14.0,15.0,13.0,11.0,2.0
5,984.0,2012,West Bromwich Albion,3,3.0,3.0,1,1,0.0,8,...,7.0,10.0,11.0,14.0,14.0,14.0,17.0,15.0,11.0,4.0
6,289.0,2012,Sunderland AFC,4,0.0,12.0,0,2,0.0,0,...,6.0,7.0,10.0,10.0,11.0,12.0,12.0,9.0,9.0,0.0
7,631.0,2012,Chelsea Football Club,2,2.0,4.0,0,1,0.0,7,...,10.0,13.0,16.0,19.0,22.0,22.0,23.0,22.0,10.0,12.0
8,1032.0,2012,Reading FC,5,1.0,10.0,1,2,0.0,1,...,1.0,1.0,2.0,3.0,3.0,4.0,5.0,12.0,21.0,-9.0
9,1123.0,2012,Norwich City,3,0.0,20.0,0,0,0.0,10,...,3.0,3.0,3.0,3.0,6.0,7.0,10.0,8.0,18.0,-10.0


In [2428]:
#Vamos criar um código que criará o número de vitórias, derrotas e empates de cada time nos 10 primeiros jogos
vitorias = 0
derrotas = 0
empates = 0
for seasons in df_novo_final['season'].unique():
    dados_season = df_novo_final[df_novo_final['season'] == seasons]
    for team in dados_season['name'].unique():
        #Filtrando os dados do time
        dados_time = df_novo_final[(df_novo_final['name'] == team) & (df_novo_final['season'] == seasons)]  
        #Vamos fazer um for das 10 primeiras rodadas para contar cada coluna is_win
        for i in range(1,11):
            nome_coluna_win = f'is_win_{i}'
            if dados_time[nome_coluna_win].values[0] == 1:
                vitorias +=1
            elif dados_time[nome_coluna_win].values[0] == 0:
                derrotas +=1
            elif dados_time[nome_coluna_win].values[0] == 2:
                empates +=1
        #Agora vamos adicionar as colunas de vitoria, derrota e empate no data2me além de zerar as variáveis
        #Criando um mask:
        mask = (df_novo_final['name'] == team) & (df_novo_final['season'] == seasons)
        df_novo_final.loc[mask, 'vitorias'] = vitorias
        df_novo_final.loc[mask, 'derrotas'] = derrotas
        df_novo_final.loc[mask, 'empates'] = empates
        vitorias = 0
        derrotas = 0
        empates = 0


#Vamos olhar o resultado
df_novo_final.head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,total_points_7,total_points_8,total_points_9,total_points_10,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates
0,180.0,2012,Southampton Football Club,4,2.0,13.0,0,0,0.0,23,...,4.0,4.0,4.0,4.0,14.0,28.0,-14.0,1.0,8.0,1.0
1,512.0,2012,Stoke City,4,1.0,9.0,0,2,0.0,1,...,8.0,8.0,9.0,9.0,8.0,10.0,-2.0,1.0,3.0,6.0
2,931.0,2012,Fulham Football Club,3,5.0,1.0,1,1,0.0,10,...,10.0,13.0,14.0,15.0,21.0,16.0,5.0,4.0,3.0,3.0
3,405.0,2012,Aston Villa Football Club,4,0.0,15.0,0,0,0.0,6,...,5.0,5.0,6.0,9.0,8.0,14.0,-6.0,2.0,5.0,3.0
4,379.0,2012,West Ham United Football Club,3,1.0,8.0,1,1,0.0,6,...,11.0,14.0,14.0,15.0,13.0,11.0,2.0,4.0,3.0,3.0
5,984.0,2012,West Bromwich Albion,3,3.0,3.0,1,1,0.0,8,...,14.0,14.0,14.0,17.0,15.0,11.0,4.0,5.0,3.0,2.0
6,289.0,2012,Sunderland AFC,4,0.0,12.0,0,2,0.0,0,...,10.0,11.0,12.0,12.0,9.0,9.0,0.0,2.0,2.0,6.0
7,631.0,2012,Chelsea Football Club,2,2.0,4.0,0,1,0.0,7,...,19.0,22.0,22.0,23.0,22.0,10.0,12.0,7.0,1.0,2.0
8,1032.0,2012,Reading FC,5,1.0,10.0,1,2,0.0,1,...,3.0,3.0,4.0,5.0,12.0,21.0,-9.0,0.0,5.0,5.0
9,1123.0,2012,Norwich City,3,0.0,20.0,0,0,0.0,10,...,3.0,6.0,7.0,10.0,8.0,18.0,-10.0,2.0,4.0,4.0


In [2429]:
#Vamos criar uma coluna de jogos sem perder 
jogos_sem_perder = 0
for seasons in df_novo_final['season'].unique():
    dados_season = df_novo_final[df_novo_final['season'] == seasons]
    for team in dados_season['name'].unique():
        #Filtrando os dados do time
        dados_time = df_novo_final[(df_novo_final['name'] == team) & (df_novo_final['season'] == seasons)]  
        #Vamos fazer um for das 10 primeiras rodadas para contar cada coluna is_win
        for i in range(1,11):
            nome_coluna_win = f'is_win_{i}'
            if dados_time[nome_coluna_win].values[0] == 1:
                jogos_sem_perder +=1
            elif dados_time[nome_coluna_win].values[0] == 0:
                jogos_sem_perder = 0
            elif dados_time[nome_coluna_win].values[0] == 2:
                jogos_sem_perder +=1
        #Agora vamos adicionar as colunas de vitoria, derrota e empate no data2me além de zerar as variáveis
        #Criando um mask:
        mask = (df_novo_final['name'] == team) & (df_novo_final['season'] == seasons)
        df_novo_final.loc[mask, 'jogos_sem_perder'] = jogos_sem_perder
        jogos_sem_perder = 0

In [2430]:
df_novo_final.head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,total_points_8,total_points_9,total_points_10,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder
0,180.0,2012,Southampton Football Club,4,2.0,13.0,0,0,0.0,23,...,4.0,4.0,4.0,14.0,28.0,-14.0,1.0,8.0,1.0,0.0
1,512.0,2012,Stoke City,4,1.0,9.0,0,2,0.0,1,...,8.0,9.0,9.0,8.0,10.0,-2.0,1.0,3.0,6.0,0.0
2,931.0,2012,Fulham Football Club,3,5.0,1.0,1,1,0.0,10,...,13.0,14.0,15.0,21.0,16.0,5.0,4.0,3.0,3.0,4.0
3,405.0,2012,Aston Villa Football Club,4,0.0,15.0,0,0,0.0,6,...,5.0,6.0,9.0,8.0,14.0,-6.0,2.0,5.0,3.0,2.0
4,379.0,2012,West Ham United Football Club,3,1.0,8.0,1,1,0.0,6,...,14.0,14.0,15.0,13.0,11.0,2.0,4.0,3.0,3.0,1.0
5,984.0,2012,West Bromwich Albion,3,3.0,3.0,1,1,0.0,8,...,14.0,14.0,17.0,15.0,11.0,4.0,5.0,3.0,2.0,1.0
6,289.0,2012,Sunderland AFC,4,0.0,12.0,0,2,0.0,0,...,11.0,12.0,12.0,9.0,9.0,0.0,2.0,2.0,6.0,0.0
7,631.0,2012,Chelsea Football Club,2,2.0,4.0,0,1,0.0,7,...,22.0,22.0,23.0,22.0,10.0,12.0,7.0,1.0,2.0,1.0
8,1032.0,2012,Reading FC,5,1.0,10.0,1,2,0.0,1,...,3.0,4.0,5.0,12.0,21.0,-9.0,0.0,5.0,5.0,2.0
9,1123.0,2012,Norwich City,3,0.0,20.0,0,0,0.0,10,...,6.0,7.0,10.0,8.0,18.0,-10.0,2.0,4.0,4.0,3.0


In [2431]:
casa = 0
fora = 0
#Agora vamos criar uma coluna que contabiliza a quantidade de jogos fora de casa que o time jogou
for seasons in df_novo_final['season'].unique():
    dados_season = df_novo_final[df_novo_final['season'] == seasons]
    for team in dados_season['name'].unique():
        #Filtrando os dados do time
        dados_time = df_novo_final[(df_novo_final['name'] == team) & (df_novo_final['season'] == seasons)] 
        #Vamos pegar cada uma das colunas hosting_x 
        for i in range(1,11):
            nome_coluna_hosting = f'hosting_{i}'
            if dados_time[nome_coluna_hosting].values[0] == 1:
                casa +=1
            elif dados_time[nome_coluna_hosting].values[0] == 0:
                fora +=1
        #Agora vamos adcionar as colunas de casa e fora no dataframe além de zerar as as variáveis
        #Criando um mask:
        mask = (df_novo_final['name'] == team) & (df_novo_final['season'] == seasons)
        df_novo_final.loc[mask, 'casa'] = casa
        df_novo_final.loc[mask, 'fora'] = fora
        casa = 0
        fora = 0
#Vamos olhar o resultado
df_novo_final.head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,total_points_10,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora
0,180.0,2012,Southampton Football Club,4,2.0,13.0,0,0,0.0,23,...,4.0,14.0,28.0,-14.0,1.0,8.0,1.0,0.0,5.0,5.0
1,512.0,2012,Stoke City,4,1.0,9.0,0,2,0.0,1,...,9.0,8.0,10.0,-2.0,1.0,3.0,6.0,0.0,4.0,6.0
2,931.0,2012,Fulham Football Club,3,5.0,1.0,1,1,0.0,10,...,15.0,21.0,16.0,5.0,4.0,3.0,3.0,4.0,5.0,5.0
3,405.0,2012,Aston Villa Football Club,4,0.0,15.0,0,0,0.0,6,...,9.0,8.0,14.0,-6.0,2.0,5.0,3.0,2.0,4.0,6.0
4,379.0,2012,West Ham United Football Club,3,1.0,8.0,1,1,0.0,6,...,15.0,13.0,11.0,2.0,4.0,3.0,3.0,1.0,6.0,4.0
5,984.0,2012,West Bromwich Albion,3,3.0,3.0,1,1,0.0,8,...,17.0,15.0,11.0,4.0,5.0,3.0,2.0,1.0,6.0,4.0
6,289.0,2012,Sunderland AFC,4,0.0,12.0,0,2,0.0,0,...,12.0,9.0,9.0,0.0,2.0,2.0,6.0,0.0,5.0,5.0
7,631.0,2012,Chelsea Football Club,2,2.0,4.0,0,1,0.0,7,...,23.0,22.0,10.0,12.0,7.0,1.0,2.0,1.0,5.0,5.0
8,1032.0,2012,Reading FC,5,1.0,10.0,1,2,0.0,1,...,5.0,12.0,21.0,-9.0,0.0,5.0,5.0,2.0,4.0,6.0
9,1123.0,2012,Norwich City,3,0.0,20.0,0,0,0.0,10,...,10.0,8.0,18.0,-10.0,2.0,4.0,4.0,3.0,5.0,5.0


## Nova Columna: Recém_promovido
Agora vamos criar uma nova feature que pode nos ajudar a prever melhor o futuro do campeonato

In [2432]:
#Vamos criar uma nova coluna para os times recém promovidos
#2012
mapeamento_rebaixados = {
    'Southampton Football Club':1,
    'Stoke City':0,
    'Fulham Football Club':0,
    'Aston Villa Football Club':0,
    'West Ham United Football Club': 1,
    'West Bromwich Albion':0,
    'Sunderland AFC':0,
    'Chelsea Football Club':0,
    'Reading FC':1,
    'Norwich City':0,
    'Arsenal Football Club':0,
    'Manchester City Football Club':0,
    'Newcastle United Football Club':0,
    'Liverpool Football Club':0,
    'Everton Football Club':0,
    'Queens Park Rangers':0,
    'Swansea City':0,
    'Manchester United Football Club':0,
    'Wigan Athletic':0,
    'Tottenham Hotspur Football Club':0
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2012, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2012].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
0,180.0,2012,Southampton Football Club,4,2.0,13.0,0,0,0.0,23,...,14.0,28.0,-14.0,1.0,8.0,1.0,0.0,5.0,5.0,1.0
1,512.0,2012,Stoke City,4,1.0,9.0,0,2,0.0,1,...,8.0,10.0,-2.0,1.0,3.0,6.0,0.0,4.0,6.0,0.0
2,931.0,2012,Fulham Football Club,3,5.0,1.0,1,1,0.0,10,...,21.0,16.0,5.0,4.0,3.0,3.0,4.0,5.0,5.0,0.0
3,405.0,2012,Aston Villa Football Club,4,0.0,15.0,0,0,0.0,6,...,8.0,14.0,-6.0,2.0,5.0,3.0,2.0,4.0,6.0,0.0
4,379.0,2012,West Ham United Football Club,3,1.0,8.0,1,1,0.0,6,...,13.0,11.0,2.0,4.0,3.0,3.0,1.0,6.0,4.0,1.0
5,984.0,2012,West Bromwich Albion,3,3.0,3.0,1,1,0.0,8,...,15.0,11.0,4.0,5.0,3.0,2.0,1.0,6.0,4.0,0.0
6,289.0,2012,Sunderland AFC,4,0.0,12.0,0,2,0.0,0,...,9.0,9.0,0.0,2.0,2.0,6.0,0.0,5.0,5.0,0.0
7,631.0,2012,Chelsea Football Club,2,2.0,4.0,0,1,0.0,7,...,22.0,10.0,12.0,7.0,1.0,2.0,1.0,5.0,5.0,0.0
8,1032.0,2012,Reading FC,5,1.0,10.0,1,2,0.0,1,...,12.0,21.0,-9.0,0.0,5.0,5.0,2.0,4.0,6.0,1.0
9,1123.0,2012,Norwich City,3,0.0,20.0,0,0,0.0,10,...,8.0,18.0,-10.0,2.0,4.0,4.0,3.0,5.0,5.0,0.0


In [2433]:
#2013
mapeamento_rebaixados = {
    'Stoke City':0,
    'West Bromwich Albion':0,
    'Sunderland AFC':0,
    'West Ham United Football Club': 0,
    'Manchester City Football Club':0,
    'Norwich City':0,
    'Newcastle United Football Club':0,
    'Southampton Football Club':0,
    'Arsenal Football Club':0,
    'Aston Villa Football Club':0,
    'Crystal Palace Football Club':1,
    'Everton Football Club':0,
    'Tottenham Hotspur Football Club':0,
    'Swansea City':0,
    'Manchester United Football Club':0,
    'Chelsea Football Club':0,
    'Hull City':1,
    'Liverpool Football Club':0,
    'Cardiff City':1,
    'Fulham Football Club':0,
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2013, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2013].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
20,512.0,2013,Stoke City,3,0.0,13.0,0,0,6.0,6,...,7.0,11.0,-4.0,2.0,5.0,3.0,1.0,5.0,5.0,0.0
21,984.0,2013,West Bromwich Albion,4,0.0,15.0,1,0,3.0,6,...,10.0,10.0,0.0,3.0,3.0,4.0,1.0,5.0,5.0,0.0
22,289.0,2013,Sunderland AFC,4,0.0,12.0,1,0,1.0,6,...,7.0,22.0,-15.0,1.0,8.0,1.0,0.0,5.0,5.0,0.0
23,379.0,2013,West Ham United Football Club,4,2.0,4.0,1,1,1.0,7,...,8.0,8.0,0.0,2.0,4.0,4.0,2.0,5.0,5.0,0.0
24,281.0,2013,Manchester City Football Club,1,4.0,1.0,1,1,3.0,9,...,28.0,11.0,17.0,6.0,3.0,1.0,1.0,5.0,5.0,0.0
25,1123.0,2013,Norwich City,5,2.0,11.0,1,2,1.0,2,...,6.0,20.0,-14.0,2.0,6.0,2.0,0.0,5.0,5.0,0.0
26,762.0,2013,Newcastle United Football Club,3,0.0,20.0,0,0,3.0,9,...,14.0,16.0,-2.0,4.0,4.0,2.0,1.0,5.0,5.0,0.0
27,180.0,2013,Southampton Football Club,3,1.0,8.0,0,1,13.0,6,...,11.0,4.0,7.0,5.0,1.0,4.0,7.0,5.0,5.0,0.0
28,11.0,2013,Arsenal Football Club,2,1.0,16.0,1,0,1.0,17,...,22.0,9.0,13.0,8.0,1.0,1.0,9.0,5.0,5.0,0.0
29,405.0,2013,Aston Villa Football Club,4,3.0,3.0,0,1,5.0,17,...,9.0,12.0,-3.0,3.0,5.0,2.0,1.0,5.0,5.0,0.0


In [2434]:
#2014
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Aston Villa Football Club':0,
    'Burnley FC':1,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Hull City':0,
    'Leicester City Football Club':1,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Newcastle United Football Club':0,
    'Queens Park Rangers':1,
    'Southampton Football Club':0,
    'Stoke City':0,
    'Sunderland AFC':0,
    'Swansea City':0,
    'Tottenham Hotspur Football Club':0,
    'West Bromwich Albion':0,
    'West Ham United Football Club': 0
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2014, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2014].head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
40,11.0,2014,Arsenal Football Club,2,2.0,3.0,1,1,7.0,16,...,18.0,11.0,7.0,4.0,1.0,5.0,3.0,5.0,5.0,0.0
41,1039.0,2014,Queens Park Rangers,5,0.0,18.0,1,0,17.0,6,...,9.0,20.0,-11.0,2.0,7.0,1.0,0.0,5.0,5.0,1.0
42,984.0,2014,West Bromwich Albion,4,2.0,11.0,1,2,1.0,2,...,13.0,13.0,0.0,3.0,3.0,4.0,3.0,5.0,5.0,0.0
43,31.0,2014,Liverpool Football Club,2,2.0,4.0,1,1,1.0,16,...,13.0,13.0,0.0,4.0,4.0,2.0,0.0,5.0,5.0,0.0
44,180.0,2014,Southampton Football Club,3,1.0,13.0,0,0,1.0,16,...,21.0,5.0,16.0,7.0,2.0,1.0,3.0,5.0,5.0,0.0
45,512.0,2014,Stoke City,3,0.0,17.0,1,0,1.0,6,...,10.0,12.0,-2.0,3.0,4.0,3.0,1.0,5.0,5.0,0.0
46,873.0,2014,Crystal Palace Football Club,3,1.0,14.0,0,0,1.0,16,...,14.0,19.0,-5.0,2.0,5.0,3.0,0.0,5.0,5.0,0.0
47,3008.0,2014,Hull City,5,1.0,8.0,0,1,11.0,6,...,13.0,14.0,-1.0,2.0,3.0,5.0,0.0,5.0,5.0,0.0
48,762.0,2014,Newcastle United Football Club,4,0.0,20.0,1,0,5.0,7,...,11.0,15.0,-4.0,3.0,3.0,4.0,4.0,5.0,5.0,0.0
49,379.0,2014,West Ham United Football Club,3,0.0,16.0,1,0,1.0,6,...,19.0,14.0,5.0,5.0,3.0,2.0,4.0,5.0,5.0,0.0


In [2435]:
#2015 
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Aston Villa Football Club':0,
    'Association Football Club Bournemouth':1,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Leicester City Football Club':0,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Newcastle United Football Club':0,
    'Norwich City':1,
    'Southampton Football Club':0,
    'Stoke City':0,
    'Sunderland AFC':0,
    'Swansea City':0,
    'Tottenham Hotspur Football Club':0,
    'Watford FC':1,
    'West Bromwich Albion':0,
    'West Ham United Football Club': 0
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2015, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2015].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
60,2288.0,2015,Swansea City,3,2.0,13.0,0,2,4.0,2,...,12.0,12.0,0.0,3.0,3.0,4.0,1.0,5.0,5.0,0.0
61,989.0,2015,Association Football Club Bournemouth,4,0.0,16.0,1,0,3.0,6,...,12.0,22.0,-10.0,2.0,6.0,2.0,0.0,5.0,5.0,1.0
62,631.0,2015,Chelsea Football Club,3,2.0,10.0,1,2,1.0,2,...,15.0,19.0,-4.0,3.0,5.0,2.0,0.0,5.0,5.0,0.0
63,31.0,2015,Liverpool Football Club,3,1.0,5.0,0,1,1.0,6,...,9.0,11.0,-2.0,3.0,2.0,5.0,5.0,5.0,5.0,0.0
64,762.0,2015,Newcastle United Football Club,5,2.0,11.0,1,2,1.0,2,...,12.0,22.0,-10.0,1.0,6.0,3.0,0.0,5.0,5.0,0.0
65,512.0,2015,Stoke City,3,0.0,15.0,1,0,7.0,6,...,9.0,12.0,-3.0,3.0,4.0,3.0,0.0,5.0,5.0,0.0
66,11.0,2015,Arsenal Football Club,2,0.0,19.0,1,0,1.0,7,...,18.0,8.0,10.0,7.0,2.0,1.0,4.0,5.0,5.0,0.0
67,1003.0,2015,Leicester City Football Club,1,4.0,2.0,1,1,3.0,24,...,20.0,17.0,3.0,5.0,1.0,4.0,3.0,5.0,5.0,0.0
68,873.0,2015,Crystal Palace Football Club,4,3.0,3.0,0,1,4.0,17,...,12.0,11.0,1.0,5.0,5.0,0.0,0.0,5.0,5.0,0.0
69,1123.0,2015,Norwich City,5,1.0,18.0,1,0,12.0,17,...,14.0,21.0,-7.0,2.0,5.0,3.0,0.0,5.0,5.0,1.0


In [2436]:
#2016 
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Association Football Club Bournemouth':0,
    'Burnley FC':1,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Hull City':1,
    'Leicester City Football Club':0,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Middlesbrough FC':1,
    'Southampton Football Club':0,
    'Stoke City':0,
    'Sunderland AFC':1,
    'Swansea City':0,
    'Tottenham Hotspur Football Club':0,
    'Watford FC':0,
    'West Bromwich Albion':0,
    'West Ham United Football Club': 0
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2016, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2016].head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
80,985.0,2016,Manchester United Football Club,2,3.0,1.0,0,1,1.0,17,...,13.0,12.0,1.0,4.0,3.0,3.0,1.0,5.0,5.0,0.0
81,984.0,2016,West Bromwich Albion,3,1.0,6.0,0,1,3.0,6,...,10.0,14.0,-4.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0
82,11.0,2016,Arsenal Football Club,2,3.0,14.0,1,0,1.0,29,...,23.0,10.0,13.0,7.0,1.0,2.0,9.0,5.0,5.0,0.0
83,631.0,2016,Chelsea Football Club,1,2.0,4.0,1,1,7.0,16,...,21.0,9.0,12.0,7.0,2.0,1.0,4.0,5.0,5.0,0.0
84,379.0,2016,West Ham United Football Club,3,1.0,16.0,0,0,7.0,16,...,10.0,19.0,-9.0,3.0,6.0,1.0,0.0,5.0,5.0,0.0
85,2288.0,2016,Swansea City,4,1.0,7.0,0,1,5.0,6,...,9.0,18.0,-9.0,1.0,7.0,2.0,0.0,5.0,5.0,0.0
86,1132.0,2016,Burnley FC,4,0.0,19.0,1,0,13.0,6,...,8.0,13.0,-5.0,3.0,5.0,2.0,2.0,6.0,4.0,1.0
87,29.0,2016,Everton Football Club,3,1.0,8.0,1,2,14.0,1,...,15.0,8.0,7.0,5.0,2.0,3.0,1.0,5.0,5.0,0.0
88,31.0,2016,Liverpool Football Club,2,4.0,2.0,0,1,5.0,29,...,24.0,13.0,11.0,7.0,1.0,2.0,8.0,4.0,6.0,0.0
89,873.0,2016,Crystal Palace Football Club,4,0.0,18.0,1,0,1.0,6,...,14.0,16.0,-2.0,3.0,5.0,2.0,0.0,5.0,5.0,0.0


In [2437]:
#2017 
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Association Football Club Bournemouth':0,
    'Brighton and Hove Albion Football Club':1,
    'Burnley FC':0,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Huddersfield Town':1,
    'Leicester City Football Club':0,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Newcastle United Football Club':1,
    'Southampton Football Club':0,
    'Stoke City':0,
    'Swansea City':0,
    'Tottenham Hotspur Football Club':0,
    'Watford FC':0,
    'West Bromwich Albion':0,
    'West Ham United Football Club': 0
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2017, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2017].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
100,1237.0,2017,Brighton and Hove Albion Football Club,4,0.0,18.0,1,0,5.0,7,...,10.0,11.0,-1.0,3.0,4.0,3.0,3.0,5.0,5.0,1.0
101,631.0,2017,Chelsea Football Club,2,2.0,14.0,1,0,14.0,23,...,18.0,10.0,8.0,6.0,3.0,1.0,2.0,5.0,5.0,0.0
102,1110.0,2017,Huddersfield Town,4,3.0,2.0,0,1,1.0,8,...,7.0,13.0,-6.0,3.0,4.0,3.0,0.0,5.0,5.0,1.0
103,281.0,2017,Manchester City Football Club,1,2.0,4.0,0,1,27.0,7,...,35.0,6.0,29.0,9.0,0.0,1.0,10.0,5.0,5.0,0.0
104,31.0,2017,Liverpool Football Club,2,3.0,9.0,0,2,5.0,3,...,17.0,16.0,1.0,4.0,2.0,4.0,1.0,5.0,5.0,0.0
105,180.0,2017,Southampton Football Club,4,0.0,11.0,1,2,1.0,0,...,9.0,10.0,-1.0,3.0,3.0,4.0,3.0,6.0,4.0,0.0
106,984.0,2017,West Bromwich Albion,5,1.0,8.0,1,1,7.0,6,...,9.0,13.0,-4.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0
107,985.0,2017,Manchester United Football Club,2,4.0,1.0,1,1,1.0,9,...,23.0,4.0,19.0,7.0,1.0,2.0,1.0,5.0,5.0,0.0
108,512.0,2017,Stoke City,5,0.0,16.0,0,0,22.0,6,...,11.0,20.0,-9.0,3.0,5.0,2.0,1.0,5.0,5.0,0.0
109,2288.0,2017,Swansea City,5,0.0,12.0,0,2,5.0,0,...,7.0,12.0,-5.0,2.0,6.0,2.0,0.0,5.0,5.0,0.0


In [2438]:
#2018 
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Association Football Club Bournemouth':0,
    'Brighton and Hove Albion Football Club':0,
    'Burnley FC':0,
    'Cardiff City':1,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Fulham Football Club':1,
    'Huddersfield Town':0,
    'Leicester City Football Club':0,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Newcastle United Football Club':0,
    'Southampton Football Club':0,
    'Tottenham Hotspur Football Club':0,
    'Watford FC':0,
    'West Ham United Football Club': 0,
    'Wolverhampton Wanderers Football Club':1
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2018, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2018].head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
120,1110.0,2018,Huddersfield Town,5,0.0,19.0,1,0,9.0,8,...,4.0,21.0,-17.0,0.0,7.0,3.0,0.0,5.0,5.0,0.0
121,603.0,2018,Cardiff City,5,0.0,17.0,0,0,7.0,7,...,9.0,23.0,-14.0,1.0,7.0,2.0,0.0,5.0,5.0,1.0
122,631.0,2018,Chelsea Football Club,2,3.0,2.0,0,1,5.0,8,...,24.0,7.0,17.0,7.0,0.0,3.0,10.0,5.0,5.0,0.0
123,1010.0,2018,Watford FC,3,2.0,6.0,1,1,3.0,7,...,16.0,12.0,4.0,6.0,3.0,1.0,2.0,6.0,4.0,0.0
124,180.0,2018,Southampton Football Club,4,0.0,12.0,1,2,14.0,0,...,6.0,14.0,-8.0,1.0,5.0,4.0,2.0,5.0,5.0,0.0
125,985.0,2018,Manchester United Football Club,2,2.0,7.0,1,1,7.0,16,...,17.0,17.0,0.0,5.0,3.0,2.0,3.0,5.0,5.0,0.0
126,543.0,2018,Wolverhampton Wanderers Football Club,3,2.0,10.0,1,2,22.0,2,...,9.0,9.0,0.0,4.0,3.0,3.0,0.0,5.0,5.0,1.0
127,873.0,2018,Crystal Palace Football Club,3,2.0,4.0,0,1,13.0,7,...,7.0,13.0,-6.0,2.0,6.0,2.0,1.0,5.0,5.0,0.0
128,379.0,2018,West Ham United Football Club,3,0.0,20.0,0,0,1.0,9,...,9.0,15.0,-6.0,2.0,6.0,2.0,1.0,5.0,5.0,0.0
129,1237.0,2018,Brighton and Hove Albion Football Club,4,0.0,16.0,0,0,12.0,7,...,11.0,13.0,-2.0,4.0,4.0,2.0,3.0,5.0,5.0,0.0


In [2439]:
#2021
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Aston Villa Football Club':0,
    'Brentford Football Club':1,
    'Brighton and Hove Albion Football Club':0,
    'Burnley FC':0,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Leeds United':0,
    'Leicester City Football Club':0,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Newcastle United Football Club':0,
    'Norwich City':1,
    'Southampton Football Club':0,
    'Tottenham Hotspur Football Club':0,
    'Watford FC':1,
    'West Ham United Football Club': 0,
    'Wolverhampton Wanderers Football Club':0,
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2021, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2021].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
140,1237.0,2021,Brighton and Hove Albion Football Club,3,2.0,8.0,0,1,14.0,16,...,11.0,11.0,0.0,4.0,2.0,4.0,1.0,5.0,5.0,0.0
141,31.0,2021,Liverpool Football Club,2,3.0,3.0,0,1,5.0,8,...,29.0,8.0,21.0,6.0,0.0,4.0,10.0,5.0,5.0,0.0
142,379.0,2021,West Ham United Football Club,3,4.0,4.0,0,1,1.0,24,...,20.0,11.0,9.0,6.0,2.0,2.0,3.0,5.0,5.0,0.0
143,148.0,2021,Tottenham Hotspur Football Club,2,1.0,9.0,1,1,5.0,6,...,9.0,16.0,-7.0,5.0,5.0,0.0,0.0,5.0,5.0,0.0
144,180.0,2021,Southampton Football Club,4,1.0,16.0,0,0,3.0,17,...,9.0,12.0,-3.0,2.0,3.0,5.0,3.0,5.0,5.0,0.0
145,281.0,2021,Manchester City Football Club,1,0.0,14.0,0,0,5.0,6,...,20.0,6.0,14.0,6.0,2.0,2.0,0.0,5.0,5.0,0.0
146,11.0,2021,Arsenal Football Club,2,0.0,17.0,0,0,1.0,7,...,12.0,13.0,-1.0,5.0,3.0,2.0,7.0,5.0,5.0,0.0
147,1003.0,2021,Leicester City Football Club,3,1.0,10.0,1,1,1.0,6,...,15.0,17.0,-2.0,4.0,4.0,2.0,0.0,5.0,5.0,0.0
148,1010.0,2021,Watford FC,5,3.0,7.0,1,1,7.0,23,...,12.0,18.0,-6.0,3.0,6.0,1.0,0.0,5.0,5.0,1.0
149,543.0,2021,Wolverhampton Wanderers Football Club,3,0.0,13.0,0,0,11.0,6,...,11.0,10.0,1.0,5.0,4.0,1.0,5.0,5.0,5.0,0.0


In [2440]:
#2022
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Aston Villa Football Club':0,
    'Association Football Club Bournemouth':1,
    'Brentford Football Club':0,
    'Brighton and Hove Albion Football Club':0,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Fulham Football Club':1,
    'Leeds United':0,
    'Leicester City Football Club':0,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Newcastle United Football Club':0,
    'Nottingham Forest Football Club':1,
    'Southampton Football Club':0,
    'Tottenham Hotspur Football Club':0,
    'West Ham United Football Club': 0,
    'Wolverhampton Wanderers Football Club':0
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2022, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2022].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
160,931.0,2022,Fulham Football Club,3,2.0,9.0,1,2,1.0,2,...,16.0,19.0,-3.0,4.0,4.0,2.0,0.0,5.0,5.0,1.0
161,29.0,2022,Everton Football Club,4,0.0,15.0,1,0,11.0,6,...,8.0,13.0,-5.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0
162,985.0,2022,Manchester United Football Club,2,1.0,14.0,1,0,5.0,16,...,16.0,18.0,-2.0,5.0,3.0,2.0,1.0,4.0,6.0,0.0
163,1237.0,2022,Brighton and Hove Albion Football Club,2,2.0,7.0,0,1,14.0,16,...,17.0,9.0,8.0,6.0,2.0,2.0,0.0,5.0,5.0,0.0
164,543.0,2022,Wolverhampton Wanderers Football Club,4,1.0,13.0,0,0,5.0,16,...,3.0,14.0,-11.0,1.0,6.0,3.0,0.0,4.0,6.0,0.0
165,11.0,2022,Arsenal Football Club,2,2.0,2.0,0,1,1.0,7,...,27.0,10.0,17.0,9.0,1.0,0.0,4.0,6.0,4.0,0.0
166,631.0,2022,Chelsea Football Club,3,1.0,8.0,0,1,14.0,6,...,14.0,12.0,2.0,5.0,3.0,2.0,3.0,5.0,5.0,0.0
167,31.0,2022,Liverpool Football Club,2,2.0,10.0,0,2,5.0,2,...,22.0,12.0,10.0,3.0,2.0,5.0,0.0,5.0,5.0,0.0
168,281.0,2022,Manchester City Football Club,1,2.0,5.0,0,1,7.0,7,...,37.0,11.0,26.0,8.0,0.0,2.0,10.0,6.0,4.0,0.0
169,989.0,2022,Association Football Club Bournemouth,4,2.0,3.0,1,1,11.0,7,...,8.0,22.0,-14.0,3.0,4.0,3.0,3.0,6.0,4.0,1.0


In [2441]:
#2023
mapeamento_rebaixados = {
    'Arsenal Football Club':0,
    'Aston Villa Football Club':0,
    'Association Football Club Bournemouth':0,
    'Brentford Football Club':0,
    'Brighton and Hove Albion Football Club':0,
    'Burnley FC':1,
    'Chelsea Football Club':0,
    'Crystal Palace Football Club':0,
    'Everton Football Club':0,
    'Fulham Football Club':0,
    'Luton Town':1,
    'Liverpool Football Club':0,
    'Manchester City Football Club':0,
    'Manchester United Football Club':0,
    'Newcastle United Football Club':0,
    'Nottingham Forest Football Club':0,
    'Sheffield United':1,
    'Tottenham Hotspur Football Club':0,
    'West Ham United Football Club': 0,
    'Wolverhampton Wanderers Football Club':0
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2023, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2023].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
180,350.0,2023,Sheffield United,5,0.0,14.0,1,0,22.0,6,...,7.0,29.0,-22.0,0.0,9.0,1.0,0.0,5.0,5.0,1.0
181,762.0,2023,Newcastle United Football Club,3,5.0,1.0,1,1,5.0,19,...,26.0,11.0,15.0,5.0,3.0,2.0,6.0,5.0,5.0,0.0
182,1237.0,2023,Brighton and Hove Albion Football Club,3,4.0,2.0,1,1,1.0,18,...,23.0,19.0,4.0,5.0,3.0,2.0,1.0,6.0,4.0,0.0
183,281.0,2023,Manchester City Football Club,1,3.0,3.0,0,1,1.0,8,...,22.0,7.0,15.0,8.0,2.0,0.0,2.0,4.0,6.0,0.0
184,405.0,2023,Aston Villa Football Club,2,1.0,18.0,0,0,1.0,19,...,26.0,14.0,12.0,7.0,2.0,1.0,6.0,5.0,5.0,0.0
185,985.0,2023,Manchester United Football Club,3,1.0,7.0,1,1,7.0,6,...,11.0,16.0,-5.0,5.0,5.0,0.0,0.0,6.0,4.0,0.0
186,873.0,2023,Crystal Palace Football Club,3,1.0,5.0,0,1,1.0,6,...,8.0,13.0,-5.0,3.0,4.0,3.0,0.0,5.0,5.0,0.0
187,703.0,2023,Nottingham Forest Football Club,4,1.0,19.0,0,0,14.0,16,...,10.0,15.0,-5.0,2.0,4.0,4.0,0.0,4.0,6.0,0.0
188,543.0,2023,Wolverhampton Wanderers Football Club,4,0.0,15.0,0,0,13.0,6,...,13.0,17.0,-4.0,3.0,4.0,3.0,5.0,5.0,5.0,0.0
189,1132.0,2023,Burnley FC,5,0.0,17.0,1,0,11.0,8,...,8.0,25.0,-17.0,1.0,8.0,1.0,0.0,5.0,5.0,1.0


In [2442]:
#2024
mapeamento_rebaixados = {
    'Manchester City Football Club':0,
    'Arsenal Football Club':0,
    'Liverpool Football Club':0,
    'Aston Villa Football Club':0,
    'Chelsea Football Club':0,
    'Manchester United Football Club':0,
    'Tottenham Hotspur Football Club':0,
    'Newcastle United Football Club':0,
    'West Ham United Football Club': 0,
    'Brighton and Hove Albion Football Club':0,
    'Wolverhampton Wanderers Football Club':0,
    'Crystal Palace Football Club':0,
    'Fulham Football Club':0,
    'Everton Football Club':0,
    'Brentford Football Club':0,
    'Association Football Club Bournemouth':0,
    'Nottingham Forest Football Club':0,
    'Leicester City Football Club':1,
    'Ipswich Town Football Club':1,
    'Southampton Football Club':1
}
# Aplicando o mapeamento usando loc
df_novo_final.loc[df_novo_final['season'] == 2024, 'recem_promovido'] = df_novo_final['name'].map(mapeamento_rebaixados)
#Vamos olhar o resultado
df_novo_final[df_novo_final['season'] == 2024].head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_pro,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido
200,11.0,2024,Arsenal Football Club,2,2.0,2.0,1,1,5.0,7,...,17.0,11.0,6.0,5.0,2.0,3.0,0.0,5.0,5.0,0.0
201,1148.0,2024,Brentford Football Club,3,2.0,6.0,1,1,5.0,16,...,19.0,20.0,-1.0,4.0,5.0,1.0,0.0,5.0,5.0,0.0
202,1237.0,2024,Brighton and Hove Albion Football Club,3,3.0,1.0,0,1,1.0,8,...,17.0,14.0,3.0,4.0,2.0,4.0,0.0,5.0,5.0,0.0
203,931.0,2024,Fulham Football Club,3,0.0,15.0,0,0,1.0,6,...,14.0,13.0,1.0,4.0,3.0,3.0,2.0,5.0,5.0,0.0
204,281.0,2024,Manchester City Football Club,2,2.0,4.0,0,1,14.0,7,...,21.0,11.0,10.0,7.0,1.0,2.0,0.0,5.0,5.0,0.0
205,543.0,2024,Wolverhampton Wanderers Football Club,4,0.0,19.0,0,0,1.0,7,...,14.0,27.0,-13.0,0.0,7.0,3.0,2.0,5.0,5.0,0.0
206,703.0,2024,Nottingham Forest Football Club,3,1.0,11.0,1,2,1.0,1,...,14.0,7.0,7.0,5.0,1.0,4.0,4.0,5.0,5.0,0.0
207,1003.0,2024,Leicester City Football Club,5,1.0,10.0,1,2,1.0,1,...,14.0,18.0,-4.0,2.0,4.0,4.0,1.0,5.0,5.0,1.0
208,677.0,2024,Ipswich Town Football Club,5,0.0,18.0,1,0,1.0,7,...,10.0,21.0,-11.0,0.0,5.0,5.0,1.0,5.0,5.0,1.0
209,148.0,2024,Tottenham Hotspur Football Club,4,1.0,12.0,0,2,5.0,1,...,22.0,11.0,11.0,5.0,4.0,1.0,1.0,5.0,5.0,0.0


Vamos agora criar um novo dataframe Com as novas colunas e ver como será a previsão dele e a acurácia

In [2443]:
#Vamos ver as colunas que mais se destacam
df_novo_final.columns.to_list()

['club_id',
 'season',
 'name',
 'last_position',
 'own_goals_1',
 'own_position_1',
 'hosting_1',
 'is_win_1',
 'home_club_formation_1',
 'aggregate_1',
 'opponent_goals_1',
 'opponent_position_1',
 'away_club_formation_1',
 'own_goals_2',
 'own_position_2',
 'hosting_2',
 'is_win_2',
 'home_club_formation_2',
 'aggregate_2',
 'opponent_goals_2',
 'opponent_position_2',
 'away_club_formation_2',
 'own_goals_3',
 'own_position_3',
 'hosting_3',
 'is_win_3',
 'home_club_formation_3',
 'aggregate_3',
 'opponent_goals_3',
 'opponent_position_3',
 'away_club_formation_3',
 'own_goals_4',
 'own_position_4',
 'hosting_4',
 'is_win_4',
 'home_club_formation_4',
 'aggregate_4',
 'opponent_goals_4',
 'opponent_position_4',
 'away_club_formation_4',
 'own_goals_5',
 'own_position_5',
 'hosting_5',
 'is_win_5',
 'home_club_formation_5',
 'aggregate_5',
 'opponent_goals_5',
 'opponent_position_5',
 'away_club_formation_5',
 'own_goals_6',
 'own_position_6',
 'hosting_6',
 'is_win_6',
 'home_club_f

In [2444]:
#Novo df:
df_previa = df_novo_final[['season', 'last_position', 'total_points_10', 'vitorias', 'derrotas', 'empates', 'casa', 'fora','gols_pro','gols_sofridos','saldo_gols','jogos_sem_perder','recem_promovido']].copy()
#Vamos ver o novo df
df_previa.head(20)

Unnamed: 0,season,last_position,total_points_10,vitorias,derrotas,empates,casa,fora,gols_pro,gols_sofridos,saldo_gols,jogos_sem_perder,recem_promovido
0,2012,4,4.0,1.0,8.0,1.0,5.0,5.0,14.0,28.0,-14.0,0.0,1.0
1,2012,4,9.0,1.0,3.0,6.0,4.0,6.0,8.0,10.0,-2.0,0.0,0.0
2,2012,3,15.0,4.0,3.0,3.0,5.0,5.0,21.0,16.0,5.0,4.0,0.0
3,2012,4,9.0,2.0,5.0,3.0,4.0,6.0,8.0,14.0,-6.0,2.0,0.0
4,2012,3,15.0,4.0,3.0,3.0,6.0,4.0,13.0,11.0,2.0,1.0,1.0
5,2012,3,17.0,5.0,3.0,2.0,6.0,4.0,15.0,11.0,4.0,1.0,0.0
6,2012,4,12.0,2.0,2.0,6.0,5.0,5.0,9.0,9.0,0.0,0.0,0.0
7,2012,2,23.0,7.0,1.0,2.0,5.0,5.0,22.0,10.0,12.0,1.0,0.0
8,2012,5,5.0,0.0,5.0,5.0,4.0,6.0,12.0,21.0,-9.0,2.0,1.0
9,2012,3,10.0,2.0,4.0,4.0,5.0,5.0,8.0,18.0,-10.0,3.0,0.0


In [2445]:
#Como já importamos as bibliotecas, podemos começar separando os dados
X = df_previa.drop(columns=['last_position'])
#Vamos separar nosso x test e x train de acordo com as temporadas.
X_train = X[X['season'] != 2014]
X_test = X[X['season'] == 2014]
# Agora vamos separar y para ser apenas a coluna last_position
y_train = df_previa[df_previa['season'] != 2014]['last_position']
y_test = df_previa[df_previa['season'] == 2014]['last_position']

In [2446]:
#Agora vamos aplicar o scaler nos dados de treino e teste
scaler = MinMaxScaler(feature_range=(0,1))

In [2447]:
#Vamos aplicar o scaler nos dados de treino e teste
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [2448]:
#Vamos definir o modelo KNN
knn = KNeighborsClassifier(n_neighbors=4)

In [2449]:
#Vamos aplicar o knn nos dados de treino
knn.fit(X_train, y_train)

In [2450]:
#vamos fazer as predições
y_pred = knn.predict(X_test)
print(f"Predições: {y_pred}")

Predições: [2 4 3 3 1 3 5 3 3 3 3 5 1 3 5 4 1 2 5 3]


In [2451]:
#Agora vamos calcular a acurácia do modelo
from sklearn.metrics import accuracy_score
acuracia = accuracy_score(y_test, y_pred)
print(f"Acurácia do modelo: {acuracia:.2f}")

Acurácia do modelo: 0.40


In [2452]:
#Vamos recuperar os dados como nome e códigos para ver se as predições estão corretas
df_final_com_pontos = pd.read_csv('df_final_premier_league.csv')
#Vamos criar um novo dataframe com as predições
df_predicoes = pd.DataFrame({
    'name': df_final_com_pontos[df_final_com_pontos['season'] == 2014]['name'].values,
    'last_position': y_pred
})
#Vamos ver o dataframe de predições
print(df_predicoes.head(20))


                               name  last_position
0             Arsenal Football Club              2
1               Queens Park Rangers              4
2              West Bromwich Albion              3
3           Liverpool Football Club              3
4         Southampton Football Club              1
5                        Stoke City              3
6      Crystal Palace Football Club              5
7                         Hull City              3
8    Newcastle United Football Club              3
9     West Ham United Football Club              3
10                     Swansea City              3
11                   Sunderland AFC              5
12            Chelsea Football Club              1
13  Tottenham Hotspur Football Club              3
14                       Burnley FC              5
15     Leicester City Football Club              4
16            Everton Football Club              1
17    Manchester City Football Club              2
18        Aston Villa Football 

In [2453]:
#Vamos printar as colunas para vermos as mais interessantes
df_novo_final.columns.to_list()

['club_id',
 'season',
 'name',
 'last_position',
 'own_goals_1',
 'own_position_1',
 'hosting_1',
 'is_win_1',
 'home_club_formation_1',
 'aggregate_1',
 'opponent_goals_1',
 'opponent_position_1',
 'away_club_formation_1',
 'own_goals_2',
 'own_position_2',
 'hosting_2',
 'is_win_2',
 'home_club_formation_2',
 'aggregate_2',
 'opponent_goals_2',
 'opponent_position_2',
 'away_club_formation_2',
 'own_goals_3',
 'own_position_3',
 'hosting_3',
 'is_win_3',
 'home_club_formation_3',
 'aggregate_3',
 'opponent_goals_3',
 'opponent_position_3',
 'away_club_formation_3',
 'own_goals_4',
 'own_position_4',
 'hosting_4',
 'is_win_4',
 'home_club_formation_4',
 'aggregate_4',
 'opponent_goals_4',
 'opponent_position_4',
 'away_club_formation_4',
 'own_goals_5',
 'own_position_5',
 'hosting_5',
 'is_win_5',
 'home_club_formation_5',
 'aggregate_5',
 'opponent_goals_5',
 'opponent_position_5',
 'away_club_formation_5',
 'own_goals_6',
 'own_position_6',
 'hosting_6',
 'is_win_6',
 'home_club_f

Agora vamos criar uma métrica da força dos times fornecida pela IA como se fosse antes do campeonato

In [2454]:
#Força dos times em 2012:
mapeamento_forca = {
    'Southampton Football Club':1,
    'Stoke City':2,
    'Fulham Football Club':3,
    'Aston Villa Football Club':3,
    'West Ham United Football Club': 2,
    'West Bromwich Albion':3,
    'Sunderland AFC':3,
    'Chelsea Football Club':5,
    'Reading FC':1,
    'Norwich City':2,
    'Arsenal Football Club':4,
    'Manchester City Football Club':5,
    'Newcastle United Football Club':3,
    'Liverpool Football Club':4,
    'Everton Football Club':3,
    'Queens Park Rangers':2,
    'Swansea City':2,
    'Manchester United Football Club':5,
    'Wigan Athletic':1,
    'Tottenham Hotspur Football Club':4
}

#Vamos mapear apenas para season 2012, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2012, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2012, 'name'].map(mapeamento_forca)

df_novo_final.loc[df_novo_final['season']==2012].head(20)



Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
0,180.0,2012,Southampton Football Club,4,2.0,13.0,0,0,0.0,23,...,28.0,-14.0,1.0,8.0,1.0,0.0,5.0,5.0,1.0,1.0
1,512.0,2012,Stoke City,4,1.0,9.0,0,2,0.0,1,...,10.0,-2.0,1.0,3.0,6.0,0.0,4.0,6.0,0.0,2.0
2,931.0,2012,Fulham Football Club,3,5.0,1.0,1,1,0.0,10,...,16.0,5.0,4.0,3.0,3.0,4.0,5.0,5.0,0.0,3.0
3,405.0,2012,Aston Villa Football Club,4,0.0,15.0,0,0,0.0,6,...,14.0,-6.0,2.0,5.0,3.0,2.0,4.0,6.0,0.0,3.0
4,379.0,2012,West Ham United Football Club,3,1.0,8.0,1,1,0.0,6,...,11.0,2.0,4.0,3.0,3.0,1.0,6.0,4.0,1.0,2.0
5,984.0,2012,West Bromwich Albion,3,3.0,3.0,1,1,0.0,8,...,11.0,4.0,5.0,3.0,2.0,1.0,6.0,4.0,0.0,3.0
6,289.0,2012,Sunderland AFC,4,0.0,12.0,0,2,0.0,0,...,9.0,0.0,2.0,2.0,6.0,0.0,5.0,5.0,0.0,3.0
7,631.0,2012,Chelsea Football Club,2,2.0,4.0,0,1,0.0,7,...,10.0,12.0,7.0,1.0,2.0,1.0,5.0,5.0,0.0,5.0
8,1032.0,2012,Reading FC,5,1.0,10.0,1,2,0.0,1,...,21.0,-9.0,0.0,5.0,5.0,2.0,4.0,6.0,1.0,1.0
9,1123.0,2012,Norwich City,3,0.0,20.0,0,0,0.0,10,...,18.0,-10.0,2.0,4.0,4.0,3.0,5.0,5.0,0.0,2.0


In [2455]:
#Agora vamos fazer o mesmo para a temporada 2013
df_novo_final.loc[df_novo_final['season']==2013].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
20,512.0,2013,Stoke City,3,0.0,13.0,0,0,6.0,6,...,11.0,-4.0,2.0,5.0,3.0,1.0,5.0,5.0,0.0,
21,984.0,2013,West Bromwich Albion,4,0.0,15.0,1,0,3.0,6,...,10.0,0.0,3.0,3.0,4.0,1.0,5.0,5.0,0.0,
22,289.0,2013,Sunderland AFC,4,0.0,12.0,1,0,1.0,6,...,22.0,-15.0,1.0,8.0,1.0,0.0,5.0,5.0,0.0,
23,379.0,2013,West Ham United Football Club,4,2.0,4.0,1,1,1.0,7,...,8.0,0.0,2.0,4.0,4.0,2.0,5.0,5.0,0.0,
24,281.0,2013,Manchester City Football Club,1,4.0,1.0,1,1,3.0,9,...,11.0,17.0,6.0,3.0,1.0,1.0,5.0,5.0,0.0,
25,1123.0,2013,Norwich City,5,2.0,11.0,1,2,1.0,2,...,20.0,-14.0,2.0,6.0,2.0,0.0,5.0,5.0,0.0,
26,762.0,2013,Newcastle United Football Club,3,0.0,20.0,0,0,3.0,9,...,16.0,-2.0,4.0,4.0,2.0,1.0,5.0,5.0,0.0,
27,180.0,2013,Southampton Football Club,3,1.0,8.0,0,1,13.0,6,...,4.0,7.0,5.0,1.0,4.0,7.0,5.0,5.0,0.0,
28,11.0,2013,Arsenal Football Club,2,1.0,16.0,1,0,1.0,17,...,9.0,13.0,8.0,1.0,1.0,9.0,5.0,5.0,0.0,
29,405.0,2013,Aston Villa Football Club,4,3.0,3.0,0,1,5.0,17,...,12.0,-3.0,3.0,5.0,2.0,1.0,5.0,5.0,0.0,


In [2456]:

mapeamento_forca = {
    'Stoke City':2,
    'West Bromwich Albion':3,
    'Sunderland AFC':1,
    'West Ham United Football Club': 2,
    'Manchester City Football Club':5,
    'Norwich City':2,
    'Newcastle United Football Club':3,
    'Southampton Football Club':3,
    'Arsenal Football Club':4,
    'Aston Villa Football Club':3,
    'Crystal Palace Football Club':1,
    'Everton Football Club':3,
    'Tottenham Hotspur Football Club':4,
    'Swansea City':3,
    'Manchester United Football Club':5,
    'Chelsea Football Club':5,
    'Hull City':1,
    'Liverpool Football Club':4,
    'Cardiff City':2,
    'Fulham Football Club':2,
}

#Vamos mapear apenas para season 2013, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2013, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2013, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2013].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
20,512.0,2013,Stoke City,3,0.0,13.0,0,0,6.0,6,...,11.0,-4.0,2.0,5.0,3.0,1.0,5.0,5.0,0.0,2.0
21,984.0,2013,West Bromwich Albion,4,0.0,15.0,1,0,3.0,6,...,10.0,0.0,3.0,3.0,4.0,1.0,5.0,5.0,0.0,3.0
22,289.0,2013,Sunderland AFC,4,0.0,12.0,1,0,1.0,6,...,22.0,-15.0,1.0,8.0,1.0,0.0,5.0,5.0,0.0,1.0
23,379.0,2013,West Ham United Football Club,4,2.0,4.0,1,1,1.0,7,...,8.0,0.0,2.0,4.0,4.0,2.0,5.0,5.0,0.0,2.0
24,281.0,2013,Manchester City Football Club,1,4.0,1.0,1,1,3.0,9,...,11.0,17.0,6.0,3.0,1.0,1.0,5.0,5.0,0.0,5.0
25,1123.0,2013,Norwich City,5,2.0,11.0,1,2,1.0,2,...,20.0,-14.0,2.0,6.0,2.0,0.0,5.0,5.0,0.0,2.0
26,762.0,2013,Newcastle United Football Club,3,0.0,20.0,0,0,3.0,9,...,16.0,-2.0,4.0,4.0,2.0,1.0,5.0,5.0,0.0,3.0
27,180.0,2013,Southampton Football Club,3,1.0,8.0,0,1,13.0,6,...,4.0,7.0,5.0,1.0,4.0,7.0,5.0,5.0,0.0,3.0
28,11.0,2013,Arsenal Football Club,2,1.0,16.0,1,0,1.0,17,...,9.0,13.0,8.0,1.0,1.0,9.0,5.0,5.0,0.0,4.0
29,405.0,2013,Aston Villa Football Club,4,3.0,3.0,0,1,5.0,17,...,12.0,-3.0,3.0,5.0,2.0,1.0,5.0,5.0,0.0,3.0


In [2457]:
#Vamos ver para 2014
df_novo_final.loc[df_novo_final['season']==2014].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
40,11.0,2014,Arsenal Football Club,2,2.0,3.0,1,1,7.0,16,...,11.0,7.0,4.0,1.0,5.0,3.0,5.0,5.0,0.0,
41,1039.0,2014,Queens Park Rangers,5,0.0,18.0,1,0,17.0,6,...,20.0,-11.0,2.0,7.0,1.0,0.0,5.0,5.0,1.0,
42,984.0,2014,West Bromwich Albion,4,2.0,11.0,1,2,1.0,2,...,13.0,0.0,3.0,3.0,4.0,3.0,5.0,5.0,0.0,
43,31.0,2014,Liverpool Football Club,2,2.0,4.0,1,1,1.0,16,...,13.0,0.0,4.0,4.0,2.0,0.0,5.0,5.0,0.0,
44,180.0,2014,Southampton Football Club,3,1.0,13.0,0,0,1.0,16,...,5.0,16.0,7.0,2.0,1.0,3.0,5.0,5.0,0.0,
45,512.0,2014,Stoke City,3,0.0,17.0,1,0,1.0,6,...,12.0,-2.0,3.0,4.0,3.0,1.0,5.0,5.0,0.0,
46,873.0,2014,Crystal Palace Football Club,3,1.0,14.0,0,0,1.0,16,...,19.0,-5.0,2.0,5.0,3.0,0.0,5.0,5.0,0.0,
47,3008.0,2014,Hull City,5,1.0,8.0,0,1,11.0,6,...,14.0,-1.0,2.0,3.0,5.0,0.0,5.0,5.0,0.0,
48,762.0,2014,Newcastle United Football Club,4,0.0,20.0,1,0,5.0,7,...,15.0,-4.0,3.0,3.0,4.0,4.0,5.0,5.0,0.0,
49,379.0,2014,West Ham United Football Club,3,0.0,16.0,1,0,1.0,6,...,14.0,5.0,5.0,3.0,2.0,4.0,5.0,5.0,0.0,


In [2458]:
#2014
mapeamento_forca = {
    'Arsenal Football Club':5,
    'Queens Park Rangers':2,
    'West Bromwich Albion':2,
    'Liverpool Football Club':4,
    'Southampton Football Club':3,
    'Stoke City':3,
    'Crystal Palace Football Club':2,
    'Hull City':2,
    'Newcastle United Football Club':3,
    'West Ham United Football Club': 3,
    'Swansea City':3,
    'Sunderland AFC':3,
    'Chelsea Football Club':5,
    'Tottenham Hotspur Football Club':4,
    'Burnley FC':1,
    'Leicester City Football Club':2,
    'Everton Football Club':4,
    'Manchester City Football Club':5,
    'Aston Villa Football Club':2,
    'Manchester United Football Club':5
}
#Vamos mapear apenas para season 2014, portanto usaremos loc    
df_novo_final.loc[df_novo_final['season'] == 2014, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2014, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2014].head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
40,11.0,2014,Arsenal Football Club,2,2.0,3.0,1,1,7.0,16,...,11.0,7.0,4.0,1.0,5.0,3.0,5.0,5.0,0.0,5.0
41,1039.0,2014,Queens Park Rangers,5,0.0,18.0,1,0,17.0,6,...,20.0,-11.0,2.0,7.0,1.0,0.0,5.0,5.0,1.0,2.0
42,984.0,2014,West Bromwich Albion,4,2.0,11.0,1,2,1.0,2,...,13.0,0.0,3.0,3.0,4.0,3.0,5.0,5.0,0.0,2.0
43,31.0,2014,Liverpool Football Club,2,2.0,4.0,1,1,1.0,16,...,13.0,0.0,4.0,4.0,2.0,0.0,5.0,5.0,0.0,4.0
44,180.0,2014,Southampton Football Club,3,1.0,13.0,0,0,1.0,16,...,5.0,16.0,7.0,2.0,1.0,3.0,5.0,5.0,0.0,3.0
45,512.0,2014,Stoke City,3,0.0,17.0,1,0,1.0,6,...,12.0,-2.0,3.0,4.0,3.0,1.0,5.0,5.0,0.0,3.0
46,873.0,2014,Crystal Palace Football Club,3,1.0,14.0,0,0,1.0,16,...,19.0,-5.0,2.0,5.0,3.0,0.0,5.0,5.0,0.0,2.0
47,3008.0,2014,Hull City,5,1.0,8.0,0,1,11.0,6,...,14.0,-1.0,2.0,3.0,5.0,0.0,5.0,5.0,0.0,2.0
48,762.0,2014,Newcastle United Football Club,4,0.0,20.0,1,0,5.0,7,...,15.0,-4.0,3.0,3.0,4.0,4.0,5.0,5.0,0.0,3.0
49,379.0,2014,West Ham United Football Club,3,0.0,16.0,1,0,1.0,6,...,14.0,5.0,5.0,3.0,2.0,4.0,5.0,5.0,0.0,3.0


In [2459]:
#Agora vamos para temporada 2015
df_novo_final.loc[df_novo_final['season']==2015].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
60,2288.0,2015,Swansea City,3,2.0,13.0,0,2,4.0,2,...,12.0,0.0,3.0,3.0,4.0,1.0,5.0,5.0,0.0,
61,989.0,2015,Association Football Club Bournemouth,4,0.0,16.0,1,0,3.0,6,...,22.0,-10.0,2.0,6.0,2.0,0.0,5.0,5.0,1.0,
62,631.0,2015,Chelsea Football Club,3,2.0,10.0,1,2,1.0,2,...,19.0,-4.0,3.0,5.0,2.0,0.0,5.0,5.0,0.0,
63,31.0,2015,Liverpool Football Club,3,1.0,5.0,0,1,1.0,6,...,11.0,-2.0,3.0,2.0,5.0,5.0,5.0,5.0,0.0,
64,762.0,2015,Newcastle United Football Club,5,2.0,11.0,1,2,1.0,2,...,22.0,-10.0,1.0,6.0,3.0,0.0,5.0,5.0,0.0,
65,512.0,2015,Stoke City,3,0.0,15.0,1,0,7.0,6,...,12.0,-3.0,3.0,4.0,3.0,0.0,5.0,5.0,0.0,
66,11.0,2015,Arsenal Football Club,2,0.0,19.0,1,0,1.0,7,...,8.0,10.0,7.0,2.0,1.0,4.0,5.0,5.0,0.0,
67,1003.0,2015,Leicester City Football Club,1,4.0,2.0,1,1,3.0,24,...,17.0,3.0,5.0,1.0,4.0,3.0,5.0,5.0,0.0,
68,873.0,2015,Crystal Palace Football Club,4,3.0,3.0,0,1,4.0,17,...,11.0,1.0,5.0,5.0,0.0,0.0,5.0,5.0,0.0,
69,1123.0,2015,Norwich City,5,1.0,18.0,1,0,12.0,17,...,21.0,-7.0,2.0,5.0,3.0,0.0,5.0,5.0,1.0,


In [2460]:
#2015:
mapeamento_forca = {
    'Chelsea Football Club':5,
    'Manchester City Football Club':5,
    'Arsenal Football Club':5,
    'Manchester United Football Club':5,
    'Liverpool Football Club':4,
    'Tottenham Hotspur Football Club':4,
    'Everton Football Club':4,
    'Southampton Football Club':3,
    'Stoke City':3,
    'Swansea City':3,
    'Crystal Palace Football Club':3,
    'West Ham United Football Club': 3,
    'Newcastle United Football Club':3,
    'West Bromwich Albion':3,
    'Leicester City Football Club':2,
    'Aston Villa Football Club':2,
    'Sunderland AFC':2,
    'Norwich City':2,
    'Association Football Club Bournemouth':2,
    'Watford FC':1
}

#Vamos mapear apenas para season 2015, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2015, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2015, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2015].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
60,2288.0,2015,Swansea City,3,2.0,13.0,0,2,4.0,2,...,12.0,0.0,3.0,3.0,4.0,1.0,5.0,5.0,0.0,3.0
61,989.0,2015,Association Football Club Bournemouth,4,0.0,16.0,1,0,3.0,6,...,22.0,-10.0,2.0,6.0,2.0,0.0,5.0,5.0,1.0,2.0
62,631.0,2015,Chelsea Football Club,3,2.0,10.0,1,2,1.0,2,...,19.0,-4.0,3.0,5.0,2.0,0.0,5.0,5.0,0.0,5.0
63,31.0,2015,Liverpool Football Club,3,1.0,5.0,0,1,1.0,6,...,11.0,-2.0,3.0,2.0,5.0,5.0,5.0,5.0,0.0,4.0
64,762.0,2015,Newcastle United Football Club,5,2.0,11.0,1,2,1.0,2,...,22.0,-10.0,1.0,6.0,3.0,0.0,5.0,5.0,0.0,3.0
65,512.0,2015,Stoke City,3,0.0,15.0,1,0,7.0,6,...,12.0,-3.0,3.0,4.0,3.0,0.0,5.0,5.0,0.0,3.0
66,11.0,2015,Arsenal Football Club,2,0.0,19.0,1,0,1.0,7,...,8.0,10.0,7.0,2.0,1.0,4.0,5.0,5.0,0.0,5.0
67,1003.0,2015,Leicester City Football Club,1,4.0,2.0,1,1,3.0,24,...,17.0,3.0,5.0,1.0,4.0,3.0,5.0,5.0,0.0,2.0
68,873.0,2015,Crystal Palace Football Club,4,3.0,3.0,0,1,4.0,17,...,11.0,1.0,5.0,5.0,0.0,0.0,5.0,5.0,0.0,3.0
69,1123.0,2015,Norwich City,5,1.0,18.0,1,0,12.0,17,...,21.0,-7.0,2.0,5.0,3.0,0.0,5.0,5.0,1.0,2.0


In [2461]:
#Vamos ver 2016 agora 
df_novo_final.loc[df_novo_final['season']==2016].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
80,985.0,2016,Manchester United Football Club,2,3.0,1.0,0,1,1.0,17,...,12.0,1.0,4.0,3.0,3.0,1.0,5.0,5.0,0.0,
81,984.0,2016,West Bromwich Albion,3,1.0,6.0,0,1,3.0,6,...,14.0,-4.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0,
82,11.0,2016,Arsenal Football Club,2,3.0,14.0,1,0,1.0,29,...,10.0,13.0,7.0,1.0,2.0,9.0,5.0,5.0,0.0,
83,631.0,2016,Chelsea Football Club,1,2.0,4.0,1,1,7.0,16,...,9.0,12.0,7.0,2.0,1.0,4.0,5.0,5.0,0.0,
84,379.0,2016,West Ham United Football Club,3,1.0,16.0,0,0,7.0,16,...,19.0,-9.0,3.0,6.0,1.0,0.0,5.0,5.0,0.0,
85,2288.0,2016,Swansea City,4,1.0,7.0,0,1,5.0,6,...,18.0,-9.0,1.0,7.0,2.0,0.0,5.0,5.0,0.0,
86,1132.0,2016,Burnley FC,4,0.0,19.0,1,0,13.0,6,...,13.0,-5.0,3.0,5.0,2.0,2.0,6.0,4.0,1.0,
87,29.0,2016,Everton Football Club,3,1.0,8.0,1,2,14.0,1,...,8.0,7.0,5.0,2.0,3.0,1.0,5.0,5.0,0.0,
88,31.0,2016,Liverpool Football Club,2,4.0,2.0,0,1,5.0,29,...,13.0,11.0,7.0,1.0,2.0,8.0,4.0,6.0,0.0,
89,873.0,2016,Crystal Palace Football Club,4,0.0,18.0,1,0,1.0,6,...,16.0,-2.0,3.0,5.0,2.0,0.0,5.0,5.0,0.0,


In [2462]:
#2016:
mapeamento_forca = {
    'Manchester City Football Club':5,
    'Manchester United Football Club':5,
    'Chelsea Football Club':5,
    'Arsenal Football Club':5,
    'Tottenham Hotspur Football Club':4,
    'Liverpool Football Club':4,
    'Everton Football Club':4,
    'Southampton Football Club':3,
    'Leicester City Football Club':4,
    'West Ham United Football Club': 3,
    'Stoke City':3,
    'Sunderland AFC':3,
    'Crystal Palace Football Club':3,
    'West Bromwich Albion':2,
    'Swansea City':2,
    'Association Football Club Bournemouth':2,
    'Middlesbrough FC':2,
    'Burnley FC':2,
    'Hull City':1,
    'Watford FC':2
}
#Vamos mapear apenas para season 2016, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2016, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2016, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2016].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
80,985.0,2016,Manchester United Football Club,2,3.0,1.0,0,1,1.0,17,...,12.0,1.0,4.0,3.0,3.0,1.0,5.0,5.0,0.0,5.0
81,984.0,2016,West Bromwich Albion,3,1.0,6.0,0,1,3.0,6,...,14.0,-4.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0,2.0
82,11.0,2016,Arsenal Football Club,2,3.0,14.0,1,0,1.0,29,...,10.0,13.0,7.0,1.0,2.0,9.0,5.0,5.0,0.0,5.0
83,631.0,2016,Chelsea Football Club,1,2.0,4.0,1,1,7.0,16,...,9.0,12.0,7.0,2.0,1.0,4.0,5.0,5.0,0.0,5.0
84,379.0,2016,West Ham United Football Club,3,1.0,16.0,0,0,7.0,16,...,19.0,-9.0,3.0,6.0,1.0,0.0,5.0,5.0,0.0,3.0
85,2288.0,2016,Swansea City,4,1.0,7.0,0,1,5.0,6,...,18.0,-9.0,1.0,7.0,2.0,0.0,5.0,5.0,0.0,2.0
86,1132.0,2016,Burnley FC,4,0.0,19.0,1,0,13.0,6,...,13.0,-5.0,3.0,5.0,2.0,2.0,6.0,4.0,1.0,2.0
87,29.0,2016,Everton Football Club,3,1.0,8.0,1,2,14.0,1,...,8.0,7.0,5.0,2.0,3.0,1.0,5.0,5.0,0.0,4.0
88,31.0,2016,Liverpool Football Club,2,4.0,2.0,0,1,5.0,29,...,13.0,11.0,7.0,1.0,2.0,8.0,4.0,6.0,0.0,4.0
89,873.0,2016,Crystal Palace Football Club,4,0.0,18.0,1,0,1.0,6,...,16.0,-2.0,3.0,5.0,2.0,0.0,5.0,5.0,0.0,3.0


In [2463]:
#Vamos para 2017
df_novo_final.loc[df_novo_final['season']==2017].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
100,1237.0,2017,Brighton and Hove Albion Football Club,4,0.0,18.0,1,0,5.0,7,...,11.0,-1.0,3.0,4.0,3.0,3.0,5.0,5.0,1.0,
101,631.0,2017,Chelsea Football Club,2,2.0,14.0,1,0,14.0,23,...,10.0,8.0,6.0,3.0,1.0,2.0,5.0,5.0,0.0,
102,1110.0,2017,Huddersfield Town,4,3.0,2.0,0,1,1.0,8,...,13.0,-6.0,3.0,4.0,3.0,0.0,5.0,5.0,1.0,
103,281.0,2017,Manchester City Football Club,1,2.0,4.0,0,1,27.0,7,...,6.0,29.0,9.0,0.0,1.0,10.0,5.0,5.0,0.0,
104,31.0,2017,Liverpool Football Club,2,3.0,9.0,0,2,5.0,3,...,16.0,1.0,4.0,2.0,4.0,1.0,5.0,5.0,0.0,
105,180.0,2017,Southampton Football Club,4,0.0,11.0,1,2,1.0,0,...,10.0,-1.0,3.0,3.0,4.0,3.0,6.0,4.0,0.0,
106,984.0,2017,West Bromwich Albion,5,1.0,8.0,1,1,7.0,6,...,13.0,-4.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0,
107,985.0,2017,Manchester United Football Club,2,4.0,1.0,1,1,1.0,9,...,4.0,19.0,7.0,1.0,2.0,1.0,5.0,5.0,0.0,
108,512.0,2017,Stoke City,5,0.0,16.0,0,0,22.0,6,...,20.0,-9.0,3.0,5.0,2.0,1.0,5.0,5.0,0.0,
109,2288.0,2017,Swansea City,5,0.0,12.0,0,2,5.0,0,...,12.0,-5.0,2.0,6.0,2.0,0.0,5.0,5.0,0.0,


In [2464]:
#2017:
mapeamento_forca = {
    'Chelsea Football Club':5,
    'Manchester City Football Club':5,
    'Manchester United Football Club':5,
    'Tottenham Hotspur Football Club':5,
    'Liverpool Football Club':4,
    'Arsenal Football Club':4,
    'Everton Football Club':4,
    'Leicester City Football Club':3,
    'Southampton Football Club':3,
    'West Ham United Football Club': 3,
    'Stoke City':3,
    'Crystal Palace Football Club':3,
    'West Bromwich Albion':3,
    'Watford FC':3,
    'Association Football Club Bournemouth':3,
    'Swansea City':2,
    'Burnley FC':2,
    'Newcastle United Football Club':2,
    'Brighton and Hove Albion Football Club':1,
    'Huddersfield Town':1
}
#Vamos mapear apenas para season 2017, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2017, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2017, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2017].head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
100,1237.0,2017,Brighton and Hove Albion Football Club,4,0.0,18.0,1,0,5.0,7,...,11.0,-1.0,3.0,4.0,3.0,3.0,5.0,5.0,1.0,1.0
101,631.0,2017,Chelsea Football Club,2,2.0,14.0,1,0,14.0,23,...,10.0,8.0,6.0,3.0,1.0,2.0,5.0,5.0,0.0,5.0
102,1110.0,2017,Huddersfield Town,4,3.0,2.0,0,1,1.0,8,...,13.0,-6.0,3.0,4.0,3.0,0.0,5.0,5.0,1.0,1.0
103,281.0,2017,Manchester City Football Club,1,2.0,4.0,0,1,27.0,7,...,6.0,29.0,9.0,0.0,1.0,10.0,5.0,5.0,0.0,5.0
104,31.0,2017,Liverpool Football Club,2,3.0,9.0,0,2,5.0,3,...,16.0,1.0,4.0,2.0,4.0,1.0,5.0,5.0,0.0,4.0
105,180.0,2017,Southampton Football Club,4,0.0,11.0,1,2,1.0,0,...,10.0,-1.0,3.0,3.0,4.0,3.0,6.0,4.0,0.0,3.0
106,984.0,2017,West Bromwich Albion,5,1.0,8.0,1,1,7.0,6,...,13.0,-4.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0,3.0
107,985.0,2017,Manchester United Football Club,2,4.0,1.0,1,1,1.0,9,...,4.0,19.0,7.0,1.0,2.0,1.0,5.0,5.0,0.0,5.0
108,512.0,2017,Stoke City,5,0.0,16.0,0,0,22.0,6,...,20.0,-9.0,3.0,5.0,2.0,1.0,5.0,5.0,0.0,3.0
109,2288.0,2017,Swansea City,5,0.0,12.0,0,2,5.0,0,...,12.0,-5.0,2.0,6.0,2.0,0.0,5.0,5.0,0.0,2.0


In [2465]:

#mapeamento 2018:
mapeamento_forca = {
    'Manchester City Football Club':5,
    'Liverpool Football Club':5,
    'Chelsea Football Club':5,
    'Manchester United Football Club':4,
    'Arsenal Football Club':4,
    'Tottenham Hotspur Football Club':4,
    'Everton Football Club':3,
    'Leicester City Football Club':3,
    'West Ham United Football Club': 3,
    'Wolverhampton Wanderers Football Club':3,
    'Newcastle United Football Club':3,
    'Crystal Palace Football Club':3,
    'Association Football Club Bournemouth':3,
    'Southampton Football Club':2,
    'Brighton and Hove Albion Football Club':2,
    'Watford FC':2,
    'Burnley FC':2,
    'Fulham Football Club':2,
    'Huddersfield Town':1,
    'Cardiff City':1
}
#Vamos mapear apenas para season 2018, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2018, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2018, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2018].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
120,1110.0,2018,Huddersfield Town,5,0.0,19.0,1,0,9.0,8,...,21.0,-17.0,0.0,7.0,3.0,0.0,5.0,5.0,0.0,1.0
121,603.0,2018,Cardiff City,5,0.0,17.0,0,0,7.0,7,...,23.0,-14.0,1.0,7.0,2.0,0.0,5.0,5.0,1.0,1.0
122,631.0,2018,Chelsea Football Club,2,3.0,2.0,0,1,5.0,8,...,7.0,17.0,7.0,0.0,3.0,10.0,5.0,5.0,0.0,5.0
123,1010.0,2018,Watford FC,3,2.0,6.0,1,1,3.0,7,...,12.0,4.0,6.0,3.0,1.0,2.0,6.0,4.0,0.0,2.0
124,180.0,2018,Southampton Football Club,4,0.0,12.0,1,2,14.0,0,...,14.0,-8.0,1.0,5.0,4.0,2.0,5.0,5.0,0.0,2.0
125,985.0,2018,Manchester United Football Club,2,2.0,7.0,1,1,7.0,16,...,17.0,0.0,5.0,3.0,2.0,3.0,5.0,5.0,0.0,4.0
126,543.0,2018,Wolverhampton Wanderers Football Club,3,2.0,10.0,1,2,22.0,2,...,9.0,0.0,4.0,3.0,3.0,0.0,5.0,5.0,1.0,3.0
127,873.0,2018,Crystal Palace Football Club,3,2.0,4.0,0,1,13.0,7,...,13.0,-6.0,2.0,6.0,2.0,1.0,5.0,5.0,0.0,3.0
128,379.0,2018,West Ham United Football Club,3,0.0,20.0,0,0,1.0,9,...,15.0,-6.0,2.0,6.0,2.0,1.0,5.0,5.0,0.0,3.0
129,1237.0,2018,Brighton and Hove Albion Football Club,4,0.0,16.0,0,0,12.0,7,...,13.0,-2.0,4.0,4.0,2.0,3.0,5.0,5.0,0.0,2.0


In [2466]:
#Vamos para 2021
df_novo_final.loc[df_novo_final['season']==2021].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
140,1237.0,2021,Brighton and Hove Albion Football Club,3,2.0,8.0,0,1,14.0,16,...,11.0,0.0,4.0,2.0,4.0,1.0,5.0,5.0,0.0,
141,31.0,2021,Liverpool Football Club,2,3.0,3.0,0,1,5.0,8,...,8.0,21.0,6.0,0.0,4.0,10.0,5.0,5.0,0.0,
142,379.0,2021,West Ham United Football Club,3,4.0,4.0,0,1,1.0,24,...,11.0,9.0,6.0,2.0,2.0,3.0,5.0,5.0,0.0,
143,148.0,2021,Tottenham Hotspur Football Club,2,1.0,9.0,1,1,5.0,6,...,16.0,-7.0,5.0,5.0,0.0,0.0,5.0,5.0,0.0,
144,180.0,2021,Southampton Football Club,4,1.0,16.0,0,0,3.0,17,...,12.0,-3.0,2.0,3.0,5.0,3.0,5.0,5.0,0.0,
145,281.0,2021,Manchester City Football Club,1,0.0,14.0,0,0,5.0,6,...,6.0,14.0,6.0,2.0,2.0,0.0,5.0,5.0,0.0,
146,11.0,2021,Arsenal Football Club,2,0.0,17.0,0,0,1.0,7,...,13.0,-1.0,5.0,3.0,2.0,7.0,5.0,5.0,0.0,
147,1003.0,2021,Leicester City Football Club,3,1.0,10.0,1,1,1.0,6,...,17.0,-2.0,4.0,4.0,2.0,0.0,5.0,5.0,0.0,
148,1010.0,2021,Watford FC,5,3.0,7.0,1,1,7.0,23,...,18.0,-6.0,3.0,6.0,1.0,0.0,5.0,5.0,1.0,
149,543.0,2021,Wolverhampton Wanderers Football Club,3,0.0,13.0,0,0,11.0,6,...,10.0,1.0,5.0,4.0,1.0,5.0,5.0,5.0,0.0,


In [2467]:
#2021
mapeamento_forca = {
    'Manchester City Football Club':5,
    'Chelsea Football Club':5,
    'Liverpool Football Club':5,
    'Manchester United Football Club':5,
    'Tottenham Hotspur Football Club':4,
    'Arsenal Football Club':4,
    'Leicester City Football Club':4,
    'Everton Football Club':4,
    'West Ham United Football Club': 4,
    'Aston Villa Football Club':3,
    'Leeds United':3,
    'Brighton and Hove Albion Football Club':3,
    'Wolverhampton Wanderers Football Club':3,
    'Southampton Football Club':3,
    'Crystal Palace Football Club':2,
    'Newcastle United Football Club':2,
    'Burnley FC':2,
    'Brentford Football Club':2,
    'Norwich City':1,
    'Watford FC':1
}
#Vamos mapear apenas para season 2021, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2021, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2021, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2021].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
140,1237.0,2021,Brighton and Hove Albion Football Club,3,2.0,8.0,0,1,14.0,16,...,11.0,0.0,4.0,2.0,4.0,1.0,5.0,5.0,0.0,3.0
141,31.0,2021,Liverpool Football Club,2,3.0,3.0,0,1,5.0,8,...,8.0,21.0,6.0,0.0,4.0,10.0,5.0,5.0,0.0,5.0
142,379.0,2021,West Ham United Football Club,3,4.0,4.0,0,1,1.0,24,...,11.0,9.0,6.0,2.0,2.0,3.0,5.0,5.0,0.0,4.0
143,148.0,2021,Tottenham Hotspur Football Club,2,1.0,9.0,1,1,5.0,6,...,16.0,-7.0,5.0,5.0,0.0,0.0,5.0,5.0,0.0,4.0
144,180.0,2021,Southampton Football Club,4,1.0,16.0,0,0,3.0,17,...,12.0,-3.0,2.0,3.0,5.0,3.0,5.0,5.0,0.0,3.0
145,281.0,2021,Manchester City Football Club,1,0.0,14.0,0,0,5.0,6,...,6.0,14.0,6.0,2.0,2.0,0.0,5.0,5.0,0.0,5.0
146,11.0,2021,Arsenal Football Club,2,0.0,17.0,0,0,1.0,7,...,13.0,-1.0,5.0,3.0,2.0,7.0,5.0,5.0,0.0,4.0
147,1003.0,2021,Leicester City Football Club,3,1.0,10.0,1,1,1.0,6,...,17.0,-2.0,4.0,4.0,2.0,0.0,5.0,5.0,0.0,4.0
148,1010.0,2021,Watford FC,5,3.0,7.0,1,1,7.0,23,...,18.0,-6.0,3.0,6.0,1.0,0.0,5.0,5.0,1.0,1.0
149,543.0,2021,Wolverhampton Wanderers Football Club,3,0.0,13.0,0,0,11.0,6,...,10.0,1.0,5.0,4.0,1.0,5.0,5.0,5.0,0.0,3.0


In [2468]:
#Vamos para 2022
df_novo_final.loc[df_novo_final['season']==2022].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
160,931.0,2022,Fulham Football Club,3,2.0,9.0,1,2,1.0,2,...,19.0,-3.0,4.0,4.0,2.0,0.0,5.0,5.0,1.0,
161,29.0,2022,Everton Football Club,4,0.0,15.0,1,0,11.0,6,...,13.0,-5.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0,
162,985.0,2022,Manchester United Football Club,2,1.0,14.0,1,0,5.0,16,...,18.0,-2.0,5.0,3.0,2.0,1.0,4.0,6.0,0.0,
163,1237.0,2022,Brighton and Hove Albion Football Club,2,2.0,7.0,0,1,14.0,16,...,9.0,8.0,6.0,2.0,2.0,0.0,5.0,5.0,0.0,
164,543.0,2022,Wolverhampton Wanderers Football Club,4,1.0,13.0,0,0,5.0,16,...,14.0,-11.0,1.0,6.0,3.0,0.0,4.0,6.0,0.0,
165,11.0,2022,Arsenal Football Club,2,2.0,2.0,0,1,1.0,7,...,10.0,17.0,9.0,1.0,0.0,4.0,6.0,4.0,0.0,
166,631.0,2022,Chelsea Football Club,3,1.0,8.0,0,1,14.0,6,...,12.0,2.0,5.0,3.0,2.0,3.0,5.0,5.0,0.0,
167,31.0,2022,Liverpool Football Club,2,2.0,10.0,0,2,5.0,2,...,12.0,10.0,3.0,2.0,5.0,0.0,5.0,5.0,0.0,
168,281.0,2022,Manchester City Football Club,1,2.0,5.0,0,1,7.0,7,...,11.0,26.0,8.0,0.0,2.0,10.0,6.0,4.0,0.0,
169,989.0,2022,Association Football Club Bournemouth,4,2.0,3.0,1,1,11.0,7,...,22.0,-14.0,3.0,4.0,3.0,3.0,6.0,4.0,1.0,


In [2469]:

#2022
mapeamento_forca = {
    'Manchester City Football Club':5,
    'Liverpool Football Club':5,
    'Chelsea Football Club':5,
    'Tottenham Hotspur Football Club':4,
    'Manchester United Football Club':4,
    'Arsenal Football Club':4,
    'West Ham United Football Club': 4,
    'Leicester City Football Club':3,
    'Aston Villa Football Club':3,
    'Wolverhampton Wanderers Football Club':3,
    'Brighton and Hove Albion Football Club':3,
    'Crystal Palace Football Club':3,
    'Newcastle United Football Club':3,
    'Everton Football Club':2,
    'Southampton Football Club':2,
    'Leeds United':2,
    'Brentford Football Club':2,
    'Fulham Football Club':2,
    'Nottingham Forest Football Club':1,
    'Association Football Club Bournemouth':1
}
#Vamos mapear apenas para season 2022, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2022, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2022, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2022].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
160,931.0,2022,Fulham Football Club,3,2.0,9.0,1,2,1.0,2,...,19.0,-3.0,4.0,4.0,2.0,0.0,5.0,5.0,1.0,2.0
161,29.0,2022,Everton Football Club,4,0.0,15.0,1,0,11.0,6,...,13.0,-5.0,2.0,4.0,4.0,0.0,5.0,5.0,0.0,2.0
162,985.0,2022,Manchester United Football Club,2,1.0,14.0,1,0,5.0,16,...,18.0,-2.0,5.0,3.0,2.0,1.0,4.0,6.0,0.0,4.0
163,1237.0,2022,Brighton and Hove Albion Football Club,2,2.0,7.0,0,1,14.0,16,...,9.0,8.0,6.0,2.0,2.0,0.0,5.0,5.0,0.0,3.0
164,543.0,2022,Wolverhampton Wanderers Football Club,4,1.0,13.0,0,0,5.0,16,...,14.0,-11.0,1.0,6.0,3.0,0.0,4.0,6.0,0.0,3.0
165,11.0,2022,Arsenal Football Club,2,2.0,2.0,0,1,1.0,7,...,10.0,17.0,9.0,1.0,0.0,4.0,6.0,4.0,0.0,4.0
166,631.0,2022,Chelsea Football Club,3,1.0,8.0,0,1,14.0,6,...,12.0,2.0,5.0,3.0,2.0,3.0,5.0,5.0,0.0,5.0
167,31.0,2022,Liverpool Football Club,2,2.0,10.0,0,2,5.0,2,...,12.0,10.0,3.0,2.0,5.0,0.0,5.0,5.0,0.0,5.0
168,281.0,2022,Manchester City Football Club,1,2.0,5.0,0,1,7.0,7,...,11.0,26.0,8.0,0.0,2.0,10.0,6.0,4.0,0.0,5.0
169,989.0,2022,Association Football Club Bournemouth,4,2.0,3.0,1,1,11.0,7,...,22.0,-14.0,3.0,4.0,3.0,3.0,6.0,4.0,1.0,1.0


In [2470]:
#Vamos para 2023:
df_novo_final.loc[df_novo_final['season']==2023].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
180,350.0,2023,Sheffield United,5,0.0,14.0,1,0,22.0,6,...,29.0,-22.0,0.0,9.0,1.0,0.0,5.0,5.0,1.0,
181,762.0,2023,Newcastle United Football Club,3,5.0,1.0,1,1,5.0,19,...,11.0,15.0,5.0,3.0,2.0,6.0,5.0,5.0,0.0,
182,1237.0,2023,Brighton and Hove Albion Football Club,3,4.0,2.0,1,1,1.0,18,...,19.0,4.0,5.0,3.0,2.0,1.0,6.0,4.0,0.0,
183,281.0,2023,Manchester City Football Club,1,3.0,3.0,0,1,1.0,8,...,7.0,15.0,8.0,2.0,0.0,2.0,4.0,6.0,0.0,
184,405.0,2023,Aston Villa Football Club,2,1.0,18.0,0,0,1.0,19,...,14.0,12.0,7.0,2.0,1.0,6.0,5.0,5.0,0.0,
185,985.0,2023,Manchester United Football Club,3,1.0,7.0,1,1,7.0,6,...,16.0,-5.0,5.0,5.0,0.0,0.0,6.0,4.0,0.0,
186,873.0,2023,Crystal Palace Football Club,3,1.0,5.0,0,1,1.0,6,...,13.0,-5.0,3.0,4.0,3.0,0.0,5.0,5.0,0.0,
187,703.0,2023,Nottingham Forest Football Club,4,1.0,19.0,0,0,14.0,16,...,15.0,-5.0,2.0,4.0,4.0,0.0,4.0,6.0,0.0,
188,543.0,2023,Wolverhampton Wanderers Football Club,4,0.0,15.0,0,0,13.0,6,...,17.0,-4.0,3.0,4.0,3.0,5.0,5.0,5.0,0.0,
189,1132.0,2023,Burnley FC,5,0.0,17.0,1,0,11.0,8,...,25.0,-17.0,1.0,8.0,1.0,0.0,5.0,5.0,1.0,


In [2471]:

#2023
mapeamento_forca = {
    'Manchester City Football Club':5,
    'Arsenal Football Club':5,
    'Manchester United Football Club':5,
    'Liverpool Football Club':5,
    'Chelsea Football Club':4,
    'Newcastle United Football Club':4,
    'Brighton and Hove Albion Football Club':4,
    'Tottenham Hotspur Football Club':4,
    'Aston Villa Football Club':4,
    'West Ham United Football Club': 3,
    'Fulham Football Club':3,
    'Brentford Football Club':3,
    'Crystal Palace Football Club':3,
    'Wolverhampton Wanderers Football Club':3,
    'Everton Football Club':3,
    'Nottingham Forest Football Club':2,
    'Association Football Club Bournemouth':2,
    'Burnley FC':2,
    'Sheffield United':2,
    'Luton Town':1
}
#Vamos mapear apenas para season 2023, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2023, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2023, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2023].head(20)


Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
180,350.0,2023,Sheffield United,5,0.0,14.0,1,0,22.0,6,...,29.0,-22.0,0.0,9.0,1.0,0.0,5.0,5.0,1.0,2.0
181,762.0,2023,Newcastle United Football Club,3,5.0,1.0,1,1,5.0,19,...,11.0,15.0,5.0,3.0,2.0,6.0,5.0,5.0,0.0,4.0
182,1237.0,2023,Brighton and Hove Albion Football Club,3,4.0,2.0,1,1,1.0,18,...,19.0,4.0,5.0,3.0,2.0,1.0,6.0,4.0,0.0,4.0
183,281.0,2023,Manchester City Football Club,1,3.0,3.0,0,1,1.0,8,...,7.0,15.0,8.0,2.0,0.0,2.0,4.0,6.0,0.0,5.0
184,405.0,2023,Aston Villa Football Club,2,1.0,18.0,0,0,1.0,19,...,14.0,12.0,7.0,2.0,1.0,6.0,5.0,5.0,0.0,4.0
185,985.0,2023,Manchester United Football Club,3,1.0,7.0,1,1,7.0,6,...,16.0,-5.0,5.0,5.0,0.0,0.0,6.0,4.0,0.0,5.0
186,873.0,2023,Crystal Palace Football Club,3,1.0,5.0,0,1,1.0,6,...,13.0,-5.0,3.0,4.0,3.0,0.0,5.0,5.0,0.0,3.0
187,703.0,2023,Nottingham Forest Football Club,4,1.0,19.0,0,0,14.0,16,...,15.0,-5.0,2.0,4.0,4.0,0.0,4.0,6.0,0.0,2.0
188,543.0,2023,Wolverhampton Wanderers Football Club,4,0.0,15.0,0,0,13.0,6,...,17.0,-4.0,3.0,4.0,3.0,5.0,5.0,5.0,0.0,3.0
189,1132.0,2023,Burnley FC,5,0.0,17.0,1,0,11.0,8,...,25.0,-17.0,1.0,8.0,1.0,0.0,5.0,5.0,1.0,2.0


In [2472]:

#2024
mapeamento_forca = {
    'Manchester City Football Club':5,
    'Arsenal Football Club':5,
    'Liverpool Football Club':5,
    'Aston Villa Football Club':5,
    'Chelsea Football Club':4,
    'Manchester United Football Club':4,
    'Tottenham Hotspur Football Club':4,
    'Newcastle United Football Club':4,
    'West Ham United Football Club': 4,
    'Brighton and Hove Albion Football Club':3,
    'Wolverhampton Wanderers Football Club':3,
    'Crystal Palace Football Club':3,
    'Fulham Football Club':3,
    'Everton Football Club':3,
    'Brentford Football Club':2,
    'Association Football Club Bournemouth':2,
    'Nottingham Forest Football Club':2,
    'Leicester City Football Club':2,
    'Ipswich Town Football Club':1,
    'Southampton Football Club':1
}
#Vamos mapear apenas para season 2024, portanto usaremos loc
df_novo_final.loc[df_novo_final['season'] == 2024, 'forca'] = df_novo_final.loc[df_novo_final['season'] == 2024, 'name'].map(mapeamento_forca)
df_novo_final.loc[df_novo_final['season']==2024].head(20)

Unnamed: 0,club_id,season,name,last_position,own_goals_1,own_position_1,hosting_1,is_win_1,home_club_formation_1,aggregate_1,...,gols_sofridos,saldo_gols,vitorias,derrotas,empates,jogos_sem_perder,casa,fora,recem_promovido,forca
200,11.0,2024,Arsenal Football Club,2,2.0,2.0,1,1,5.0,7,...,11.0,6.0,5.0,2.0,3.0,0.0,5.0,5.0,0.0,5.0
201,1148.0,2024,Brentford Football Club,3,2.0,6.0,1,1,5.0,16,...,20.0,-1.0,4.0,5.0,1.0,0.0,5.0,5.0,0.0,2.0
202,1237.0,2024,Brighton and Hove Albion Football Club,3,3.0,1.0,0,1,1.0,8,...,14.0,3.0,4.0,2.0,4.0,0.0,5.0,5.0,0.0,3.0
203,931.0,2024,Fulham Football Club,3,0.0,15.0,0,0,1.0,6,...,13.0,1.0,4.0,3.0,3.0,2.0,5.0,5.0,0.0,3.0
204,281.0,2024,Manchester City Football Club,2,2.0,4.0,0,1,14.0,7,...,11.0,10.0,7.0,1.0,2.0,0.0,5.0,5.0,0.0,5.0
205,543.0,2024,Wolverhampton Wanderers Football Club,4,0.0,19.0,0,0,1.0,7,...,27.0,-13.0,0.0,7.0,3.0,2.0,5.0,5.0,0.0,3.0
206,703.0,2024,Nottingham Forest Football Club,3,1.0,11.0,1,2,1.0,1,...,7.0,7.0,5.0,1.0,4.0,4.0,5.0,5.0,0.0,2.0
207,1003.0,2024,Leicester City Football Club,5,1.0,10.0,1,2,1.0,1,...,18.0,-4.0,2.0,4.0,4.0,1.0,5.0,5.0,1.0,2.0
208,677.0,2024,Ipswich Town Football Club,5,0.0,18.0,1,0,1.0,7,...,21.0,-11.0,0.0,5.0,5.0,1.0,5.0,5.0,1.0,1.0
209,148.0,2024,Tottenham Hotspur Football Club,4,1.0,12.0,0,2,5.0,1,...,11.0,11.0,5.0,4.0,1.0,1.0,5.0,5.0,0.0,4.0


# Ufa, finalmente acabamos as forças
Agora vamos montar um novo df e treinar usando o KNN como fizemos antes para comparamos os resultados

In [2473]:
#Vamos montar um novo dataframe 
novo_tf_com_forcas = df_novo_final[['season', 'last_position', 'total_points_10', 'vitorias', 'derrotas', 'empates', 'casa', 'fora','gols_pro','gols_sofridos','saldo_gols','jogos_sem_perder','recem_promovido','forca']].copy()
#Vamos ver o novo dataframe
novo_tf_com_forcas.head(20)

Unnamed: 0,season,last_position,total_points_10,vitorias,derrotas,empates,casa,fora,gols_pro,gols_sofridos,saldo_gols,jogos_sem_perder,recem_promovido,forca
0,2012,4,4.0,1.0,8.0,1.0,5.0,5.0,14.0,28.0,-14.0,0.0,1.0,1.0
1,2012,4,9.0,1.0,3.0,6.0,4.0,6.0,8.0,10.0,-2.0,0.0,0.0,2.0
2,2012,3,15.0,4.0,3.0,3.0,5.0,5.0,21.0,16.0,5.0,4.0,0.0,3.0
3,2012,4,9.0,2.0,5.0,3.0,4.0,6.0,8.0,14.0,-6.0,2.0,0.0,3.0
4,2012,3,15.0,4.0,3.0,3.0,6.0,4.0,13.0,11.0,2.0,1.0,1.0,2.0
5,2012,3,17.0,5.0,3.0,2.0,6.0,4.0,15.0,11.0,4.0,1.0,0.0,3.0
6,2012,4,12.0,2.0,2.0,6.0,5.0,5.0,9.0,9.0,0.0,0.0,0.0,3.0
7,2012,2,23.0,7.0,1.0,2.0,5.0,5.0,22.0,10.0,12.0,1.0,0.0,5.0
8,2012,5,5.0,0.0,5.0,5.0,4.0,6.0,12.0,21.0,-9.0,2.0,1.0,1.0
9,2012,3,10.0,2.0,4.0,4.0,5.0,5.0,8.0,18.0,-10.0,3.0,0.0,2.0


In [2474]:
#Vamos separar X e y 
X = novo_tf_com_forcas.drop(columns=['last_position'])
#Vamos separar nosso x test e x train de acordo com as temporadas.
X_train = X[X['season'] != 2024]
X_test = X[X['season'] == 2024]
# Agora vamos separar y para ser apenas a coluna last_position
y_train = novo_tf_com_forcas[novo_tf_com_forcas['season'] != 2024]['last_position']
y_test = novo_tf_com_forcas[novo_tf_com_forcas['season'] == 2024]['last_position']

In [2475]:
#Vamos aplicar o scaler nos dados de treino e teste
scaler = MinMaxScaler(feature_range=(0,1))

In [2476]:
#Vamos aplicar o scaler nos dados de treino e teste
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [2477]:
knn = KNeighborsClassifier(n_neighbors=4)

In [2478]:
knn.fit(X_train, y_train)

In [2479]:
y_pred = knn.predict(X_test)

In [2480]:
print(f"Predições: {y_pred}")

Predições: [2 4 3 3 2 5 2 5 5 1 3 1 2 2 3 2 2 5 4 3]


In [2481]:
#Vamos ver a acurácia do modelo
acuracia = accuracy_score(y_test, y_pred)
print(f"Acurácia do modelo: {acuracia:.2f}")

Acurácia do modelo: 0.70


In [2482]:
#Vamos printar o dataframe com o nome dos times na season 2024
df_final_com_pontos = pd.read_csv('df_final_premier_league.csv')
#Vamos criar um novo dataframe com as predições
df_predicoes = pd.DataFrame({
    'name': df_final_com_pontos[df_final_com_pontos['season'] == 2024]['name'].values,
    'last_position': y_pred
})
#Vamos ver o dataframe de predições
print(df_predicoes.head(20))

                                      name  last_position
0                    Arsenal Football Club              2
1                  Brentford Football Club              4
2   Brighton and Hove Albion Football Club              3
3                     Fulham Football Club              3
4            Manchester City Football Club              2
5    Wolverhampton Wanderers Football Club              5
6          Nottingham Forest Football Club              2
7             Leicester City Football Club              5
8               Ipswich Town Football Club              5
9          Tottenham Hotspur Football Club              1
10            Crystal Palace Football Club              3
11                 Liverpool Football Club              1
12          Newcastle United Football Club              2
13           West Ham United Football Club              2
14   Association Football Club Bournemouth              3
15                   Chelsea Football Club              2
16            