In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as lm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from datetime import datetime
import unicodedata
import plotly.express as px
import plotly.graph_objects as go

In [None]:
def remove_acentos(text):
  """Return ``text`` with its accent marks removed.

  The string is decomposed with Unicode NFD normalization, every character
  whose Unicode category is 'Mn' is dropped, and the remaining characters
  are joined back together in their original order.

  Args:
    text: The string to clean.

  Returns:
    A new string without the 'Mn' characters produced by the decomposition.
  """
  decomposed = unicodedata.normalize('NFD', text)
  kept = [symbol for symbol in decomposed if unicodedata.category(symbol) != 'Mn']
  return ''.join(kept)

def normalize_data(data):
  """Min-max scale ``data`` so its smallest value maps to 0 and its largest to 1.

  Each value is transformed as ``(value - min) / (max - min)``, where ``min``
  and ``max`` come from ``np.min`` / ``np.max`` over ``data``.

  Args:
    data: Numeric array-like that supports element-wise arithmetic, such as
      a NumPy array or a pandas Series.

  Returns:
    The rescaled values, produced by the same element-wise arithmetic on
    ``data``. When every value in ``data`` is identical the result is all
    zeros instead of NaN/inf.
  """
  min_data = np.min(data)
  max_data = np.max(data)
  data_range = max_data - min_data

  # A constant input has a zero range, which would turn the division below
  # into 0/0. Dividing by 1 instead maps every value to 0. Only a scalar range
  # is checked; if the reductions return something non-scalar, the original
  # element-wise division is applied unchanged.
  if np.isscalar(data_range) and data_range == 0:
    data_range = 1

  return (data - min_data) / data_range

In [None]:
# Load the three CSV sources used by the notebook.
matches = pd.read_csv('../assets/matches.csv', sep=';')
teams = pd.read_csv('../assets/teams.csv', sep=',')
matches_2 = pd.read_csv('../assets/BRA.csv', sep=',')

# Parse the day/month/year 'Date' strings into datetimes so rows can be
# filtered by date.
matches_2['formated_date'] = pd.to_datetime(matches_2['Date'], format='%d/%m/%Y')

# Rename the BRA.csv columns (the team-name columns then use the same names as
# in `matches`) and keep only the Brasileirão 2024 data, i.e. rows dated
# 2024-04-01 or later.
# .copy() makes the filtered frame independent of the frame it was selected
# from, so the column assignments below do not raise SettingWithCopyWarning.
matches_2 = (
  matches_2
  .rename(columns={
    'Home': 'home_team_name',
    'Away': 'away_team_name',
    'HG': 'home_team_goal_count',
    'AG': 'away_team_goal_count'})
  .query('formated_date >= "2024-04-01"')
  .copy()
)

# Strip accents from the team-name columns of every dataset
# (presumably so the same team is spelled identically across sources).
matches['home_team_name'] = matches['home_team_name'].apply(remove_acentos)
matches['away_team_name'] = matches['away_team_name'].apply(remove_acentos)
matches_2['home_team_name'] = matches_2['home_team_name'].apply(remove_acentos)
matches_2['away_team_name'] = matches_2['away_team_name'].apply(remove_acentos)
teams['common_name'] = teams['common_name'].apply(remove_acentos)

In [None]:
# Min-max scale the two away-game statistics in place (overwriting the
# original columns of `teams`) so both plot axes use the same 0-1 range.
teams['wins_away'] = normalize_data(teams['wins_away'])
teams['first_team_to_score_percentage_away'] = normalize_data(
  teams['first_team_to_score_percentage_away']
)

# Scatter plot of "scored first away" against "away wins", with an OLS
# trendline drawn over the points.
scatter = px.scatter(
  data_frame=teams,
  x='first_team_to_score_percentage_away',
  y='wins_away',
  trendline='ols',
  labels=dict(
    first_team_to_score_percentage_away='Primeiro a pontuar fora de casa',
    wins_away='Vitórias fora de casa',
  ),
  title='Primeiro a pontuar fora de casa x Vitórias fora de casa',
)

# Center the title horizontally. This stays the last expression of the cell
# so that its value is what the notebook shows as the cell output.
scatter.update_layout(title_x=0.5)