In [1]:
import soccerdata as sd
from sqlalchemy import create_engine, text
import pandas as pd
from dotenv import load_dotenv
import os

In [2]:
load_dotenv()
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')
DB_TN_SOFIFA_TEAMS_STATS = os.getenv('DB_TN_SOFIFA_TEAMS_STATS')

connection_url = f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
engine = create_engine(connection_url)

In [15]:
no_cache_latest=False
scrap_all=False

KEY_1 = 'team'
KEY_2 = 'update'


# Charger les données de SoFIFA
# Big 5 leagues
so_fifa_latest = sd.SoFIFA(versions="latest", no_cache=no_cache_latest)
team_ratings = so_fifa_latest.read_team_ratings()
if scrap_all:
    sofifa_all = sd.SoFIFA(versions="all", no_cache=True)
    team_ratings_all = sofifa_all.read_team_ratings()
    team_ratings = pd.concat([team_ratings_all, team_ratings], ignore_index=True)
    team_ratings = team_ratings.drop_duplicates(subset=[KEY_1, KEY_2], keep='last')

# INTERNATIONAL TEAMS
so_fifa_latest = sd.SoFIFA(versions="latest", no_cache=no_cache_latest)
team_ratings_nat = so_fifa_latest.read_team_ratings_nationals()
if scrap_all:
    sofifa_all = sd.SoFIFA(versions="all", no_cache=True)
    team_ratings_nat_all = sofifa_all.read_team_ratings_nationals()
    team_ratings_nat = pd.concat([team_ratings_nat_all, team_ratings_nat], ignore_index=True)
    team_ratings_nat = team_ratings_nat.drop_duplicates(subset=[KEY_1, KEY_2], keep='last')

 

In [4]:
team_ratings_nat.reset_index(inplace=True)
team_ratings_nat

Unnamed: 0,team,overall,attack,midfield,defence,transfer_budget,club_worth,build_up_speed,build_up_dribbling,build_up_passing,...,defence_pressure,defence_team_width,defence_defender_line,defence_domestic_prestige,international_prestige,players,starting_xi_average_age,whole_team_average_age,fifa_edition,update
0,Argentina,83,85,83,82,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,9,26,28.64,27.96,FC 24,"Jun 12, 2024"
1,Belgium,81,82,81,77,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,8,26,27.73,26.23,FC 24,"Jun 12, 2024"
2,Croatia,79,77,82,77,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,6,26,27.18,26.46,FC 24,"Jun 12, 2024"
3,Czech Republic,75,74,75,75,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,5,26,25.45,25.62,FC 24,"Jun 12, 2024"
4,Denmark,79,76,78,79,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,6,26,27.27,26.35,FC 24,"Jun 12, 2024"
5,England,85,87,86,83,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,8,26,26.27,25.69,FC 24,"Jun 12, 2024"
6,Finland,71,70,71,67,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,3,26,28.73,26.5,FC 24,"Jun 12, 2024"
7,France,84,86,85,83,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,10,26,27.27,25.65,FC 24,"Jun 12, 2024"
8,Germany,85,81,85,83,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,10,26,27.73,28.04,FC 24,"Jun 12, 2024"
9,Ghana,75,74,77,73,€0,€0,Slow,Little,Short,...,Deep,Narrow,Cover,10,5,26,26.64,25.88,FC 24,"Jun 12, 2024"


In [5]:
team_ratings.reset_index(inplace=True)
team_ratings

Unnamed: 0,league,team,overall,attack,midfield,defence,transfer_budget,club_worth,build_up_speed,build_up_dribbling,...,defence_pressure,defence_team_width,defence_defender_line,defence_domestic_prestige,international_prestige,players,starting_xi_average_age,whole_team_average_age,fifa_edition,update
0,ENG-Premier League,AFC Bournemouth,76,79,75,74,€0,€215.5M,Slow,Little,...,Deep,Narrow,Cover,2,1,31,25.45,24.00,FC 24,"Jun 12, 2024"
1,ENG-Premier League,Arsenal,83,83,85,82,€0,€2.1B,Slow,Little,...,Deep,Narrow,Cover,8,7,33,24.91,23.27,FC 24,"Jun 12, 2024"
2,ENG-Premier League,Aston Villa,81,83,79,78,€0,€695.5M,Slow,Little,...,Deep,Narrow,Cover,6,5,33,25.73,24.12,FC 24,"Jun 12, 2024"
3,ENG-Premier League,Brentford,77,77,75,76,€0,€220.5M,Slow,Little,...,Deep,Narrow,Cover,3,2,33,26.36,24.33,FC 24,"Jun 12, 2024"
4,ENG-Premier League,Brighton & Hove Albion,77,75,74,77,€0,€425M,Slow,Little,...,Deep,Narrow,Cover,5,3,33,24.91,23.88,FC 24,"Jun 12, 2024"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,ITA-Serie A,Roma,80,83,80,79,€0,€660M,Slow,Little,...,Deep,Narrow,Cover,8,7,30,27.27,25.17,FC 24,"Jun 12, 2024"
92,ITA-Serie A,Salernitana,74,72,73,72,€0,€70M,Slow,Little,...,Deep,Narrow,Cover,3,1,31,27.09,25.84,FC 24,"Jun 12, 2024"
93,ITA-Serie A,Sassuolo,75,76,74,73,€0,€110M,Slow,Little,...,Deep,Narrow,Cover,4,3,28,26.36,25.32,FC 24,"Jun 12, 2024"
94,ITA-Serie A,Torino,76,77,75,77,€0,€125M,Slow,Little,...,Deep,Narrow,Cover,5,4,26,26.00,24.42,FC 24,"Jun 12, 2024"


In [6]:
# Convertir les types de données
team_ratings_nat["league"] = "INT"
team_ratings_nat.loc[team_ratings_nat["update"] == "World Cup 2022", "update"] = "Nov 20, 2022"

team_ratings = pd.concat([team_ratings, team_ratings_nat], ignore_index=True)
team_ratings

Unnamed: 0,league,team,overall,attack,midfield,defence,transfer_budget,club_worth,build_up_speed,build_up_dribbling,...,defence_pressure,defence_team_width,defence_defender_line,defence_domestic_prestige,international_prestige,players,starting_xi_average_age,whole_team_average_age,fifa_edition,update
0,ENG-Premier League,AFC Bournemouth,76,79,75,74,€0,€215.5M,Slow,Little,...,Deep,Narrow,Cover,2,1,31,25.45,24.00,FC 24,"Jun 12, 2024"
1,ENG-Premier League,Arsenal,83,83,85,82,€0,€2.1B,Slow,Little,...,Deep,Narrow,Cover,8,7,33,24.91,23.27,FC 24,"Jun 12, 2024"
2,ENG-Premier League,Aston Villa,81,83,79,78,€0,€695.5M,Slow,Little,...,Deep,Narrow,Cover,6,5,33,25.73,24.12,FC 24,"Jun 12, 2024"
3,ENG-Premier League,Brentford,77,77,75,76,€0,€220.5M,Slow,Little,...,Deep,Narrow,Cover,3,2,33,26.36,24.33,FC 24,"Jun 12, 2024"
4,ENG-Premier League,Brighton & Hove Albion,77,75,74,77,€0,€425M,Slow,Little,...,Deep,Narrow,Cover,5,3,33,24.91,23.88,FC 24,"Jun 12, 2024"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,INT,Spain,83,83,84,83,€0,€0,Slow,Little,...,Deep,Narrow,Cover,10,9,26,27.00,26.69,FC 24,"Jun 12, 2024"
122,INT,Sweden,77,82,76,74,€0,€0,Slow,Little,...,Deep,Narrow,Cover,10,5,26,26.64,26.85,FC 24,"Jun 12, 2024"
123,INT,Ukraine,77,81,77,74,€0,€0,Slow,Little,...,Deep,Narrow,Cover,10,6,26,24.27,26.23,FC 24,"Jun 12, 2024"
124,INT,United States,76,76,76,76,€0,€0,Slow,Little,...,Deep,Narrow,Cover,10,6,26,25.36,24.38,FC 24,"Jun 12, 2024"


In [7]:
# Convertir les types de données
team_ratings_nat["league"] = "INT"
team_ratings_nat.loc[team_ratings_nat["update"] == "World Cup 2022", "update"] = "Nov 20, 2022"

team_ratings = pd.concat([team_ratings, team_ratings_nat], ignore_index=True)

# Convertir les types de données
team_ratings['update'] = pd.to_datetime(team_ratings['update'])
team_ratings["overall"] = team_ratings["overall"].astype(int)
team_ratings["attack"] = team_ratings["attack"].astype(int)
team_ratings["midfield"] = team_ratings["midfield"].astype(int)
team_ratings["defence"] = team_ratings["defence"].astype(int)
team_ratings["transfer_budget"] = team_ratings["transfer_budget"].str.replace("€", "").str.replace("M", "0000").str.replace("K", "000").str.replace(".", "").astype(int)
team_ratings["club_worth"] = team_ratings["club_worth"].str.replace("€", "").str.replace("M", "0000").str.replace("K", "000").str.replace("B", "000000000").str.replace(".", "").astype(float)
team_ratings["defence_domestic_prestige"] = team_ratings["defence_domestic_prestige"].astype(int)
team_ratings["international_prestige"] = team_ratings["international_prestige"].astype(int)
team_ratings["players"] = team_ratings["players"].astype(int)
team_ratings["starting_xi_average_age"] = team_ratings["starting_xi_average_age"].astype(float)
team_ratings["whole_team_average_age"] = team_ratings["whole_team_average_age"].astype(float)
team_ratings.sort_values('update', ascending=False)



Unnamed: 0,league,team,overall,attack,midfield,defence,transfer_budget,club_worth,build_up_speed,build_up_dribbling,...,defence_pressure,defence_team_width,defence_defender_line,defence_domestic_prestige,international_prestige,players,starting_xi_average_age,whole_team_average_age,fifa_edition,update
0,ENG-Premier League,AFC Bournemouth,76,79,75,74,0,2.155000e+07,Slow,Little,...,Deep,Narrow,Cover,2,1,31,25.45,24.00,FC 24,2024-06-12
107,INT,Iceland,71,74,71,71,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,2,26,27.55,26.62,FC 24,2024-06-12
100,INT,Denmark,79,76,78,79,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,6,26,27.27,26.35,FC 24,2024-06-12
101,INT,England,85,87,86,83,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,8,26,26.27,25.69,FC 24,2024-06-12
102,INT,Finland,71,70,71,67,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,3,26,28.73,26.50,FC 24,2024-06-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52,FRA-Ligue 1,Paris Saint Germain,83,84,81,81,0,4.200000e+10,Slow,Little,...,Deep,Narrow,Cover,10,9,30,23.09,24.47,FC 24,2024-06-12
53,FRA-Ligue 1,Rennes,76,77,77,74,0,1.800000e+06,Slow,Little,...,Deep,Narrow,Cover,7,4,25,24.82,23.40,FC 24,2024-06-12
54,FRA-Ligue 1,Stade Brestois 29,75,75,74,76,0,9.200000e+05,Slow,Little,...,Deep,Narrow,Cover,2,1,24,26.55,25.17,FC 24,2024-06-12
55,FRA-Ligue 1,Stade de Reims,74,73,75,74,0,1.200000e+06,Slow,Little,...,Deep,Narrow,Cover,4,1,25,25.91,24.28,FC 24,2024-06-12


In [8]:
team_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 27 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   league                       156 non-null    object        
 1   team                         156 non-null    object        
 2   overall                      156 non-null    int32         
 3   attack                       156 non-null    int32         
 4   midfield                     156 non-null    int32         
 5   defence                      156 non-null    int32         
 6   transfer_budget              156 non-null    int32         
 7   club_worth                   156 non-null    float64       
 8   build_up_speed               156 non-null    object        
 9   build_up_dribbling           156 non-null    object        
 10  build_up_passing             156 non-null    object        
 11  build_up_positioning         156 non-null    

In [47]:
with engine.connect() as conn:
    # Charger les données existantes
    existing_data = pd.read_sql(f'SELECT * FROM {DB_TN_SOFIFA_TEAMS_STATS}', conn)

    # Fusionner les nouvelles données avec les données existantes
    merged_data = pd.concat([existing_data, team_ratings], ignore_index=True)
    merged_data = merged_data.drop_duplicates(subset=[KEY_1, KEY_2], keep='last')

    # Supprimer les anciennes données
    conn.execute(text(f"DELETE FROM {DB_TN_SOFIFA_TEAMS_STATS}"))

    # Insérer les données fusionnées
    merged_data.to_sql(DB_TN_SOFIFA_TEAMS_STATS, conn, if_exists='append', index=False)
    conn.commit()

In [17]:
f"{KEY_1}, {KEY_2}"

'team, update'

In [11]:
# Créer une table temporaire pour les nouvelles données
with engine.connect() as conn:
    temp_table_name = 'temp_table'
    team_ratings.to_sql(temp_table_name, conn, if_exists='replace', index=False)

    # Liste des colonnes
    columns = ', '.join(team_ratings.columns)
    
    # Insérer les données en évitant les doublons
    insert_query = f"""
    INSERT INTO {DB_TN_SOFIFA_TEAMS_STATS} ({columns})
    SELECT {columns}
    FROM {temp_table_name}
    ON CONFLICT {KEY_1}, {KEY_2} DO NOTHING
    """
    conn.execute(text(insert_query))
    conn.execute(text(f"DROP TABLE {temp_table_name}"))

In [12]:
with engine.connect() as conn:
    existing_data = pd.read_sql(f'SELECT * FROM {DB_TN_SOFIFA_TEAMS_STATS}', conn)
existing_data

Unnamed: 0,league,team,overall,attack,midfield,defence,transfer_budget,club_worth,build_up_speed,build_up_dribbling,...,defence_pressure,defence_team_width,defence_defender_line,defence_domestic_prestige,international_prestige,players,starting_xi_average_age,whole_team_average_age,fifa_edition,update
0,INT,Canada,73,73,74,68,0,0.0,Slow,Little,...,Deep,Narrow,Cover,10,2,23,26.45,26.17,FIFA 21,2021-05-18
1,INT,Canada,73,77,74,69,0,0.0,Slow,Little,...,Deep,Narrow,Cover,10,2,23,25.55,26.65,FIFA 22,2022-01-24
2,INT,Canada,73,77,73,70,0,0.0,Slow,Little,...,Deep,Narrow,Cover,10,2,23,25.91,25.78,FIFA 23,2023-07-24
3,INT,Canada,71,73,72,66,0,0.0,Fast,Normal,...,Deep,Narrow,Cover,10,2,23,25.27,24.26,FIFA 19,2019-07-11
4,INT,Canada,71,73,72,66,0,0.0,Fast,Normal,...,Deep,Narrow,Cover,10,2,23,25.27,24.26,FIFA 19,2019-07-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106621,INT,Spain,83,83,84,83,0,0.0,Slow,Little,...,Deep,Narrow,Cover,10,9,26,27.00,26.69,FC 24,2024-06-12
106622,INT,Sweden,77,82,76,74,0,0.0,Slow,Little,...,Deep,Narrow,Cover,10,5,26,26.64,26.85,FC 24,2024-06-12
106623,INT,Ukraine,77,81,77,74,0,0.0,Slow,Little,...,Deep,Narrow,Cover,10,6,26,24.27,26.23,FC 24,2024-06-12
106624,INT,United States,76,76,76,76,0,0.0,Slow,Little,...,Deep,Narrow,Cover,10,6,26,25.36,24.38,FC 24,2024-06-12


In [19]:
import soccerdata as sd
from sqlalchemy import create_engine, text
import pandas as pd
from dotenv import load_dotenv
import os
import logging
import argparse
import sys
from pathlib import Path


#### LOGGING ####
LOG_FOLDER = "db/logs/"
LOG_FILE_NAME = "SOFIFA_teams_stats_table.log"
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

filename = Path(sys.argv[0]).resolve().parents[3] / LOG_FOLDER / LOG_FILE_NAME
logging.basicConfig(filename=filename, level=logging.INFO,
                    format=LOG_FORMAT)
logger = logging.getLogger(__name__)
logger.info(f"path to logging file exists ? {os.path.exists(filename)}")
logger.info(f"logging file path: {filename}")

In [20]:

def scrap_data_SOFIFA(teams='big 5', no_cache_latest=True, scrap_all=False, KEY_1='team', KEY_2='update'):
    """Récupérer les données des équipes de SoFIFA"""
    try:
        logger.info(f"Chargement des données des équipes {teams} - latest")
        so_fifa_latest = sd.SoFIFA(versions="latest", no_cache=no_cache_latest)
        team_ratings = so_fifa_latest.read_team_ratings() if teams == 'big 5' else so_fifa_latest.read_team_ratings_nationals()
        if scrap_all:
            logger.info(f"Chargement des données des équipes {teams} - all")
            sofifa_all = sd.SoFIFA(versions="all", no_cache=True)
            team_ratings_all = sofifa_all.read_team_ratings() if teams == 'big 5' else sofifa_all.read_team_ratings_nationals()
            team_ratings = pd.concat([team_ratings_all, team_ratings], ignore_index=True)
            team_ratings = team_ratings.drop_duplicates(subset=[KEY_1, KEY_2], keep='last')
        team_ratings.reset_index(inplace=True)
        return team_ratings
    except Exception as e:
        logger.error(f"Erreur lors du chargement des données des équipes {teams} : {e}")
        return


def convert_data_types(team_ratings, team_ratings_nat, KEY_1='team', KEY_2='update'):
    """Convertir les types de données"""
    logger.info("Conversion des types de données")
    try: 
        team_ratings_nat["league"] = "INT"
        team_ratings_nat.loc[team_ratings_nat["update"] == "World Cup 2022", "update"] = "Nov 20, 2022"
        team_ratings = pd.concat([team_ratings, team_ratings_nat], ignore_index=True)

        # Convertir les types de données
        team_ratings['update'] = pd.to_datetime(team_ratings['update'])
        team_ratings["overall"] = team_ratings["overall"].astype(int)
        team_ratings["attack"] = team_ratings["attack"].astype(int)
        team_ratings["midfield"] = team_ratings["midfield"].astype(int)
        team_ratings["defence"] = team_ratings["defence"].astype(int)
        team_ratings["transfer_budget"] = team_ratings["transfer_budget"].str.replace("€", "").str.replace("M", "0000").str.replace("K", "000").str.replace(".", "").astype(int)
        team_ratings["club_worth"] = team_ratings["club_worth"].str.replace("€", "").str.replace("M", "0000").str.replace("K", "000").str.replace("B", "000000000").str.replace(".", "").astype(float)
        team_ratings["defence_domestic_prestige"] = team_ratings["defence_domestic_prestige"].astype(int)
        team_ratings["international_prestige"] = team_ratings["international_prestige"].astype(int)
        team_ratings["players"] = team_ratings["players"].astype(int)
        team_ratings["starting_xi_average_age"] = team_ratings["starting_xi_average_age"].astype(float)
        team_ratings["whole_team_average_age"] = team_ratings["whole_team_average_age"].astype(float)
        team_ratings.sort_values('update', ascending=False)
        return team_ratings
    
    except Exception as e:
        logger.error(f"Erreur lors de la conversion des types de données: {e}")
        return


In [22]:



def insert_data_SOFIFA_teams_stats_table(no_cache_latest=True, scrap_all=False):
    """Insérer les données des équipes de SoFIFA dans la table SOFIFA teams stats"""

    logger.info("Début de l'insertion des données dans la table SOFIFA teams stats")

    #### VARIABLES ####
    load_dotenv()
    DB_USER = os.getenv("DB_USER")
    DB_PASSWORD = os.getenv('DB_PASSWORD')
    DB_HOST = os.getenv('DB_HOST')
    DB_PORT = os.getenv('DB_PORT')
    DB_NAME = os.getenv('DB_NAME')
    DB_TN_SOFIFA_TEAMS_STATS = os.getenv('DB_TN_SOFIFA_TEAMS_STATS')
    KEY_1 = 'team'
    KEY_2 = 'update'
    DN_TN_TEMP_TABLE = 'temp_table'


    #### CONNECTION A LA BASE DE DONNEES ####
    logger.info("Connexion à la base de données")
    logger.info(f"DB_USER: {DB_USER}")
    try:
        connection_url = f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
        engine = create_engine(connection_url)
    except Exception as e:
        logger.error(f"Erreur lors de la connexion à la base de données: {e}")
        return


    #### SCRAPPING SOFIFA TEAMS DATA ####
    logger.info("Chargement des données de SoFIFA")
    logger.info(f"no_cache_latest: {no_cache_latest}")
    logger.info(f"scrap_all: {scrap_all}")
    team_ratings = scrap_data_SOFIFA(teams='big 5', no_cache_latest=no_cache_latest, scrap_all=scrap_all, KEY_1=KEY_1, KEY_2=KEY_2)
    team_ratings_nat = scrap_data_SOFIFA(teams='international', no_cache_latest=no_cache_latest, scrap_all=scrap_all, KEY_1=KEY_1, KEY_2=KEY_2)


    #### CONVERSION DES TYPES DE DONNEES ####
    team_ratings = convert_data_types(team_ratings, team_ratings_nat, KEY_1=KEY_1, KEY_2=KEY_2)


    #### INSERTION DES DONNEES DANS LA BASE DE DONNEES ####
    logger.info("Insertion des données dans la base de données")
    try:
        with engine.connect() as conn:
            team_ratings.to_sql(DN_TN_TEMP_TABLE, conn, if_exists='replace', index=False)

            # Liste des colonnes
            columns = ', '.join(team_ratings.columns)
            
            # Insérer les données en évitant les doublons
            insert_query = f"""
            INSERT INTO {DB_TN_SOFIFA_TEAMS_STATS} ({columns})
            SELECT {columns}
            FROM {DN_TN_TEMP_TABLE}
            ON CONFLICT {KEY_1}, {KEY_2} DO NOTHING
            """
            logger.info("Insertion des nouvelles données en cours...")
            conn.execute(text(insert_query))

            # Compter le nombre de nouvelles lignes insérées
            count_query = f"""
            SELECT COUNT(*) FROM {DN_TN_TEMP_TABLE}
            WHERE NOT EXISTS (
                SELECT 1 FROM {DB_TN_SOFIFA_TEAMS_STATS}
                WHERE {DB_TN_SOFIFA_TEAMS_STATS}.{KEY_1} = {DN_TN_TEMP_TABLE}.{KEY_1}
                AND {DB_TN_SOFIFA_TEAMS_STATS}.{KEY_2} = {DN_TN_TEMP_TABLE}.{KEY_2}
            )
            """
            result = conn.execute(text(count_query))
            inserted_rows = result.scalar()
            logger.info(f"Insertion des nouvelles données terminée avec succès, {inserted_rows} nouvelles lignes insérées")

            conn.execute(text(f"DROP TABLE {DN_TN_TEMP_TABLE}"))
            conn.commit()

    except Exception as e:
        logger.error(f"Erreur lors de l'insertion des données: {e}")
        return
    
    logger.info("Fin de l'insertion des données dans la table SOFIFA teams stats\n\n")


In [24]:

no_cache_latest = False
scrap_all = False

"""Insérer les données des équipes de SoFIFA dans la table SOFIFA teams stats"""

logger.info("Début de l'insertion des données dans la table SOFIFA teams stats")

#### VARIABLES ####
load_dotenv()
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')
DB_TN_SOFIFA_TEAMS_STATS = os.getenv('DB_TN_SOFIFA_TEAMS_STATS')
KEY_1 = 'team'
KEY_2 = 'update'
DN_TN_TEMP_TABLE = 'temp_table'


#### CONNECTION A LA BASE DE DONNEES ####
logger.info("Connexion à la base de données")
logger.info(f"DB_USER: {DB_USER}")
try:
    connection_url = f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
    engine = create_engine(connection_url)
except Exception as e:
    logger.error(f"Erreur lors de la connexion à la base de données: {e}")


#### SCRAPPING SOFIFA TEAMS DATA ####
logger.info("Chargement des données de SoFIFA")
logger.info(f"no_cache_latest: {no_cache_latest}")
logger.info(f"scrap_all: {scrap_all}")
team_ratings = scrap_data_SOFIFA(teams='big 5', no_cache_latest=no_cache_latest, scrap_all=scrap_all, KEY_1=KEY_1, KEY_2=KEY_2)
team_ratings_nat = scrap_data_SOFIFA(teams='international', no_cache_latest=no_cache_latest, scrap_all=scrap_all, KEY_1=KEY_1, KEY_2=KEY_2)


#### CONVERSION DES TYPES DE DONNEES ####
team_ratings = convert_data_types(team_ratings, team_ratings_nat, KEY_1=KEY_1, KEY_2=KEY_2)


#### INSERTION DES DONNEES DANS LA BASE DE DONNEES ####
logger.info("Insertion des données dans la base de données")
try:
    with engine.connect() as conn:
        team_ratings.to_sql(DN_TN_TEMP_TABLE, conn, if_exists='replace', index=False)

        # Liste des colonnes
        columns = ', '.join(team_ratings.columns)
        
        # Insérer les données en évitant les doublons
        insert_query = f"""
        INSERT INTO {DB_TN_SOFIFA_TEAMS_STATS} ({columns})
        SELECT {columns}
        FROM {DN_TN_TEMP_TABLE}
        ON CONFLICT {KEY_1}, {KEY_2} DO NOTHING
        """
        logger.info("Insertion des nouvelles données en cours...")
        conn.execute(text(insert_query))

        # Compter le nombre de nouvelles lignes insérées
        count_query = f"""
        SELECT COUNT(*) FROM {DN_TN_TEMP_TABLE}
        WHERE NOT EXISTS (
            SELECT 1 FROM {DB_TN_SOFIFA_TEAMS_STATS}
            WHERE {DB_TN_SOFIFA_TEAMS_STATS}.{KEY_1} = {DN_TN_TEMP_TABLE}.{KEY_1}
            AND {DB_TN_SOFIFA_TEAMS_STATS}.{KEY_2} = {DN_TN_TEMP_TABLE}.{KEY_2}
        )
        """
        result = conn.execute(text(count_query))
        inserted_rows = result.scalar()
        logger.info(f"Insertion des nouvelles données terminée avec succès, {inserted_rows} nouvelles lignes insérées")

        conn.execute(text(f"DROP TABLE {DN_TN_TEMP_TABLE}"))
        conn.commit()

except Exception as e:
    logger.error(f"Erreur lors de l'insertion des données: {e}")

logger.info("Fin de l'insertion des données dans la table SOFIFA teams stats\n\n")

In [25]:
team_ratings

Unnamed: 0,league,team,overall,attack,midfield,defence,transfer_budget,club_worth,build_up_speed,build_up_dribbling,...,defence_pressure,defence_team_width,defence_defender_line,defence_domestic_prestige,international_prestige,players,starting_xi_average_age,whole_team_average_age,fifa_edition,update
0,ENG-Premier League,AFC Bournemouth,76,79,75,74,0,2.155000e+07,Slow,Little,...,Deep,Narrow,Cover,2,1,31,25.45,24.00,FC 24,2024-06-12
1,ENG-Premier League,Arsenal,83,83,85,82,0,2.100000e+10,Slow,Little,...,Deep,Narrow,Cover,8,7,33,24.91,23.27,FC 24,2024-06-12
2,ENG-Premier League,Aston Villa,81,83,79,78,0,6.955000e+07,Slow,Little,...,Deep,Narrow,Cover,6,5,33,25.73,24.12,FC 24,2024-06-12
3,ENG-Premier League,Brentford,77,77,75,76,0,2.205000e+07,Slow,Little,...,Deep,Narrow,Cover,3,2,33,26.36,24.33,FC 24,2024-06-12
4,ENG-Premier League,Brighton & Hove Albion,77,75,74,77,0,4.250000e+06,Slow,Little,...,Deep,Narrow,Cover,5,3,33,24.91,23.88,FC 24,2024-06-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,INT,Spain,83,83,84,83,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,9,26,27.00,26.69,FC 24,2024-06-12
122,INT,Sweden,77,82,76,74,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,5,26,26.64,26.85,FC 24,2024-06-12
123,INT,Ukraine,77,81,77,74,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,6,26,24.27,26.23,FC 24,2024-06-12
124,INT,United States,76,76,76,76,0,0.000000e+00,Slow,Little,...,Deep,Narrow,Cover,10,6,26,25.36,24.38,FC 24,2024-06-12


In [26]:
team_ratings_nat

Unnamed: 0,team,overall,attack,midfield,defence,transfer_budget,club_worth,build_up_speed,build_up_dribbling,build_up_passing,...,defence_team_width,defence_defender_line,defence_domestic_prestige,international_prestige,players,starting_xi_average_age,whole_team_average_age,fifa_edition,update,league
0,Argentina,83,85,83,82,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,9,26,28.64,27.96,FC 24,"Jun 12, 2024",INT
1,Belgium,81,82,81,77,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,8,26,27.73,26.23,FC 24,"Jun 12, 2024",INT
2,Croatia,79,77,82,77,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,6,26,27.18,26.46,FC 24,"Jun 12, 2024",INT
3,Czech Republic,75,74,75,75,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,5,26,25.45,25.62,FC 24,"Jun 12, 2024",INT
4,Denmark,79,76,78,79,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,6,26,27.27,26.35,FC 24,"Jun 12, 2024",INT
5,England,85,87,86,83,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,8,26,26.27,25.69,FC 24,"Jun 12, 2024",INT
6,Finland,71,70,71,67,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,3,26,28.73,26.5,FC 24,"Jun 12, 2024",INT
7,France,84,86,85,83,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,10,26,27.27,25.65,FC 24,"Jun 12, 2024",INT
8,Germany,85,81,85,83,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,10,26,27.73,28.04,FC 24,"Jun 12, 2024",INT
9,Ghana,75,74,77,73,€0,€0,Slow,Little,Short,...,Narrow,Cover,10,5,26,26.64,25.88,FC 24,"Jun 12, 2024",INT


In [30]:
#### LOGGING ####
LOG_FOLDER = "db/logs/"
LOG_FILE_NAME = "SOFIFA_teams_stats_table.log"
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

try:
    log_dir = Path(sys.argv[0]).resolve().parents[3] / LOG_FOLDER
    log_dir.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists
    log_file_path = log_dir / LOG_FILE_NAME
    logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, handlers=[
        logging.FileHandler(log_file_path),
        logging.StreamHandler(sys.stdout)
    ])
    logger = logging.getLogger(__name__)
    logger.info(f"path to logging file exists ? {os.path.exists(log_file_path)}")
    logger.info(f"logging file path: {log_file_path}")
except Exception as e:
    print(f"Failed to set up logging: {e}")
    sys.exit(1)


In [29]:
__name__

'__main__'