In this notebook, I'll look to persist the basic "enum tables" to my fbref football schema db. These tables include:
* country
* seasons
* teams
* competitions

# Imports

In [1]:
import os
os.chdir("../../")

In [2]:
import pandas as pd
from src.fbref.fbref_class import FBref
from src.utility.sql.fetch_and_persist import (
    create_db_engine,
    query_db,
    persist_to_db,
)

# Fetch and persist tables

### Instantiate fbref class

In [3]:
fb = FBref()

### Get country df

In [4]:
player_standard_df = fb.get_big5_player_stats(table_type='standard', season_name='2022-2023')

In [5]:
# prepare country data for df
country_list = player_standard_df.dropna().sort_values(by=['country'])['country'].unique()
country_data_dict = {
    'country_id' : range(1, len(country_list)+1),
    'country_code' : country_list
    
}

# create country df
country_df = pd.DataFrame(country_data_dict)

country_df.head(5)

Unnamed: 0,country_id,country_code
0,1,ALB
1,2,ALG
2,3,ANG
3,4,ARG
4,5,ARM


persist country df

In [15]:
# country df
persist_to_db(
    df_to_persist = country_df, 
    table_name = 'country',
    schema_name = 'fbref',
)

Data Persisted


### Get seasons df

Let's get comeptition links for the top five leageus where the competition links are used as input to get seasons for that specific competition

In [6]:
big5_df = fb.get_big_5_leagues()

competition_link_dict = dict(
    zip(
        big5_df['Competition Name'],
        big5_df['competition_link']
    )
)

Create season df using seasons from Premier League

In [7]:
# prepare season df data
prem_seasons_df = fb.get_competition_seasons(competition_link_dict['Premier League'])
season_list = list(prem_seasons_df.Season)

season_data_dict = {
    'season_id' : range(1, len(season_list)+1),
    'season_name' : season_list
}

season_df = pd.DataFrame(season_data_dict)
season_df.head(5)

Unnamed: 0,season_id,season_name
0,1,2022-2023
1,2,2021-2022
2,3,2020-2021
3,4,2019-2020
4,5,2018-2019


persist seasons df

In [16]:
# season df
persist_to_db(
    df_to_persist = season_df, 
    table_name = 'seasons',
    schema_name = 'fbref',
)


Data Persisted


### Get teams df

Get teams from top five countries

In [8]:
teams_df = pd.concat(
    [fb.get_teams_per_country(country) for country in ["England", "France", "Germany", "Spain", "Italy"] ]
)

In [9]:
teams_df = (
    teams_df
    .rename(
        columns={
            "Gender" : "gender",
            "Squad" : "team_name",
        }
    )
)[
    [
        'team_id',
        'gender', 
        'team_name',
    ]
]

In [10]:
teams_df

Unnamed: 0,team_id,gender,team_name
0,3b40c85f,M,1874 Northwich FC
1,912dc50a,M,Abbey Hey FC
2,d509b648,M,Abbey Rangers FC
3,d7bd6336,M,Abingdon Town FC
4,bbbaafbd,M,Abingdon United FC
...,...,...,...
174,0f16a817,M,USD Olginatese
175,bf51962a,M,USD Sestri Levante 1919
176,af5d5982,M,Venezia FC
177,2591250b,M,Vicenza Calcio


persist teams df

In [18]:
# teams df
persist_to_db(
    df_to_persist = teams_df, 
    table_name = 'teams',
    schema_name = 'fbref',
)

Data Persisted


### Get competition df

fetch country df from db

In [20]:
db_country_df = query_db('SELECT * FROM fbref.country')

Prepare comeptition table

In [21]:
competition_df = big5_df[big5_df.competition_id.isin(['9', '12', '13', '20', '11'])]

competition_df = (
    competition_df
    # clean country values to codes
    .assign(
        Country = competition_df.Country.apply(lambda x: x.split(' ')[1])
    )
    # merge with country df to get country id
    .merge(db_country_df, left_on='Country', right_on='country_code')
    .rename(
        columns = {
            'Gender': 'gender',
            'Competition Name':'competition_name',
        }
    )
    .astype({'competition_id': "int32"})
)


In [23]:
competition_df = competition_df[
    [
        'competition_id',
        'country_id',
        'gender',
        'competition_name',
        'competition_link',
    ]
]
competition_df

Unnamed: 0,competition_id,country_id,gender,competition_name,competition_link
0,9,33,M,Premier League,https://fbref.com/en/comps/9/history/Premier-L...
1,12,35,M,La Liga,https://fbref.com/en/comps/12/history/La-Liga-...
2,13,37,M,Ligue 1,https://fbref.com/en/comps/13/history/Ligue-1-...
3,20,41,M,Fußball-Bundesliga,https://fbref.com/en/comps/20/history/Bundesli...
4,11,56,M,Serie A,https://fbref.com/en/comps/11/history/Serie-A-...


In [24]:
# teams df
persist_to_db(
    df_to_persist = competition_df, 
    table_name = 'competitions',
    schema_name = 'fbref',
)

Data Persisted
