## Get schedules for the 2025 season of the Spanish league

In [49]:
from get_soccer_data import get_soccer_data

In [50]:
# Request the 'SchedulesBasic' feed for 'ESP' / '2025'. As the output below shows, the helper
# prints the URL it called and the HTTP status, and saves JSON and CSV copies of the response.
df = get_soccer_data('scores','SchedulesBasic','ESP','2025')

https://api.sportsdata.io/v4/soccer/scores/json/SchedulesBasic/ESP/2025
Status Code: 200
✅ Saved: SchedulesBasic_ESP_2025.json and SchedulesBasic_ESP_2025.csv


In [51]:
import pandas as pd

In [52]:
# Inspect the column names of the fetched schedule data
df.columns

Index(['GameId', 'RoundId', 'Season', 'SeasonType', 'Group', 'AwayTeamId',
       'HomeTeamId', 'VenueId', 'Day', 'DateTime', 'Status', 'Week', 'Winner',
       'VenueType', 'AwayTeamKey', 'AwayTeamName', 'AwayTeamCountryCode',
       'AwayTeamScore', 'AwayTeamScorePeriod1', 'AwayTeamScorePeriod2',
       'AwayTeamScoreExtraTime', 'AwayTeamScorePenalty', 'HomeTeamKey',
       'HomeTeamName', 'HomeTeamCountryCode', 'HomeTeamScore',
       'HomeTeamScorePeriod1', 'HomeTeamScorePeriod2',
       'HomeTeamScoreExtraTime', 'HomeTeamScorePenalty', 'Updated',
       'UpdatedUtc', 'GlobalGameId', 'GlobalAwayTeamId', 'GlobalHomeTeamId',
       'IsClosed', 'PlayoffAggregateScore'],
      dtype='object')

In [None]:
# Function to prepare dataframe to be ingested in postgresql
import pandas as pd

def prepare_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``df`` ready to be loaded into PostgreSQL.

    The caller's frame is left untouched. (The previous version renamed the
    ``Group`` column of the argument in place, so the input silently changed
    as a side effect of calling this function.)

    Steps applied, in order:
      1. Rename ``Group`` to ``Group_num`` (GROUP is a reserved word in SQL).
      2. Replace empty / whitespace-only strings with ``None``.
      3. Replace missing values with ``None`` where the column dtype allows it.
      4. Lower-case every column name.

    Args:
        df: Frame to prepare. Column labels must be strings.

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    # rename() without inplace returns a new frame, so the argument is never mutated
    prepared = df.rename(columns={'Group': 'Group_num'})
    prepared = prepared.replace(r'^\s*$', None, regex=True)  # blank strings -> None
    prepared = prepared.where(pd.notnull(prepared), None)  # NaN -> None
    prepared.columns = prepared.columns.str.lower()
    return prepared

In [None]:
# Prepare the schedules dataframe for ingestion ('Group' column renamed, blank strings
# replaced with None, column names lower-cased), then display it as the cell output
df = prepare_df(df) 
df

Unnamed: 0,gameid,roundid,season,seasontype,group_num,awayteamid,hometeamid,venueid,day,datetime,...,hometeamscoreperiod2,hometeamscoreextratime,hometeamscorepenalty,updated,updatedutc,globalgameid,globalawayteamid,globalhometeamid,isclosed,playoffaggregatescore
0,79123,1548,2025,1,,617,616,105,2024-08-15T00:00:00,2024-08-15T17:00:00,...,0,,,2025-06-02T17:29:46,2025-06-02T21:29:46,90079123,90000617,90000616,True,
1,79125,1548,2025,1,,1169,625,114,2024-08-15T00:00:00,2024-08-15T19:30:00,...,0,,,2025-06-02T17:29:46,2025-06-02T21:29:46,90079125,90001169,90000625,True,
2,79126,1548,2025,1,,995,622,111,2024-08-16T00:00:00,2024-08-16T17:00:00,...,1,,,2025-06-02T17:29:46,2025-06-02T21:29:46,90079126,90000995,90000622,True,
3,79129,1548,2025,1,,606,629,118,2024-08-16T00:00:00,2024-08-16T19:30:00,...,1,,,2025-06-02T17:29:46,2025-06-02T21:29:46,90079129,90000606,90000629,True,
4,79121,1548,2025,1,,994,993,233,2024-08-17T00:00:00,2024-08-17T17:00:00,...,1,,,2025-06-02T17:29:46,2025-06-02T21:29:46,90079121,90000994,90000993,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,79493,1548,2025,1,,622,617,106,2025-05-24T00:00:00,2025-05-24T19:00:00,...,0,,,2025-06-02T17:29:47,2025-06-02T21:29:47,90079493,90000622,90000617,True,
376,79495,1548,2025,1,,1571,621,110,2025-05-24T00:00:00,2025-05-24T19:00:00,...,0,,,2025-06-02T17:29:47,2025-06-02T21:29:47,90079495,90001571,90000621,True,
377,79496,1548,2025,1,,554,1169,906,2025-05-25T00:00:00,2025-05-25T12:00:00,...,0,,,2025-06-02T17:29:47,2025-06-02T21:29:47,90079496,90000554,90001169,True,
378,79497,1548,2025,1,,606,630,119,2025-05-25T00:00:00,2025-05-25T14:15:00,...,1,,,2025-06-02T17:29:47,2025-06-02T21:29:47,90079497,90000606,90000630,True,


In [None]:
## Connect to PostgreSQL using credentials loaded from environment variables

import os

import psycopg2
from dotenv import load_dotenv
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

# override=True: values from the dotenv file replace variables already set in the process
load_dotenv(override=True)
host = os.getenv("DB_HOST")
user = os.getenv("DB_USER")
DB_password = os.getenv("DB_PASSWORD")
database = os.getenv("DB_NAME")

# Gather the connection settings once, then open the connection and a cursor
pg_settings = {
    "host": host,
    "user": user,
    "password": DB_password,
    "database": database,
}
pgconn = psycopg2.connect(**pg_settings)
pgcursor = pgconn.cursor()

# Switch the connection to autocommit so each statement is committed immediately
pgconn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)

# Sanity check: show which database this session is connected to
pgcursor.execute('SELECT current_database()')
pgcursor.fetchone()





('soccerdatawarehouse',)

In [None]:
## Create table schedule in the bronze schema (no-op if it already exists)
# The column list must not end with a comma: PostgreSQL rejects "..., );" with a
# syntax error, so the last column (PlayoffAggregateScore) carries no trailing comma.
pgcursor.execute("""
 CREATE TABLE IF NOT EXISTS bronze.schedule 
(                 
  GameId INT,
  RoundId INT,
  Season INT,
  SeasonType INT,
  Group_num INT,
  AwayTeamId INT,
  HomeTeamId INT,
  VenueId INT,
  Day DATE,
  DateTime TIMESTAMP,
  Status VARCHAR(50),
  Week INT,
  Winner VARCHAR(50),
  VenueType VARCHAR(50),
  AwayTeamKey VARCHAR(50),
  AwayTeamName VARCHAR(50),
  AwayTeamCountryCode VARCHAR(50),
  AwayTeamScore INT,
  AwayTeamScorePeriod1 INT,
  AwayTeamScorePeriod2 INT,
  AwayTeamScoreExtraTime INT,
  AwayTeamScorePenalty INT,
  HomeTeamKey VARCHAR(50),
  HomeTeamName VARCHAR(50),
  HomeTeamCountryCode VARCHAR(50),
  HomeTeamScore INT,
  HomeTeamScorePeriod1 INT,
  HomeTeamScorePeriod2 INT,
  HomeTeamScoreExtraTime INT,
  HomeTeamScorePenalty INT,
  Updated TIMESTAMP,
  UpdatedUtc TIMESTAMP,
  GlobalGameId INT,
  GlobalAwayTeamId INT,
  GlobalHomeTeamId INT,
  IsClosed BOOLEAN,
  PlayoffAggregateScore INT
);
""")

In [None]:
## Create table games in the bronze schema (no-op if it already exists)
# AwayTeamScoreExtraTime / AwayTeamScorePenalty are INT, matching the HomeTeam* score
# columns in this table and the same columns in bronze.schedule (they were TIMESTAMP).
# Because of IF NOT EXISTS, a table created earlier with the old types is NOT altered;
# drop or ALTER it separately if it already exists.
# NOTE(review): Clock, ClockExtra, ClockDisplay and PlayoffAggregateScore are still declared
# TIMESTAMP; that looks unlikely for these fields (bronze.schedule uses INT for
# PlayoffAggregateScore) - confirm against the API data dictionary before changing them.
pgcursor.execute("""
 CREATE TABLE IF NOT EXISTS bronze.games 
(                 
    GameId INT,
    RoundId INT,
    Season INT,
    SeasonType INT,
    Group_num INT,
    AwayTeamId INT,
    HomeTeamId INT,
    VenueId INT,
    Day TIMESTAMP,
    DateTime TIMESTAMP,
    Status VARCHAR(255),
    Week INT,
    Period VARCHAR(255),
    Clock TIMESTAMP,
    Winner VARCHAR(255),
    VenueType VARCHAR(255),
    AwayTeamKey VARCHAR(255),
    AwayTeamName VARCHAR(255),
    AwayTeamCountryCode VARCHAR(255),
    AwayTeamScore INT,
    AwayTeamScorePeriod1 INT,
    AwayTeamScorePeriod2 INT,
    AwayTeamScoreExtraTime INT,
    AwayTeamScorePenalty INT,
    HomeTeamKey VARCHAR(255),
    HomeTeamName VARCHAR(255),
    HomeTeamCountryCode VARCHAR(255),
    HomeTeamScore INT,
    HomeTeamScorePeriod1 INT,
    HomeTeamScorePeriod2 INT,
    HomeTeamScoreExtraTime INT,
    HomeTeamScorePenalty INT,
    HomeTeamMoneyLine INT,
    AwayTeamMoneyLine INT,
    DrawMoneyLine INT,
    PointSpread FLOAT,
    HomeTeamPointSpreadPayout INT,
    AwayTeamPointSpreadPayout INT,
    OverUnder FLOAT,
    OverPayout INT,
    UnderPayout INT,
    Attendance INT,
    Updated TIMESTAMP,
    UpdatedUtc TIMESTAMP,
    GlobalGameId INT,
    GlobalAwayTeamId INT,
    GlobalHomeTeamId INT,
    ClockExtra TIMESTAMP,
    ClockDisplay TIMESTAMP,
    IsClosed BOOLEAN,
    HomeTeamFormation VARCHAR(255),
    AwayTeamFormation VARCHAR(255),
    PlayoffAggregateScore TIMESTAMP
);
""")

In [None]:
from sqlalchemy import create_engine


def df_to_posgres(user: str, host: str, password: str, database: str,
                  df: pd.DataFrame, db_table: str, schema: str) -> None:
    """Append ``df`` to the table ``schema.db_table`` in PostgreSQL.

    Args:
        user: Database user name.
        host: Database host (optionally ``host:port``).
        password: Database password.
        database: Name of the database to connect to.
        df: Frame whose rows are appended; the index is not written.
        db_table: Target table name.
        schema: Schema placed on the session ``search_path`` so the unqualified
            table name resolves inside it.

    Raises:
        Whatever SQLAlchemy / pandas raise on connection or insert failure.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # user name or password would otherwise be parsed as URL delimiters.
    safe_user = quote(user, safe='')
    safe_password = quote(password, safe='')

    engine = create_engine(
        f'postgresql+psycopg2://{safe_user}:{safe_password}@{host}/{database}'
        f'?options=-csearch_path%3D{schema}'
    )
    try:
        df.to_sql(db_table, engine, if_exists='append', index=False)
    finally:
        # A fresh engine (with its own connection pool) is built on every call;
        # dispose of it so repeated calls do not accumulate open connections.
        engine.dispose()

In [None]:
# Load the prepared schedule dataframe into the schedule table of the bronze schema
df_to_posgres(
    user=user,
    host=host,
    password=DB_password,
    database=database,
    df=df,
    db_table='schedule',
    schema='bronze',
)


## Get Spanish league 2025 game data

In [63]:
## Fetch the distinct game ids from bronze.schedule; they are used for the per-game API requests
pgcursor.execute('SELECT DISTINCT gameid from bronze.schedule')
gameid_2025_esp = pgcursor.fetchall()  # one row per game id; the id is the first element of each row

In [None]:
# Convert the fetched rows to plain strings for the API request.
# The isinstance check keeps this cell safe to re-run: after the first run the list
# already holds strings, and indexing a string with [0] would keep only the first
# character of each id.
gameid_2025_esp = [
    str(row[0]) if isinstance(row, (tuple, list)) else str(row)
    for row in gameid_2025_esp
]

In [None]:
len(gameid_2025_esp) # A Spanish league season has 380 official games; check the number of game ids

380

In [None]:
## Smoke test: ingest only the first game id before running the whole batch
for game_id in gameid_2025_esp[:1]:
    # API request for this game's final box score
    box_score = get_soccer_data('Stats', 'BoxScoreFinal', 'ESP', game_id)
    # Flatten the nested record stored in the first row of the 'Game' column
    game_row = pd.json_normalize(box_score['Game'][0])
    game_row = prepare_df(game_row)
    # Append the prepared row to bronze.games
    df_to_posgres(user, host, DB_password, database, game_row, 'games', 'bronze')
    print(f'{game_id} complete')  # progress

https://api.sportsdata.io/v4/soccer/Stats/json/BoxScoreFinal/ESP/79260
Status Code: 200
✅ Saved: BoxScoreFinal_ESP_79260.json and BoxScoreFinal_ESP_79260.csv
79260 complete


In [None]:
pgcursor.execute('SELECT * FROM bronze.games WHERE gameid = 79260;') # read back the row ingested by the test above
print(pgcursor.fetchone())  # prints a single row as a tuple (None if no row matches)

(79260, 1548, 2025, 1, None, 622, 624, 113, datetime.datetime(2024, 11, 30, 0, 0), datetime.datetime(2024, 11, 30, 17, 30), 'Final', 15, 'Regular', None, 'Scrambled', 'Home Away', 'CEL', 'RC Celta de Vigo', 'ESP', 0, 0, 0, None, None, 'ESP', 'RCD Espanyol de Barcelona', 'ESP', 1, 0, 0, None, None, 117, 59, 112, 0.1, -72, 57, 0.6, 52, -64, 23792, datetime.datetime(2025, 6, 2, 17, 29, 47), datetime.datetime(2025, 6, 2, 21, 29, 47), 90079260, 90000622, 90000624, None, None, True, '4-1-4-1', '3-5-1-1', None)


In [None]:

import time

# A free API service is used, so an arbitrary pause is inserted after every batch of
# requests to stay clear of the API call limits.
BATCH_SIZE = 50       # number of requests between pauses
PAUSE_SECONDS = 180   # length of each pause

count = 0
for count, game_id in enumerate(gameid_2025_esp[1:], start=1):  # remaining records
    box_score = get_soccer_data('Stats', 'BoxScoreFinal', 'ESP', game_id)
    game_row = prepare_df(pd.json_normalize(box_score['Game'][0]))
    df_to_posgres(user, host, DB_password, database, game_row, 'games', 'bronze')
    print(f'{game_id} complete')
    if count % BATCH_SIZE == 0:
        print("Waiting for 3 minutes...")
        time.sleep(PAUSE_SECONDS)

https://api.sportsdata.io/v4/soccer/Stats/json/BoxScoreFinal/ESP/79433
Status Code: 200
✅ Saved: BoxScoreFinal_ESP_79433.json and BoxScoreFinal_ESP_79433.csv
79433 complete
https://api.sportsdata.io/v4/soccer/Stats/json/BoxScoreFinal/ESP/79431
Status Code: 200
✅ Saved: BoxScoreFinal_ESP_79431.json and BoxScoreFinal_ESP_79431.csv
79431 complete
https://api.sportsdata.io/v4/soccer/Stats/json/BoxScoreFinal/ESP/79326
Status Code: 200
✅ Saved: BoxScoreFinal_ESP_79326.json and BoxScoreFinal_ESP_79326.csv
79326 complete
https://api.sportsdata.io/v4/soccer/Stats/json/BoxScoreFinal/ESP/79258
Status Code: 200
✅ Saved: BoxScoreFinal_ESP_79258.json and BoxScoreFinal_ESP_79258.csv
79258 complete
https://api.sportsdata.io/v4/soccer/Stats/json/BoxScoreFinal/ESP/79467
Status Code: 200
✅ Saved: BoxScoreFinal_ESP_79467.json and BoxScoreFinal_ESP_79467.csv
79467 complete
https://api.sportsdata.io/v4/soccer/Stats/json/BoxScoreFinal/ESP/79329
Status Code: 200
✅ Saved: BoxScoreFinal_ESP_79329.json and BoxSc

In [None]:
pgcursor.execute('SELECT COUNT(*) FROM bronze.games;') # count the rows saved in bronze.games
print(pgcursor.fetchone())  # single-element tuple holding the row count

(380,)
