In [0]:
%pip install python-dotenv

In [0]:
# Restart the notebook's Python process; this runs right after the %pip install
# cell, presumably so the freshly installed python-dotenv package is importable
# in the cells below (Databricks-specific helper — confirm against the runtime docs).
dbutils.library.restartPython()

In [0]:
from dotenv import load_dotenv
import os
import requests
import json

def fetch_teams(season=2025, competition='PD',
                output_path='/Volumes/workspace/default/kickoff_volume/teams.json',
                timeout=30):
    """Download a competition's teams from football-data.org and save them as JSON.

    The API token is read from the ``FOOTBALLDATA_TOKEN`` environment variable
    after loading ``../.env``. The response body is fully parsed before the
    output file is opened, so a failed request or an undecodable body never
    truncates a snapshot written by a previous run.

    Args:
        season: Value sent as the ``season`` query parameter.
        competition: Competition code inserted into the request URL.
        output_path: File that receives the JSON payload (overwritten).
        timeout: Seconds passed to ``requests.get`` so the call cannot hang
            indefinitely.

    Raises:
        requests.RequestException: Network failure, timeout or non-2xx status;
            re-raised after printing a short message.
        OSError: If ``output_path`` cannot be written.
    """
    load_dotenv('../.env')

    token = os.getenv('FOOTBALLDATA_TOKEN')
    headers = {}
    if token:
        headers['X-Auth-Token'] = token
    else:
        # requests silently drops a header whose value is None, so without this
        # message a missing token would go completely unnoticed.
        print('Warning: FOOTBALLDATA_TOKEN is not set; sending the request without authentication')

    params = {'season': season}
    api_url = f'https://api.football-data.org/v4/competitions/{competition}/teams'

    try:
        resp = requests.get(api_url, headers=headers, params=params, timeout=timeout)
        resp.raise_for_status()
        # Decode here, before touching the file: open(..., 'w') truncates
        # immediately, which would wipe the old snapshot on a bad body.
        payload = resp.json()
    except requests.RequestException as exc:
        print(f'An error occurred during the FootballDataORG petition: {exc}')
        raise

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f)

# Download the teams payload and write it to teams.json on the volume; the
# Spark read further down loads that same file.
fetch_teams()

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, LongType, ArrayType

# Explicit schema for the teams JSON document written by fetch_teams().
# Every field is nullable. Leaf structs are declared first so the nested
# ones (coach, team, root) can reference them.

# --- top-level metadata objects -------------------------------------------
competition_schema = (
    StructType()
    .add("code", StringType(), True)
    .add("emblem", StringType(), True)
    .add("id", LongType(), True)
    .add("name", StringType(), True)
    .add("type", StringType(), True)
)

filters_schema = StructType().add("season", LongType(), True)

season_schema = (
    StructType()
    .add("currentMatchday", LongType(), True)
    .add("endDate", StringType(), True)
    .add("id", LongType(), True)
    .add("startDate", StringType(), True)
    .add("winner", StringType(), True)
)

# --- objects nested inside each team --------------------------------------
area_schema = (
    StructType()
    .add("code", StringType(), True)
    .add("flag", StringType(), True)
    .add("id", LongType(), True)
    .add("name", StringType(), True)
)

contract_schema = (
    StructType()
    .add("start", StringType(), True)
    .add("until", StringType(), True)
)

coach_schema = (
    StructType()
    .add("contract", contract_schema, True)
    .add("dateOfBirth", StringType(), True)
    .add("firstName", StringType(), True)
    .add("id", LongType(), True)
    .add("lastName", StringType(), True)
    .add("name", StringType(), True)
    .add("nationality", StringType(), True)
)

# Same field list as competition_schema, kept as its own object.
running_comp_schema = (
    StructType()
    .add("code", StringType(), True)
    .add("emblem", StringType(), True)
    .add("id", LongType(), True)
    .add("name", StringType(), True)
    .add("type", StringType(), True)
)

squad_schema = (
    StructType()
    .add("dateOfBirth", StringType(), True)
    .add("id", LongType(), True)
    .add("name", StringType(), True)
    .add("nationality", StringType(), True)
    .add("position", StringType(), True)
)

# --- one entry of the "teams" array ---------------------------------------
team_schema = (
    StructType()
    .add("address", StringType(), True)
    .add("area", area_schema, True)
    .add("clubColors", StringType(), True)
    .add("coach", coach_schema, True)
    .add("crest", StringType(), True)
    .add("founded", LongType(), True)
    .add("id", LongType(), True)
    .add("lastUpdated", StringType(), True)
    .add("name", StringType(), True)
    .add("runningCompetitions", ArrayType(running_comp_schema), True)
    .add("shortName", StringType(), True)
    .add("squad", ArrayType(squad_schema), True)
    .add("staff", ArrayType(StringType()), True)
    .add("tla", StringType(), True)
    .add("venue", StringType(), True)
    .add("website", StringType(), True)
)

# --- whole document -------------------------------------------------------
root_schema = (
    StructType()
    .add("competition", competition_schema, True)
    .add("count", LongType(), True)
    .add("filters", filters_schema, True)
    .add("season", season_schema, True)
    .add("teams", ArrayType(team_schema), True)
)


In [0]:
# Read the downloaded document with the explicit schema. multiLine is needed
# because the file holds a single JSON document rather than one record per line.
df_bronze = spark.read.schema(root_schema).json(
    '/Volumes/workspace/default/kickoff_volume/teams.json',
    multiLine=True,
)

# Quick sanity check: applied schema plus a small preview.
df_bronze.printSchema()
display(df_bronze.limit(10))

In [0]:
# Replace the raw_teams table with this run's snapshot, allowing the table
# schema to be overwritten along with the data.
(
    df_bronze.write
    .option('overwriteSchema', 'true')
    .mode('overwrite')
    .saveAsTable('raw_teams')
)