In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import col

def unique_team_names(city, category):    

    s3_input_path = f"s3://databricks-workspace-liga-mt-bucket/unity-catalog/bronze/{city}/*.parquet"

    silver_data = spark.read \
        .format("parquet") \
        .option("inferSchema", "false") \
        .load(s3_input_path)

    # Cast home_goals and away_goals to integers
    silver_data = silver_data.withColumn("home_goals", F.col("home_goals").cast("int")) \
                            .withColumn("away_goals", F.col("away_goals").cast("int"))

    # Add a Calculated RESULT column
    silver_data = silver_data.withColumn(
        "result",
        F.when(F.col("home_goals") > F.col("away_goals"), "HOME WIN")
        .when(F.col("home_goals") < F.col("away_goals"), "AWAY WIN")
        .otherwise("DRAW")
    )

    # Filter the DataFrame based on the category
    category_silver_data = silver_data.filter(col("category") == category)

    # Collect unique home and away teams
    unique_home_teams = category_silver_data.select("home_team").distinct()
    unique_away_teams = category_silver_data.select("away_team").distinct()

    # Union the two DataFrames to get all unique team names
    unique_teams = unique_home_teams.union(unique_away_teams).distinct()

    # Count unique team names
    num_unique_teams = unique_teams.count()

    # Flag indication of duplicate / inconsistent team naming
    if num_unique_teams % 2 != 0:
        print(f"There are an odd number of unique team names in the {city.capitalize()} {category} league. Please check for inconsistent team naming.")
        display(unique_teams)

    s3_output_path = f"s3://databricks-workspace-liga-mt-bucket/unity-catalog/silver/{city}/"
    
    silver_data.write \
        .format("parquet") \
        .mode("overwrite") \
        .save(s3_output_path)   

    print(f"Data for {city.capitalize()} transformed and written in parquet format to Amazon s3 storage.")   
    
    return silver_data, unique_teams    

cities = ["gdansk", "warsaw", "wroclaw", "krakow", "poznan", "slask"]
categories = ["2010/2011", "2012/2013", "2012/2014", "2014/2015", "2016/2017", "2018/2019"]

# Process all cities
for city in cities:
    for category in categories:
        unique_team_names(city, category)

Data for Gdansk transformed and written in parquet format to Amazon s3 storage.
There are an odd number of unique team names in the Gdansk 2012/2013 league. Please check for inconsistent team naming.


home_team
Jaguar A
Escola PG B
Olimpia Elbląg
AP Karol Piątek
Jedynka Reda B
Legendani Gdańsk
Victoria Łęgowo
Czarni Pruszcz
TLG
Lider Dębogórze


Data for Gdansk transformed and written in parquet format to Amazon s3 storage.
Data for Gdansk transformed and written in parquet format to Amazon s3 storage.
Data for Gdansk transformed and written in parquet format to Amazon s3 storage.
Data for Gdansk transformed and written in parquet format to Amazon s3 storage.
Data for Gdansk transformed and written in parquet format to Amazon s3 storage.
Data for Warsaw transformed and written in parquet format to Amazon s3 storage.
Data for Warsaw transformed and written in parquet format to Amazon s3 storage.
Data for Warsaw transformed and written in parquet format to Amazon s3 storage.
Data for Warsaw transformed and written in parquet format to Amazon s3 storage.
Data for Warsaw transformed and written in parquet format to Amazon s3 storage.
Data for Warsaw transformed and written in parquet format to Amazon s3 storage.
Data for Wroclaw transformed and written in parquet format to Amazon s3 storage.
Data for Wroclaw transformed and writte

home_team
Lotnik Kryspnów
ASEREK
Słomniczanka B
MKS Trzebinia B
MKS Trzebinia
Ind. Proces B
Aserek Kryg
Krakus NHI
Krakus NH
Ind. Proces A


Data for Krakow transformed and written in parquet format to Amazon s3 storage.
Data for Krakow transformed and written in parquet format to Amazon s3 storage.
There are an odd number of unique team names in the Krakow 2014/2015 league. Please check for inconsistent team naming.


home_team
Lotnik Kryspinów
Kmita Zabierzów
Pcimianka Pcim A
Champions A
Bonito Mszana A
Bibiczanka B
Hutnik Kraków 14B
Sokół Słopnice B
AM Cracovia B
Bonito Mszana B


Data for Krakow transformed and written in parquet format to Amazon s3 storage.
There are an odd number of unique team names in the Krakow 2016/2017 league. Please check for inconsistent team naming.


home_team
Hutnik Kraków 17B
Lotnik Kryspinów
Hutnik Kraków 16B
Halniak MP A
Krakus Swosz. A
Dynamo Kraków II
Bonito Mszana A
Hutnik Kraków 16A
Bonito Mszana B
Triumf Alwernia


Data for Krakow transformed and written in parquet format to Amazon s3 storage.
There are an odd number of unique team names in the Krakow 2018/2019 league. Please check for inconsistent team naming.


home_team
Hutnik A
KS Podgórze
Bonito Mszana
Dynamo B
Dynamo A
Krakus Swo.
Krakus Swosz.
Hutnik B
Lotnik Kryspinów


Data for Krakow transformed and written in parquet format to Amazon s3 storage.
Data for Poznan transformed and written in parquet format to Amazon s3 storage.
Data for Poznan transformed and written in parquet format to Amazon s3 storage.
Data for Poznan transformed and written in parquet format to Amazon s3 storage.
There are an odd number of unique team names in the Poznan 2014/2015 league. Please check for inconsistent team naming.


home_team
Las Puszczykowo
Orlik Kaźmierz 15
Lipno Stęszew A
UKS Śrem
Orlik Każmierz 14
APR Tulce
Orlik Kaźmierz 15
Huragan Pobiedziska
APR Skórzewo
UKS 17 Baranowo


Data for Poznan transformed and written in parquet format to Amazon s3 storage.
Data for Poznan transformed and written in parquet format to Amazon s3 storage.
Data for Poznan transformed and written in parquet format to Amazon s3 storage.
Data for Slask transformed and written in parquet format to Amazon s3 storage.
Data for Slask transformed and written in parquet format to Amazon s3 storage.
Data for Slask transformed and written in parquet format to Amazon s3 storage.
Data for Slask transformed and written in parquet format to Amazon s3 storage.
Data for Slask transformed and written in parquet format to Amazon s3 storage.
Data for Slask transformed and written in parquet format to Amazon s3 storage.
