In [None]:
import pandas as _hex_pandas
import datetime as _hex_datetime
import json as _hex_json

In [None]:
# Run flag supplied as a JSON literal; `false` decodes to Python False.
hex_scheduled = _hex_json.loads('false')

In [None]:
# User e-mail value, decoded from a JSON string literal.
hex_user_email = _hex_json.loads('"example-user@example.com"')

In [None]:
# Run-context label, decoded from a JSON string literal.
hex_run_context = _hex_json.loads('"logic"')

In [None]:
# Timezone name, decoded from a JSON string literal.
hex_timezone = _hex_json.loads('"America/Mexico_City"')

In [None]:
# Project identifier (UUID-formatted string), decoded from a JSON string literal.
hex_project_id = _hex_json.loads('"7ca3556b-51dc-4ea6-8aec-d37fca3b80e3"')

In [None]:
# Project name, decoded from a JSON string literal.
hex_project_name = _hex_json.loads('"1_euro2024_data_ingest"')

In [None]:
# Status value; the JSON payload is an empty string.
hex_status = _hex_json.loads('""')

In [None]:
# Category list; the JSON payload is an empty array.
hex_categories = _hex_json.loads('[]')

In [None]:
# Colour palette: ten hex colour codes decoded from a JSON array.
# The literal is split across two adjacent strings purely for readability.
hex_color_palette = _hex_json.loads(
    '["#4C78A8","#F58518","#E45756","#72B7B2","#54A24B",'
    '"#EECA3B","#B279A2","#FF9DA6","#9D755D","#BAB0AC"]'
)

# HOL: Soccer Euro Cup 2024 Prediction ⚽⚽⚽
**Building a Forecasting Model** with **Snowpark ML**

---

In this hands-on lab (HOL), we'll use Snowflake Notebooks and Snowpark ML to build an end-to-end forecasting model, from data ingestion through to model deployment. We will cover every step of ML development and demonstrate the capabilities Snowflake provides with Snowpark ML and its supporting MLOps features.


_The model aims to predict the winner of the Euro Cup 2024 — a polarizing subject for Matteo (Italy supporter) and Simon (England supporter)!_
_Are you ready? Let's start!_

![image](https://i.gifer.com/embedded/download/BiCu.gif)


In [None]:
# Look up the data connection named 'SCS-SIMON-EURO2024' via hextoolkit and
# obtain a Snowpark session from it. `session` is the handle every later cell
# uses to run SQL.
import hextoolkit
hex_snowflake_conn = hextoolkit.get_data_connection('SCS-SIMON-EURO2024')
session = hex_snowflake_conn.get_snowpark_session()

In [None]:
# Add version tracking: tag every query issued by this session.
#
# `Session.query_tag` is a string property, so the tag is serialised with
# json.dumps instead of relying on the dict's Python repr (single-quoted keys,
# i.e. not valid JSON). For the same reason `version` is a nested object
# rather than a hand-written pseudo-JSON string.
import json

app_tag = {
    "origin": "sf_sit",
    "name": "hol_sport_predict",
    "version": {"major": 1, "minor": 0},
}

session.query_tag = json.dumps(app_tag)

## Data Ingestion
---
Once the dataset package has been uploaded to the PUBLIC.DATA stage, we can load it into our staging tables.

**(Time: 5 mins)**

- Step through the notebook
- Move data from __PUBLIC.DATA__ stage to tables
- Get an understanding of the dataset loaded
- Create additional cells to query the dataset further


In [None]:
# Data ingestion.
# Every statement is plain SQL sent through the Snowpark session, so the whole
# dataset is loaded by running this single cell.

# File format for the header-bearing CSV files (fixture, rankings).
generic_csv_format_sql = '''
    CREATE OR REPLACE FILE FORMAT ff_generic_csv
        TYPE = CSV
        FIELD_DELIMITER = ','
        FIELD_OPTIONALLY_ENCLOSED_BY='"'
        PARSE_HEADER = True
        TRIM_SPACE = TRUE
        NULL_IF = ('NULL', 'null')
        ESCAPE_UNENCLOSED_FIELD= NONE
        ERROR_ON_COLUMN_COUNT_MISMATCH=false
        replace_invalid_characters=true
        date_format=auto
        time_format=auto
        timestamp_format=auto;
'''
session.sql(generic_csv_format_sql).collect()

# File format for results.csv: the header row is skipped because that file is
# read by column position (an extra unique-id column is added when it is loaded).
results_csv_format_sql = '''
    create or replace file format ff_results_csv
    	type=csv
        skip_header=1
        field_delimiter=','
        trim_space=true
        field_optionally_enclosed_by='"'
        replace_invalid_characters=true
        date_format=auto
        time_format=auto
        timestamp_format=auto; 
'''
session.sql(results_csv_format_sql).collect()

# For each staged file: create the table from the inferred schema, then copy
# the rows in, matching columns by (case-insensitive) name.
tables = ["fixture", "rankings"]
for table in tables:
    table_name = table.upper()
    stage_file = f"@data/{table}.csv"

    create_from_template_sql = f'''
        CREATE OR REPLACE TABLE {table_name}
        USING TEMPLATE (
            SELECT ARRAY_AGG(object_construct(*))
            FROM TABLE(
                INFER_SCHEMA(
                    LOCATION=>'{stage_file}',
                    FILE_FORMAT=>'ff_generic_csv',
                    IGNORE_CASE => TRUE
                )
            )
        );
    '''
    copy_into_sql = f'''
        COPY INTO {table_name}
        FROM '{stage_file}'
        FILE_FORMAT = ff_generic_csv
        MATCH_BY_COLUMN_NAME = CASE_INSENSITIVE;
    '''

    session.sql(create_from_template_sql).collect()
    session.sql(copy_into_sql).collect()

In [None]:
# Build RESULTS directly from the staged results.csv, reading columns by
# position ($1..$9), casting them, and adding a surrogate `id`.
#
# - `id`: many matches share a date, so ordering by $1 alone leaves the order of
#   ties (and therefore the ids) undefined from one run to the next. The row's
#   position in the file (METADATA$FILE_ROW_NUMBER) is used as a tie-breaker so
#   ids are reproducible.
# - `neutral`: compared case-insensitively so 'TRUE', 'True' and 'true' all map
#   to 1; anything else maps to 0.
session.sql('''
    create or replace table results as (
        select
            ROW_NUMBER() OVER (ORDER BY $1, METADATA$FILE_ROW_NUMBER) AS id,
            $1::date as date,
            $2 as home_team,
            $3 as away_team,
            $4::integer as home_team_score,
            $5::integer as away_team_score,
            $6 as tournament,
            $7 as city,
            $8 as country,
            (CASE WHEN UPPER($9) = 'TRUE' then 1 ELSE 0 END) as neutral
        from
            @data/results.csv
        (file_format => 'ff_results_csv')
    );
''').collect()

[Row(status='Table RESULTS successfully created.')]

In [None]:
# Handle the Turkey/Türkiye spelling difference: every RANKINGS row whose
# abbreviation is 'TUR' gets the full country name 'Türkiye'.
turkiye_spelling_sql = '''
    update rankings 
        set country_full = 'Türkiye'
        where country_abrv = 'TUR';
'''
session.sql(turkiye_spelling_sql).collect()

[Row(number of rows updated=332, number of multi-joined rows updated=0)]

In [None]:
# Sanity check: list the tables in the PUBLIC schema, including the ones just created.
show_tables_sql = '''
    show tables in schema public;
'''
session.sql(show_tables_sql).show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"created_on"                      |"name"               |"database_name"  |"schema_name"  |"kind"  |"comment"                                           |"cluster_by"  |"rows"  |"bytes"  |"owner"   |"retention_time"  |"automatic_clustering"  |"change_tracking"  |"search_optimization"  |"search_optimization_progress"  |"search_optimization_bytes"  |"is_external"  |"enable_schema_evolution"  |"owner_role_type"  |"is_event"  |"budget"  |"is_hybrid"  |"is_iceberg"  |"is_dynamic"  |
----------------------------------

In [None]:
# Spot-check the loaded RESULTS table with a well-known match:
# England (home) v Germany (away), FIFA World Cup, 1966.
world_cup_1966_sql = '''
  SELECT
    *
  FROM
    results
  WHERE
    home_team = 'England' 
    AND away_team = 'Germany'
    AND tournament = 'FIFA World Cup'
    AND YEAR(date) = 1966
'''
session.sql(world_cup_1966_sql).show()

-------------------------------------------------------------------------------------------------------------------------------------------
|"ID"  |"DATE"      |"HOME_TEAM"  |"AWAY_TEAM"  |"HOME_TEAM_SCORE"  |"AWAY_TEAM_SCORE"  |"TOURNAMENT"    |"CITY"  |"COUNTRY"  |"NEUTRAL"  |
-------------------------------------------------------------------------------------------------------------------------------------------
|6525  |1966-07-30  |England      |Germany      |4                  |2                  |FIFA World Cup  |London  |England    |0          |
-------------------------------------------------------------------------------------------------------------------------------------------



In [None]:
# The Euro Cup 2024 fixture was loaded as well: these are the matches whose
# results we'll predict, from the group stage through the knockout stage to the final.
# Print up to 51 rows rather than the default preview size.
session.table("fixture").show(n=51)

---------------------------------------------------------------------------------------------------------------------------------------
|"MATCH NUMBER"  |"ROUND NUMBER"  |"DATE"            |"LOCATION"             |"HOME TEAM"      |"AWAY TEAM"      |"GROUP"  |"RESULT"  |
---------------------------------------------------------------------------------------------------------------------------------------
|1               |1               |14/06/2024 19:00  |Fußball Arena München  |Germany          |Scotland         |Group A  |NULL      |
|2               |1               |15/06/2024 13:00  |Stadion Köln           |Hungary          |Switzerland      |Group A  |NULL      |
|3               |1               |15/06/2024 16:00  |Olympiastadion         |Spain            |Croatia          |Group B  |NULL      |
|4               |1               |15/06/2024 19:00  |BVB Stadion Dortmund   |Italy            |Albania          |Group B  |NULL      |
|5               |1               |16/06/2024 13