# Predicting the Euro 2024 Winner

---

### 1. Show Fixture Data

**Objective:** Provide an overview of the fixture data for the Euro Cup 2024.

- **Initial Fixture Table**: Make a copy of the fixture table to preserve the original data.

### 2. Prep Data for Prediction

**Objective:** Prepare the data for predictive modeling.

- **Data Preprocessing**: Clean and transform the fixture data as necessary for prediction tasks.

### 3. Predict Results

**Objective:** Utilize predictive models to forecast match outcomes.

- **Prediction Process**: Employ suitable algorithms to predict match results.
- **Handling Shootouts**: Implement mechanisms to manage shootouts for knockout stage predictions.

### 4. Show Results

**Objective:** Present the predicted results of the Euro Cup 2024 matches.

- **Display Predicted Outcomes**: Showcase the forecasted results of each match.


In [None]:
import snowflake.snowpark
from snowflake.snowpark.session import Session
from snowflake.snowpark import Window
from snowflake.snowpark import functions as F   
from snowflake.snowpark.functions import udf, udtf
from snowflake.snowpark.types import IntegerType, FloatType, StringType, StructField, StructType, DateType
    
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import streamlit as st

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Pick up the currently active Snowpark session instead of opening a new connection;
# every later cell talks to Snowflake through this `session` object.
from snowflake.snowpark.context import get_active_session
session = get_active_session()

In [None]:
# Name of the current Snowflake user (first column of the single row returned);
# used below as the suffix of the per-user working table `fixture_<user_name>`.
# NOTE(review): the value is interpolated unquoted into table names — confirm
# user names only contain identifier-safe characters.
user_name = session.sql('select current_user()').collect()[0][0]

# Group Stage

In [None]:
# Adjustable threshold: in the prediction cells a row is flagged as a win
# (output_game_outcome = 1) only when its predict_proba_1 exceeds this value.
#
# Setting it at 0.5 introduces too many penalty shootouts.

win_threshold = 0.45

In [None]:
# Keep the original `fixture` table untouched: snapshot it into a per-user
# working table that the later cells update as the tournament progresses.
working_fixture_table = f'fixture_{user_name}'

df_fixture_copy = session.table('fixture')
df_fixture_copy.write.save_as_table(working_fixture_table, mode='overwrite')

# Columns of the working copy, renamed to short aliases for display.
# No round filter is applied here, so every fixture in the table is listed.
fixture_columns = [
    F.col('"MATCH NUMBER"').alias("id"),
    F.col('"ROUND NUMBER"').alias('round'),
    F.to_date(F.col('"DATE"'), "DD/MM/YYYY HH24:MI").alias("date"),
    F.col('"HOME TEAM"').alias('team_1'),
    F.col('"AWAY TEAM"').alias('team_2'),
    F.col('"GROUP"').alias('group'),
]
df_round_1 = session.table(working_fixture_table).select(*fixture_columns)

# .show(n) prints up to n rows; 51 is used here so the full list is displayed
df_round_1.show(51)

In [None]:
# Run the feature-engineering stored procedure for the games we want to predict.
#
# The two numeric arguments select the fixtures to process (0 through 36 here).
# Per the original author's note, each game is predicted twice, which is why
# 72 rows are displayed below.
first_fixture, last_fixture = 0, 36
session.call('prep_prediction_data', first_fixture, last_fixture)

feature_table = session.table('data_for_predictions')
df_pred = feature_table.order_by('id')

df_pred.show(72)

In [None]:
# Score every group-stage row with the registered model
from snowflake.ml.registry import Registry

# Default version of the model stored in the registry
reg = Registry(session=session)
mv = reg.get_model("EURO_24_GAME_PREDICT").default

# Feature rows produced by the prep step, ordered by id
pred_df = session.table('data_for_predictions').order_by('id')
prediction = mv.run(pred_df, function_name="predict_proba")

# A row counts as a win (1) only when predict_proba_1 exceeds the threshold
beats_threshold = F.col('predict_proba_1') > win_threshold
prediction = prediction.with_column('output_game_outcome', F.iff(beats_threshold, 1, 0))

# Persist to a temporary table that the downstream stored procedures read
prediction.write.save_as_table('predictions', mode='overwrite', table_type='temp')

summary_columns = [
    prediction.col(column_name)
    for column_name in ('id', 'team_1', 'output_game_outcome')
]
session.table('predictions').select(*summary_columns).order_by('id').show(72)

In [None]:
-- The same prediction can also be run directly in SQL, where a model method is
-- invoked as:
-- MODEL_NAME!PREDICT_PROBA()
--
-- The outer query keeps ID and TEAM_1 and unpacks the two PREDICT_PROBA_*
-- entries of the model output (TMP_RESULT) into DOUBLE columns.

SELECT 
    "ID", 
    "TEAM_1",  
    CAST ("TMP_RESULT"['PREDICT_PROBA_0'] AS DOUBLE) AS "PREDICT_PROBA_0",  
    CAST ("TMP_RESULT"['PREDICT_PROBA_1'] AS DOUBLE) AS "PREDICT_PROBA_1" 
FROM 
(
    -- Input rows: every column of data_for_predictions
    WITH SNOWPARK_ML_MODEL_INFERENCE_INPUT AS (
        SELECT  
            *  
        FROM 
            data_for_predictions 
        ORDER BY "ID" ASC
    ),
    -- Alias bound to one specific model version.
    -- NOTE(review): V_7 is pinned here while the Python cells use the registry's
    -- default version — confirm both refer to the same model version.
    MODEL_VERSION_ALIAS AS MODEL SUMMIT_EURO2024.PUBLIC.EURO_24_GAME_PREDICT VERSION V_7
                
    -- Invoke the model on the listed feature columns; the result lands in TMP_RESULT
    SELECT 
        *,
        MODEL_VERSION_ALIAS!PREDICT_PROBA(
            ID, 
            NEUTRAL, 
            TEAM_1_GOAL_DIFF, 
            TEAM_1_TTL_WINS, 
            TEAM_1_TTL_LOSSES, 
            TEAM_2_GOAL_DIFF, 
            TEAM_2_TTL_WINS, 
            TEAM_2_TTL_LOSSES, 
            TEAM_1_VS_TEAM_2_RANK
        ) AS TMP_RESULT
    FROM 
        SNOWPARK_ML_MODEL_INFERENCE_INPUT
)

In [None]:
# Turn the raw predictions into group tables.
#
# Per the original author's note: groups are sorted by points to determine who
# goes through — 1st and 2nd place, plus the 4 highest-ranked 3rd-placed teams.
session.call('process_group_predictions')

# The final group-stage tables should now exist; print them one group at a time
group_names = ('Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F')
for group_name in group_names:
    group_table = (
        session.table('results_group_stage')
        .filter(F.col('group') == group_name)
        .order_by(F.col('group'), F.col('rank').asc())
    )
    group_table.show()

# Round of 16 

In [None]:
# Insert the team names from the group-stage results into the Round of 16
# fixtures, so we can see who plays whom in the next phase.
session.call('calculate_r16_games')

is_round_of_16 = F.col('"ROUND NUMBER"') == 'Round of 16'
df_results = (
    session.table(f'fixture_{user_name}')
    .filter(is_round_of_16)
    .order_by('"MATCH NUMBER"')
)

# Streamlit can render Snowpark DataFrames directly (only compatible with
# Snowflake Notebooks!); elsewhere use df_results.show(8) instead.
st.dataframe(df_results, use_container_width=True)

In [None]:
# Feature engineering for the Round of 16.
# Note the different fixture id parameters compared with the group stage.
first_fixture, last_fixture = 37, 44
session.call('prep_prediction_data', first_fixture, last_fixture)

r16_features = session.table('data_for_predictions').order_by('id')
r16_features.show(16)

In [None]:
# Score every Round of 16 row with the registered model
from snowflake.ml.registry import Registry

# Default version of the model stored in the registry
reg = Registry(session=session)
mv = reg.get_model("EURO_24_GAME_PREDICT").default

# Feature rows produced by the prep step, ordered by id
pred_df = session.table('data_for_predictions').order_by('id')
prediction = mv.run(pred_df, function_name="predict_proba")

# A row counts as a win (1) only when predict_proba_1 exceeds the threshold
beats_threshold = F.col('predict_proba_1') > win_threshold
prediction = prediction.with_column('output_game_outcome', F.iff(beats_threshold, 1, 0))

# Persist to a temporary table that the downstream stored procedures read
prediction.write.save_as_table('predictions', mode='overwrite', table_type='temp')

summary_columns = [
    prediction.col(column_name)
    for column_name in ('id', 'team_1', 'output_game_outcome')
]
session.table('predictions').select(*summary_columns).order_by('id').show(16)

In [None]:
# Convert the predictions into a table where the winners are visible.
#
# Note: this is a knockout stage, so any draw has to be resolved by a
# penalty shootout.
session.call('process_knockout_predictions')

ko_results = session.table('results_ko_stage').order_by('match_id')
ko_results.show()

# Quarter-finals

In [None]:
# Work out the quarter-final fixtures from the previous round's results,
# then list them by match number.
round_name = 'Quarter Finals'
session.call('calc_knockout_games', round_name)

round_fixtures = (
    session.table(f'fixture_{user_name}')
    .filter(F.col('"ROUND NUMBER"') == round_name)
    .order_by('"MATCH NUMBER"')
)
round_fixtures.show()

In [None]:
# Feature engineering for the quarter-final fixtures
first_fixture, last_fixture = 45, 48
session.call('prep_prediction_data', first_fixture, last_fixture)

qf_features = session.table('data_for_predictions').order_by('id')
qf_features.show(8)

In [None]:
# Score every quarter-final row with the registered model
from snowflake.ml.registry import Registry

# Default version of the model stored in the registry
reg = Registry(session=session)
mv = reg.get_model("EURO_24_GAME_PREDICT").default

# Feature rows produced by the prep step, ordered by id
pred_df = session.table('data_for_predictions').order_by('id')
prediction = mv.run(pred_df, function_name="predict_proba")

# A row counts as a win (1) only when predict_proba_1 exceeds the threshold
beats_threshold = F.col('predict_proba_1') > win_threshold
prediction = prediction.with_column('output_game_outcome', F.iff(beats_threshold, 1, 0))

# Persist to a temporary table that the downstream stored procedures read
prediction.write.save_as_table('predictions', mode='overwrite', table_type='temp')

summary_columns = [
    prediction.col(column_name)
    for column_name in ('id', 'team_1', 'output_game_outcome')
]
session.table('predictions').select(*summary_columns).order_by('id').show(8)

In [None]:
# Resolve the quarter-final predictions into winners and display them
session.call('process_knockout_predictions')

ko_results = session.table('results_ko_stage').order_by('match_id')
ko_results.show()

# Semi-finals 

In [None]:
# Work out the semi-final fixtures, then list them by match number
round_name = 'Semi Finals'
session.call('calc_knockout_games', round_name)

round_fixtures = (
    session.table(f'fixture_{user_name}')
    .filter(F.col('"ROUND NUMBER"') == round_name)
    .order_by('"MATCH NUMBER"')
)
round_fixtures.show()

In [None]:
# Feature engineering for the semi-final fixtures
first_fixture, last_fixture = 49, 50
session.call('prep_prediction_data', first_fixture, last_fixture)

sf_features = session.table('data_for_predictions').order_by('id')
sf_features.show(4)

In [None]:
# Score every semi-final row with the registered model
from snowflake.ml.registry import Registry

# Default version of the model stored in the registry
reg = Registry(session=session)
mv = reg.get_model("EURO_24_GAME_PREDICT").default

# Feature rows produced by the prep step, ordered by id
pred_df = session.table('data_for_predictions').order_by('id')
prediction = mv.run(pred_df, function_name="predict_proba")

# A row counts as a win (1) only when predict_proba_1 exceeds the threshold
beats_threshold = F.col('predict_proba_1') > win_threshold
prediction = prediction.with_column('output_game_outcome', F.iff(beats_threshold, 1, 0))

# Persist to a temporary table that the downstream stored procedures read
prediction.write.save_as_table('predictions', mode='overwrite', table_type='temp')

summary_columns = [
    prediction.col(column_name)
    for column_name in ('id', 'team_1', 'output_game_outcome')
]
session.table('predictions').select(*summary_columns).order_by('id').show(4)

In [None]:
# Resolve the semi-final predictions into winners and display them
session.call('process_knockout_predictions')

ko_results = session.table('results_ko_stage').order_by('match_id')
ko_results.show()

# Final 

In [None]:
# Work out who meets in the final, then display that fixture
round_name = 'Final'
session.call('calc_knockout_games', round_name)

round_fixtures = (
    session.table(f'fixture_{user_name}')
    .filter(F.col('"ROUND NUMBER"') == round_name)
    .order_by('"MATCH NUMBER"')
)
round_fixtures.show()

In [None]:
# Feature engineering for the final (a single fixture, id 51)
first_fixture, last_fixture = 51, 51
session.call('prep_prediction_data', first_fixture, last_fixture)

final_features = session.table('data_for_predictions').order_by('id')
final_features.show(2)

# And the winner is... 

In [None]:
# Predict the final, resolve the winner, and celebrate
import streamlit as st

from snowflake.ml.registry import Registry

# Default version of the model stored in the registry
reg = Registry(session=session)
mv = reg.get_model("EURO_24_GAME_PREDICT").default

# Feature rows produced by the prep step, ordered by id
pred_df = session.table('data_for_predictions').order_by('id')

# Unlike the earlier rounds, this cell calls the model's "predict" function and
# does not apply win_threshold to predict_proba_1.
# NOTE(review): presumably "predict" emits output_game_outcome itself, since
# that column is selected below — confirm, and confirm the difference from the
# earlier rounds is intentional.
prediction = mv.run(pred_df, function_name="predict")

# Persist to a temporary table that the downstream stored procedure reads
prediction.write.save_as_table('predictions', mode='overwrite', table_type='temp')

summary_columns = [
    prediction.col(column_name)
    for column_name in ('id', 'team_1', 'output_game_outcome')
]
session.table('predictions').select(*summary_columns).order_by('id').show()

# Resolve the knockout result and display the full knockout-stage table
session.call('process_knockout_predictions')
ko_results = session.table('results_ko_stage').order_by('match_id')
ko_results.show()

# Streamlit's snowfall animation to mark the end of the tournament
st.snow()