In [149]:
import os
import snowflake.snowpark.functions as F

In [150]:
# Path to the shared helper notebook; it is resolved against the current
# working directory, so a `Utils` folder is expected to sit next to wherever
# the kernel was started.
path_utils = os.path.join(os.getcwd(),'Utils','utils.ipynb')
# Execute the helper notebook inside this kernel. Its banner (printed below)
# lists the packages and functions it makes available to the following cells.
%run $path_utils

####################################################
Available packages:
    from snowflake.snowpark.session import Session
    from snowflake.snowpark import functions as F
    from snowflake.snowpark.types import *
    from datetime import datetime

    import configparser
    import os

####################################################

Available functions, for details use help(<function_name>:
    get_snowpark_session
    set_schema
    read_csv_from_stage
    create_hz_dim



In [151]:
# Both helpers are listed in the banner printed by the utils notebook above;
# `session` is the object the later cells call `.sql(...)` on.
session = get_snowpark_session()
# NOTE(review): presumably makes HZ_CLEAR_STRATEGY the session's current
# schema, which unqualified names such as saveAsTable('dim_calendar') would
# then resolve against -- confirm in the utils notebook.
set_schema('HZ_CLEAR_STRATEGY')

[Row(status='Statement executed successfully.')]

## DIM_CALENDAR

In [152]:
# Calendar dimension: one row per distinct game date present in the source
# data, together with the calendar attributes derived from that date.
#
# The distinct dates are computed once in a CTE; every output column is a
# function of the date alone, so this yields the same rows as applying
# `select distinct` to the full expression list.
#
# `season` is formatted with Snowflake's own format elements ('YYYY-MM').
# The previous strftime-style pattern ('%Y-%m') is not a Snowflake format and
# did not produce a year-month value.
query_dim_calendar = """
    with game_dates as (
        select distinct cast(d.date_of_game as date) as game_date
        from pz_clear_strategy.football_data d
        where d.date_of_game is not null
    )
    select
        g.game_date as "date",
        year(g.game_date) as year,
        month(g.game_date) as month,
        day(g.game_date) as day,
        cast(to_char(g.game_date, 'YYYY-MM') as varchar(7)) as season,
        cast(case when month(g.game_date) <= 3 then concat(year(g.game_date), '_01')
            when month(g.game_date) <= 6 then concat(year(g.game_date), '_02')
            when month(g.game_date) <= 9 then concat(year(g.game_date), '_03')
            else concat(year(g.game_date), '_04') end as varchar(7)) as year_quarter,
        case when month(g.game_date) <= 3 then '01'
            when month(g.game_date) <= 6 then '02'
            when month(g.game_date) <= 9 then '03' else '04' end as quarter,
        weekofyear(g.game_date) as week_of_year,
        dayofweek(g.game_date) as day_of_week,
        case when dayofweekiso(g.game_date) in (6,7) then 1 else 0 end as weekend
    from game_dates g
    order by g.game_date
"""

# Named after what it holds (it was previously called df_dim_area_of_shot,
# a leftover from a copied cell).
df_dim_calendar = session.sql(query_dim_calendar)
# Full rebuild on every run: the table is overwritten, not appended to.
df_dim_calendar.write.mode("overwrite").saveAsTable('dim_calendar')


## DIM_AREA_OF_SHOT

In [165]:
table_name = 'dim_area_of_shot'

# Initial load: every distinct, upper-cased area_of_shot with its source path.
# Plain string on purpose -- there is nothing to interpolate.
query_new = """select distinct
                    upper(d.area_of_shot) as area_of_shot,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.area_of_shot is not null"""

# Incremental load: only source rows loaded within one day of the dimension's
# latest load_at, and only values the dimension does not contain yet.
# Both references to the dimension are schema-qualified so the statement does
# not depend on which schema the session currently points at.
query_append = f"""select distinct
                        upper(d.area_of_shot) as area_of_shot,
                        d.path,
                        sysdate() as load_at
                    from pz_clear_strategy.football_data d
                    where not exists (select 1 from hz_clear_strategy.{table_name} aos
                                            where aos.area_of_shot = upper(d.area_of_shot))
                    and d.area_of_shot is not null
                    and d.load_at >= (select dateadd(day, -1, max(prev.load_at))
                                        from hz_clear_strategy.{table_name} prev)"""

# Judging by its printed output, create_hz_dim creates the table from
# query_new when it is missing and otherwise appends using query_append.
create_hz_dim(table_name, query_new, query_append)

Table: dim_area_of_shot will be created
------------------------------------------------------
|"status"                                            |
------------------------------------------------------
|SEQ_DIM_AREA_OF_SHOT already exists, statement ...  |
------------------------------------------------------

Execution finished, dim_area_of_shot created


## DIM_SHOT_BASICS

In [154]:
table_name = 'dim_shot_basics'

# Initial load: every distinct shot_basics value, stored exactly as it appears
# in the source (the fact query in this notebook joins on the raw
# d.shot_basics value, so the stored case must match the source).
query_new = """select distinct
                    d.shot_basics,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.shot_basics is not null"""

# Incremental load: recent source rows whose value is not in the dimension yet.
# The value is kept in source case, same as query_new. The previous version
# upper-cased it here only, so the not-exists check never matched rows from
# the initial load and each value was inserted a second time in upper case.
# Both references to the dimension are schema-qualified so the statement does
# not depend on the session's current schema.
query_append = f"""select distinct
                        d.shot_basics,
                        d.path,
                        sysdate() as load_at
                    from pz_clear_strategy.football_data d
                    where d.shot_basics is not null
                    and d.load_at >= (select dateadd(day, -1, max(prev.load_at))
                                        from hz_clear_strategy.{table_name} prev)
                    and not exists (select 1 from hz_clear_strategy.{table_name} dsb
                                            where dsb.shot_basics = d.shot_basics)"""

# Judging by its printed output, create_hz_dim creates the table from
# query_new when it is missing and otherwise appends using query_append.
create_hz_dim(table_name, query_new, query_append)

Table: dim_shot_basics will be created
------------------------------------------------------
|"status"                                            |
------------------------------------------------------
|SEQ_DIM_SHOT_BASICS already exists, statement s...  |
------------------------------------------------------

Execution finished, dim_shot_basics created


## DIM_COMBINED_SHOT_TYPES

In [155]:
table_name = 'dim_combined_shot_types'

# Initial load: every distinct, upper-cased combined shot type with its path.
# Plain string on purpose -- there is nothing to interpolate.
query_new = """select distinct
                upper(d.type_of_combined_shot) as combined_shot_type,
                d.path,
                sysdate() as load_at
            from pz_clear_strategy.football_data d
            where d.type_of_combined_shot is not null"""

# Incremental load: only source rows loaded within one day of the dimension's
# latest load_at, and only values the dimension does not contain yet.
# Both references to the dimension are schema-qualified so the statement does
# not depend on which schema the session currently points at.
query_append = f"""select distinct
                    upper(d.type_of_combined_shot) as combined_shot_type,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.type_of_combined_shot is not null
                and d.load_at >= (select dateadd(day, -1, max(prev.load_at))
                                    from hz_clear_strategy.{table_name} prev)
                and not exists (select 1 from hz_clear_strategy.{table_name} cst
                                        where cst.combined_shot_type = upper(d.type_of_combined_shot))
"""

# Judging by its printed output, create_hz_dim creates the table from
# query_new when it is missing and otherwise appends using query_append.
create_hz_dim(table_name, query_new, query_append)

Table: dim_combined_shot_types will be created
------------------------------------------------------
|"status"                                            |
------------------------------------------------------
|SEQ_DIM_COMBINED_SHOT_TYPES already exists, sta...  |
------------------------------------------------------

Execution finished, dim_combined_shot_types created


## DIM_TEAM

In [156]:
table_name = 'dim_team'

# Initial load: every distinct (team_id, upper-cased team_name) pair.
# Plain string on purpose -- there is nothing to interpolate.
query_new = """select distinct
                    cast(d.team_id as int) as team_id,
                    upper(d.team_name) as team_name,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.team_name is not null"""

# Incremental load: recent source rows whose team name is not in the
# dimension yet.
# The null filter is on team_name, matching query_new. It previously tested
# type_of_combined_shot (copied from another dimension), which let rows with
# a null team_name through and dropped teams whose rows had no combined shot.
# Both references to the dimension are schema-qualified so the statement does
# not depend on which schema the session currently points at.
query_append = f"""select distinct
                    cast(d.team_id as int) as team_id,
                    upper(d.team_name) as team_name,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.team_name is not null
                and d.load_at >= (select dateadd(day, -1, max(prev.load_at))
                                    from hz_clear_strategy.{table_name} prev)
                and not exists (select 1 from hz_clear_strategy.{table_name} dteam
                                        where dteam.team_name = upper(d.team_name))
"""

# Judging by its printed output, create_hz_dim creates the table from
# query_new when it is missing and otherwise appends using query_append.
create_hz_dim(table_name, query_new, query_append)

Table: dim_team will be created
-----------------------------------------------------
|"status"                                           |
-----------------------------------------------------
|SEQ_DIM_TEAM already exists, statement succeeded.  |
-----------------------------------------------------

Execution finished, dim_team created


In [163]:
table_name = 'dim_team'

# Initial load: every distinct (team_id, upper-cased team_name) pair.
# Plain string on purpose -- there is nothing to interpolate.
query_new = """select distinct
                    cast(d.team_id as int) as team_id,
                    upper(d.team_name) as team_name,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.team_name is not null"""

# Incremental load of team names taken from the `home` column: the regexp
# strips the first two space-separated tokens of `home`, and what is left
# (trimmed, upper-cased) is used as the team name. No team_id is available
# for these rows, so it is loaded as null.
# NOTE(review): assumes `home` always carries the team name after its first
# two tokens -- confirm against the source data.
# Both references to the dimension are schema-qualified so the statement does
# not depend on which schema the session currently points at.
query_append = f"""select distinct
                    null as team_id,
                    upper(trim(regexp_replace(d.home,'^([^ ]+ [^ ]+)',''))) as team_name,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.home is not null
                and d.load_at >= (select dateadd(day, -1, max(prev.load_at))
                                    from hz_clear_strategy.{table_name} prev)
                and not exists (select 1 from hz_clear_strategy.{table_name} dteam
                                        where dteam.team_name = upper(trim(regexp_replace(d.home,'^([^ ]+ [^ ]+)',''))))
"""

# Judging by its printed output, create_hz_dim creates the table from
# query_new when it is missing and otherwise appends using query_append.
create_hz_dim(table_name, query_new, query_append)

-------------------------------------------------------------------------------------------------------------
|"ID_DIM_TEAM"  |"TEAM_ID"   |"TEAM_NAME"        |"PATH"                       |"LOAD_AT"                   |
-------------------------------------------------------------------------------------------------------------
|42             |1610612747  |MANCHESTER UNITED  |raw_data_stage/yds_data.csv  |2023-06-10 23:08:03.171000  |
-------------------------------------------------------------------------------------------------------------

Table: dim_team alread exists
Execution finished, dim_team appended


## DIM_RANGE_OF_SHOT

In [158]:
table_name = 'dim_range_of_shot'

# Initial load: every distinct, upper-cased range_of_shot with its source path.
# Plain string on purpose -- there is nothing to interpolate.
query_new = """select distinct
                    upper(d.range_of_shot) as range_of_shot,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.range_of_shot is not null"""

# Incremental load: only source rows loaded within one day of the dimension's
# latest load_at, and only values the dimension does not contain yet.
# Both references to the dimension are schema-qualified so the statement does
# not depend on which schema the session currently points at.
query_append = f"""select distinct
                    upper(d.range_of_shot) as range_of_shot,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.range_of_shot is not null
                and d.load_at >= (select dateadd(day, -1, max(prev.load_at))
                                    from hz_clear_strategy.{table_name} prev)
                and not exists (select 1 from hz_clear_strategy.{table_name} ros
                                        where ros.range_of_shot = upper(d.range_of_shot))
"""

# Judging by its printed output, create_hz_dim creates the table from
# query_new when it is missing and otherwise appends using query_append.
create_hz_dim(table_name, query_new, query_append)

-------------------------------------------------------------------------
|"ID_DIM_RANGE_OF_SHOT"  |"RANGE_OF_SHOT"  |"LOAD_AT"                   |
-------------------------------------------------------------------------
|1                       |16-24 FT.        |2023-06-10 20:24:28.085000  |
-------------------------------------------------------------------------

Table: dim_range_of_shot alread exists
Table: dim_range_of_shot will be created
------------------------------------------------------
|"status"                                            |
------------------------------------------------------
|SEQ_DIM_RANGE_OF_SHOT already exists, statement...  |
------------------------------------------------------

Execution finished, dim_range_of_shot created


## DIM_SHOT_TYPES

In [159]:
table_name = 'dim_shot_types'

# Initial load: every distinct, upper-cased type_of_shot, stored in the
# dimension under the column name `shot_type`.
# Plain string on purpose -- there is nothing to interpolate.
query_new = """select distinct
                    upper(d.type_of_shot) as shot_type,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.type_of_shot is not null"""

# Incremental load: recent source rows whose shot type is not in the
# dimension yet.
# The dedup check compares against `st.shot_type`, the column the dimension
# actually has; it previously referenced `st.type_of_shot`, which is the
# source column name and does not exist on the dimension.
# Both references to the dimension are schema-qualified so the statement does
# not depend on which schema the session currently points at.
query_append = f"""select distinct
                    upper(d.type_of_shot) as shot_type,
                    d.path,
                    sysdate() as load_at
                from pz_clear_strategy.football_data d
                where d.type_of_shot is not null
                and d.load_at >= (select dateadd(day, -1, max(prev.load_at))
                                    from hz_clear_strategy.{table_name} prev)
                and not exists (select 1 from hz_clear_strategy.{table_name} st
                                        where st.shot_type = upper(d.type_of_shot))
"""

# Judging by its printed output, create_hz_dim creates the table from
# query_new when it is missing and otherwise appends using query_append.
create_hz_dim(table_name, query_new, query_append)

Table: dim_shot_types will be created
------------------------------------------------------
|"status"                                            |
------------------------------------------------------
|SEQ_DIM_SHOT_TYPES already exists, statement su...  |
------------------------------------------------------

Execution finished, dim_shot_types created


## FT_SHOTS

In [160]:
# Schema that holds the dimension tables joined below.
zone = 'hz_clear_strategy'

# The query is restricted to this single match.
# NOTE(review): looks like a preview/debugging filter -- confirm before this
# query is used to load the full fact table.
match_id_filter = 20000012

# Fact query: one row per shot, with the surrogate keys of every dimension
# resolved through left joins (a shot with no matching dimension row keeps a
# null key instead of being dropped).
# Every source column is qualified with `d.` so that none of them can become
# ambiguous if a joined dimension gains a column with the same name.
# shot_basics is joined on the raw source value, the other dimensions on the
# upper-cased value.
query_shots = f"""
    select
        cast(d.match_id as int) as match_id,
        cast(d.shot_id_number as int) as shot_id,
        st.id_dim_shot_types,
        cst.id_dim_combined_shot_types,
        dt.id_dim_team,
        ros.id_dim_range_of_shot,
        b.id_dim_shot_basics,
        aos.id_dim_area_of_shot,
        cast(d.location_x as int) as location_x,
        cast(d.location_y as int) as location_y,
        cast(d.remaining_min as int) as remaining_min_int,
        cast(d.remaining_min2 as float) as remaining_min_float,
        cast(d.power_of_shot as int) as power_of_shot_int,
        cast(d.power_of_shot3 as float) as power_of_shot_float,
        cast(d.remaining_sec as int) as remaining_sec_int,
        cast(d.remaining_sec5 as float) as remaining_sec_float,
        cast(d.distance_of_shot as int) as distance_of_shot_int,
        cast(d.distance_of_shot6 as float) as distance_of_shot_float,
        cast(d.is_goal as int) as is_goal,
        d.path,
        sysdate() as load_at
    from pz_clear_strategy.football_data d

    left join {zone}.dim_shot_basics b
    on b.shot_basics = d.shot_basics

    left join {zone}.dim_team dt
    on dt.team_name = upper(d.team_name)

    left join {zone}.dim_area_of_shot aos
    on aos.area_of_shot = upper(d.area_of_shot)

    left join {zone}.dim_range_of_shot ros
    on ros.range_of_shot = upper(d.range_of_shot)

    left join {zone}.dim_shot_types st
    on st.shot_type = upper(d.type_of_shot)

    left join {zone}.dim_combined_shot_types cst
    on cst.combined_shot_type = upper(d.type_of_combined_shot)

    where d.shot_id_number is not null
    and d.match_id = {match_id_filter}
"""

df_shots = session.sql(query_shots)

In [161]:
# Print a preview of the fact query result, to eyeball the resolved dimension
# keys and the casted measures.
df_shots.show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"MATCH_ID"  |"SHOT_ID"  |"ID_DIM_SHOT_TYPES"  |"ID_DIM_COMBINED_SHOT_TYPES"  |"ID_DIM_TEAM"  |"ID_DIM_RANGE_OF_SHOT"  |"ID_DIM_SHOT_BASICS"  |"ID_DIM_AREA_OF_SHOT"  |"LOCATION_X"  |"LOCATION_Y"  |"REMAINING_MIN_INT"  |"REMAINING_MIN_FLOAT"  |"POWER_OF_SHOT_INT"  |"POWER_OF_SHOT_FLOAT"  |"REMAINING_SEC_INT"  |"REMAINING_SEC_FLOAT"  |"DISTANCE_OF_SHOT_INT"  |"DISTANCE_OF_SHOT_FLOAT"  |"IS_GOAL"  |"PATH"                       |"LOAD_AT"                   |
----------------------------------------------------------------------------------