In [8]:
import pandas as pd
import psycopg2
import dotenv
import os

# Load variables from a local .env file (if one exists) into the process environment.
dotenv.load_dotenv()

# The password deliberately has NO in-source fallback: a credential written into a
# notebook is leaked to everyone who can read the file or its history.
# NOTE(review): any password that was ever stored in this notebook should be rotated.
PG_PASSWORD = os.getenv("PG_PASSWORD")
if not PG_PASSWORD:
    raise RuntimeError(
        "PG_PASSWORD is not set; define it in the environment or in a .env file."
    )

# Non-secret connection settings keep their defaults so the notebook still runs
# with only the password configured.
PG_USER = os.getenv("PG_USER", "busit_87")
PG_HOST = os.getenv("PG_HOST", "fuji.ucll.be")
PG_PORT = os.getenv("PG_PORT", "52425")
# The database name is read from PG_DB (not PG_DATABASE).
PG_DATABASE = os.getenv("PG_DB", "international_week")

# Module-level connection shared by the query helpers defined in later cells.
# NOTE(review): sslmode="allow" lets the driver connect without TLS; consider
# "require" if the server supports it — confirm with the database administrator.
conn = psycopg2.connect(
    host=PG_HOST,
    database=PG_DATABASE,
    user=PG_USER,
    password=PG_PASSWORD,
    port=PG_PORT,
    sslmode="allow",
)


In [9]:

def count_passes(game_id, our_team, conn, filter_sql):
    """Count one team's actions in one game that satisfy a caller-supplied filter.

    The query reads ``public.spadl_actions`` for ``game_id`` and builds, step by step:

    * ``action_changes`` – every action of the game plus the team of the previous
      and next action, ordered by ``(period_id, seconds, id)``.
    * ``possession_markers`` / ``possession_groups`` – a running group number that
      increases each time the acting team differs from the previous action's team.
    * ``count_rows_in_possession_group`` – a running row count inside each group.
    * ``initial_value_per_group`` – the smallest ``seconds`` value of each group.
    * ``possession_stats`` – number of actions per group and team.
    * ``final_query`` – adds ``seconds_difference`` (``seconds`` minus the group's
      smallest ``seconds``) and ``action_distance`` (``ABS(start_x - end_x)``).
    * ``filters`` – keeps rows of ``our_team`` in groups with at least 3 actions.
    * ``start_with_ball_table`` / ``start_with_period`` / ``direction`` – derive
      ``ballMoveDirection`` ('FORWARD', 'BACKWARD' or NULL) from the sign of the
      x displacement and the side inferred for the team in that period.

    Finally ``SELECT COUNT(*) FROM direction`` is executed with ``filter_sql``
    appended.

    Parameters
    ----------
    game_id : value identifying the game; bound as a query parameter.
    our_team : value identifying the team; bound as a query parameter.
    conn : database connection (or engine) accepted by ``pandas.read_sql_query``.
        The placeholders use the ``%(name)s`` style, as understood by psycopg2.
    filter_sql : str
        SQL text appended verbatim after ``FROM direction`` (typically a
        ``WHERE ...`` clause). It is NOT sanitised, so it must come from trusted
        code only. ``None`` or ``""`` means "no extra filter".

    Returns
    -------
    pandas.DataFrame
        Whatever ``pandas.read_sql_query`` returns for the ``COUNT(*)`` statement
        (a single-row, single-column frame).
    """
    # Parameters are now bound by the driver, so a literal '%' inside the trusted
    # filter fragment has to be doubled to reach the server unchanged.
    trusted_filter = (filter_sql or "").replace("%", "%%")

    query = """
    WITH action_changes AS (
        SELECT
            a.*,
            LAG(a.team_id) OVER (ORDER BY a.period_id, a.seconds, a.id) AS prev_team_id,
            LEAD(a.team_id) OVER (ORDER BY a.period_id, a.seconds, a.id) AS next_team_id
        FROM
            public.spadl_actions a
        WHERE
            a.game_id = %(game_id)s
    ),
    possession_markers AS (
        SELECT
            *,
            CASE WHEN prev_team_id IS NULL OR team_id != prev_team_id THEN 1 ELSE 0 END AS is_new_possession
        FROM
            action_changes
    ),
    possession_groups AS (
        SELECT
            *,
            SUM(is_new_possession) OVER (ORDER BY period_id, seconds, id) AS possession_group
        FROM
            possession_markers
    ),
    count_rows_in_possession_group AS (
        SELECT
            *,
            COUNT(*) OVER (PARTITION BY possession_group ORDER BY period_id, seconds, id) AS rows_in_possession_group
        FROM
            possession_groups
    ),
    initial_value_per_group AS (
        SELECT
            possession_group,
            MIN(seconds) AS initial_seconds
        FROM
            count_rows_in_possession_group
        GROUP BY
            possession_group
    ),
    possession_stats AS (
        SELECT
            possession_group,
            team_id,
            COUNT(*) AS action_count,
            MAX(id) AS last_action_id
        FROM
            count_rows_in_possession_group
        GROUP BY
            possession_group, team_id
    ),
    final_query AS (
        SELECT
            c.*,
            (c.seconds - i.initial_seconds) AS seconds_difference,
            ABS(c.start_x - c.end_x) AS action_distance
        FROM
            count_rows_in_possession_group c
        JOIN
            initial_value_per_group i
        ON
            c.possession_group = i.possession_group
    ),
    filters AS (
        SELECT *
        FROM final_query
        WHERE team_id = %(our_team)s
        AND possession_group IN (
            SELECT possession_group FROM possession_stats WHERE action_count >= 3
        )
    ),
    start_with_ball_table AS (
        SELECT *,
            CASE
                WHEN FIRST_VALUE(prev_team_id) OVER (ORDER BY id) IS NULL AND FIRST_VALUE(end_x) OVER (ORDER BY id) < 52.5
                THEN 'LEFT_SIDE'
                WHEN FIRST_VALUE(prev_team_id) OVER (ORDER BY id) IS NOT NULL AND FIRST_VALUE(end_x) OVER (ORDER BY id) < 52.5
                THEN 'RIGHT_SIDE'
                WHEN FIRST_VALUE(prev_team_id) OVER (ORDER BY id) IS NULL AND FIRST_VALUE(end_x) OVER (ORDER BY id) > 52.5
                THEN 'RIGHT_SIDE'
                ELSE 'LEFT_SIDE'
            END AS start_with_ball
        FROM filters
    ),
    start_with_period AS (
        SELECT *,
            CASE
                WHEN start_with_ball = 'LEFT_SIDE' and period_id = 2
                THEN 'RIGHT_SIDE'
                WHEN start_with_ball = 'RIGHT_SIDE' and period_id = 2
                THEN 'LEFT_SIDE'
                WHEN start_with_ball = 'LEFT_SIDE' and period_id = 1
                THEN 'LEFT_SIDE'
                WHEN start_with_ball = 'RIGHT_SIDE' and period_id = 1
                THEN 'RIGHT_SIDE'
            END AS update_start_with_ball
        FROM start_with_ball_table
    ),
    direction AS (
        SELECT *,
            CASE
            WHEN start_x > end_x AND update_start_with_ball = 'RIGHT_SIDE' THEN 'FORWARD'
            WHEN start_x < end_x AND update_start_with_ball = 'RIGHT_SIDE' THEN 'BACKWARD'
            WHEN start_x < end_x AND update_start_with_ball = 'LEFT_SIDE' THEN 'FORWARD'
            WHEN start_x > end_x AND update_start_with_ball = 'LEFT_SIDE' THEN 'BACKWARD'
            ELSE NULL
        END AS ballMoveDirection
        FROM start_with_period
    )

    SELECT COUNT(*) FROM direction """ + trusted_filter + """

    """

    # The identifiers used to be pasted into the SQL as quoted literals. Binding
    # them as strings keeps that comparison behaviour while letting the driver do
    # the quoting, so a value containing a quote can no longer alter the statement.
    params = {"game_id": str(game_id), "our_team": str(our_team)}
    return pd.read_sql_query(query, conn, params=params)



In [10]:
def _count_as_int(result):
    """Return the single COUNT(*) value carried by ``result`` as a plain int.

    ``count_passes`` hands back a one-row, one-column DataFrame; a bare number is
    accepted as well so the caller works with either form.
    """
    if isinstance(result, pd.DataFrame):
        return int(result.iloc[0, 0])
    return int(result)


def count_average_passes_per_team(query):
    """Average, per team, the per-match count returned by ``count_passes``.

    Every match in ``public.matches`` is visited once; for each match
    ``count_passes`` is called for the home team and then for the away team
    (two database queries per match), using the module-level ``conn``.

    Parameters
    ----------
    query : str
        SQL filter fragment forwarded unchanged to ``count_passes`` as its
        ``filter_sql`` argument (despite the name it is not a full query).

    Returns
    -------
    pandas.DataFrame
        Columns ``team_name`` and ``avg_passes`` (float): total count divided by
        the number of matches the team appeared in. Rows follow the order in
        which team names are first met while walking the matches by ``match_id``.
    """
    query_match = """
    SELECT 
        m.match_id, 
        m.home_team_id, 
        ht.team_name AS home_team_name, 
        m.away_team_id, 
        at.team_name AS away_team_name
    FROM public.matches m
    JOIN public.teams ht ON m.home_team_id = ht.team_id
    JOIN public.teams at ON m.away_team_id = at.team_id
    ORDER BY m.match_id ASC;
    """

    matches = pd.read_sql_query(query_match, conn)

    team_stats = {}
    for row in matches.itertuples(index=False):
        # Home side first, then away side — same call order as one pair per match.
        sides = (
            (row.home_team_id, row.home_team_name),
            (row.away_team_id, row.away_team_name),
        )
        for team_id, team_name in sides:
            # Reduce the one-cell frame to a number right away. Accumulating the
            # frames themselves forced a later text round-trip through their
            # printed form, which truncated the averages to display precision.
            passes = _count_as_int(count_passes(row.match_id, team_id, conn, query))
            stats = team_stats.setdefault(
                team_name, {'total_passes': 0, 'match_count': 0}
            )
            stats['total_passes'] += passes
            stats['match_count'] += 1

    team_data = [
        (team_name, stats['total_passes'] / stats['match_count'])
        for team_name, stats in team_stats.items()
    ]

    average_df = pd.DataFrame(team_data, columns=['team_name', 'avg_passes'])
    # Guarantees a float column even when there are no matches at all.
    return average_df.astype({'avg_passes': float})

In [11]:
# Filter fragment that count_passes appends to its final SELECT: keeps actions
# at most 10 seconds after the earliest action of their possession group, with
# action_type '0' (presumably the pass type — confirm against the schema).
filter_sql_all = (
    "WHERE seconds_difference <= 10 "
    "AND action_type IN ('0')"
)

In [12]:
# Per-team average of the per-match counts for the unrestricted filter.
# Runs two count queries per match, so this cell can take a while.
df_filter_sql_all = count_average_passes_per_team(query=filter_sql_all)

  df = pd.read_sql_query(query_match, conn)
  return pd.read_sql_query(query, conn)


In [13]:
# Show teams ranked from highest to lowest average.
df_filter_sql_all.sort_values('avg_passes', ascending=False)

Unnamed: 0,team_name,avg_passes
0,Club Brugge,247.315789
12,Sporting Charleroi,227.666667
13,Antwerp,220.888889
8,Genk,217.315789
3,Union Saint-Gilloise,215.722222
7,Sint-Truiden,212.333333
1,Mechelen,211.0
6,Anderlecht,200.684211
2,Dender,197.0
11,Gent,191.578947


In [14]:
# Same time and action-type restriction as the base filter, narrowed to actions
# whose computed ballMoveDirection is 'FORWARD'.
filter_sql_forward = (
    "WHERE seconds_difference <= 10 "
    "AND action_type IN ('0') "
    "AND ballMoveDirection = 'FORWARD'"
)

In [15]:
# Per-team average of the per-match counts, forward-moving actions only.
df_filter_sql_forward = count_average_passes_per_team(query=filter_sql_forward)

  df = pd.read_sql_query(query_match, conn)
  return pd.read_sql_query(query, conn)


In [16]:
# Show teams ranked from highest to lowest forward average.
df_filter_sql_forward.sort_values('avg_passes', ascending=False)

Unnamed: 0,team_name,avg_passes
0,Club Brugge,124.947368
12,Sporting Charleroi,113.222222
8,Genk,110.421053
13,Antwerp,107.5
3,Union Saint-Gilloise,107.333333
7,Sint-Truiden,105.944444
1,Mechelen,105.277778
6,Anderlecht,100.0
2,Dender,98.0
11,Gent,94.210526


In [17]:
# Forward filter further restricted to actions whose x displacement
# (action_distance = ABS(start_x - end_x)) exceeds 20.
# NOTE(review): the variable name implies metres — confirm the coordinate units.
filter_sql_forward_and_20m_distance = (
    "WHERE seconds_difference <= 10 "
    "AND action_type IN ('0') "
    "AND ballMoveDirection = 'FORWARD' "
    "AND action_distance>20"
)

In [18]:
# Per-team average of the per-match counts, long forward actions only.
df_filter_sql_forward_and_20m_distance = count_average_passes_per_team(
    query=filter_sql_forward_and_20m_distance
)

  df = pd.read_sql_query(query_match, conn)
  return pd.read_sql_query(query, conn)


In [19]:
# Show teams ranked from highest to lowest long-forward average.
df_filter_sql_forward_and_20m_distance.sort_values('avg_passes', ascending=False)

Unnamed: 0,team_name,avg_passes
3,Union Saint-Gilloise,18.722222
2,Dender,18.444444
13,Antwerp,17.444444
12,Sporting Charleroi,17.222222
0,Club Brugge,17.105263
15,Cercle Brugge,16.388889
14,Westerlo,16.277778
1,Mechelen,15.611111
5,OH Leuven,14.777778
4,Beerschot,14.529412


In [20]:
# Put both averages side by side, one row per team present in both tables
# (inner join on team_name); the suffixes tell the two avg_passes columns apart.
df_merged = pd.merge(
    df_filter_sql_forward_and_20m_distance,
    df_filter_sql_forward,
    how='inner',
    on='team_name',
    suffixes=('_forward_20m', '_forward'),
)


In [21]:
# Fraction of each team's forward average that comes from the >20 subset,
# stored as a new column on df_merged.
long_forward_avg = df_merged['avg_passes_forward_20m']
forward_avg = df_merged['avg_passes_forward']
df_merged['ratio'] = long_forward_avg.div(forward_avg)

# Show teams ranked from highest to lowest ratio.
df_merged.sort_values('ratio', ascending=False)

Unnamed: 0,team_name,avg_passes_forward_20m,avg_passes_forward,ratio
2,Dender,18.444444,98.0,0.188209
14,Westerlo,16.277778,89.444444,0.181988
15,Cercle Brugge,16.388889,90.111111,0.181874
10,Kortrijk,14.277778,79.722222,0.179094
3,Union Saint-Gilloise,18.722222,107.333333,0.174431
5,OH Leuven,14.777778,90.611111,0.16309
13,Antwerp,17.444444,107.5,0.162274
9,Standard Liège,12.947368,80.578947,0.160679
4,Beerschot,14.529412,94.176471,0.154279
12,Sporting Charleroi,17.222222,113.222222,0.15211
