In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import os
import warnings
warnings.filterwarnings('ignore')
import util

# Create output directory for features (won't modify the database)
OUTPUT_DIR = "features"
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Display settings for better notebook visualization
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', 1000)

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import os
import warnings
warnings.filterwarnings('ignore')
import util
import pandas as pd

# Display settings for better notebook visualization
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', 1000)

db = util.DatabaseConnection()

# Get all team IDs
team_query = "SELECT team_id, team_name FROM teams;"
teams_df = db.execute_query(team_query)

# Initialize a dictionary to store results for each team
all_transition_data = {}

# SQL query template
query_template = """
-- SIMPLIFIED DEFENSIVE TRANSITION ANALYSIS QUERY
WITH action_changes AS (
    SELECT
        a.*,
        LAG(a.team_id) OVER (PARTITION BY a.game_id ORDER BY a.period_id, a.seconds, a.id) AS prev_team_id,
        LEAD(a.team_id) OVER (PARTITION BY a.game_id ORDER BY a.period_id, a.seconds, a.id) AS next_team_id
    FROM
        public.spadl_actions a
),
possession_loss_moments AS (
    SELECT
        ac.id,
        ac.game_id,
        ac.period_id,
        ac.seconds AS loss_time,
        ac.team_id AS team_losing_ball,
        ac.next_team_id AS team_gaining_ball,
        ac.start_x AS loss_x,
        ac.start_y AS loss_y
    FROM
        action_changes ac
    WHERE
        ac.team_id = '{team_id}'
        AND ac.next_team_id IS NOT NULL
        AND ac.next_team_id != ac.team_id
),
actions_after_loss AS (
    SELECT
        plm.id AS loss_event_id,
        plm.game_id,
        plm.period_id,
        plm.loss_time,
        plm.team_losing_ball,
        plm.team_gaining_ball,
        plm.loss_x,
        plm.loss_y,
        a.id AS action_id,
        a.seconds AS action_time,
        (a.seconds - plm.loss_time) AS seconds_after_loss,
        a.team_id,
        t.team_name,
        a.player_id,
        p.player_name,
        a.action_type,
        a.result,
        a.start_x,
        a.start_y,
        a.end_x,
        a.end_y
    FROM
        possession_loss_moments plm
    JOIN
        spadl_actions a ON plm.game_id = a.game_id
            AND a.period_id = plm.period_id
            AND a.seconds > plm.loss_time
            AND a.seconds <= plm.loss_time + 10
    JOIN
        players p ON a.player_id = p.player_id
    JOIN
        teams t ON a.team_id = t.team_id
)
SELECT
    loss_event_id,
    game_id,
    period_id,
    loss_time,
    team_losing_ball,
    team_gaining_ball,
    loss_x,
    loss_y,
    action_id,
    seconds_after_loss,
    team_id,
    team_name,
    player_id,
    player_name,
    action_type,
    result,
    start_x,
    start_y,
    end_x,
    end_y
FROM 
    actions_after_loss
ORDER BY
    game_id,
    period_id,
    loss_time,
    seconds_after_loss;
"""

# Loop through all teams and execute the query for each
for _, team_row in teams_df.iterrows():
    team_id = team_row['team_id']
    team_name = team_row['team_name']
    
    print(f"Processing team: {team_name}")
    
    # Replace the placeholder with the actual team ID
    current_query = query_template.replace('{team_id}', team_id)
    
    # Execute the query
    team_data = db.execute_query(current_query)
    
    # Store the result
    all_transition_data[team_id] = team_data
    
    print(f"Found {len(team_data)} actions for {team_name}")

# Combine all data into a single DataFrame if needed
all_teams_df = pd.concat(all_transition_data.values(), ignore_index=True)

# Optional: Remove any columns you don't need
# columns_to_keep = [
#     'game_id', 'period_id', 'team_losing_ball', 'team_gaining_ball',
#     'seconds_after_loss', 'action_type', 'result',
#     'start_x', 'start_y', 'end_x', 'end_y'
# ]
# filtered_df = all_teams_df[columns_to_keep]

# Save to CSV
all_teams_df.to_csv('all_teams_transition_data.csv', index=False)

Database connection established
Processing team: Club Brugge
Found 10417 actions for Club Brugge
Processing team: Mechelen
Found 9244 actions for Mechelen
Processing team: Sporting Charleroi
Found 10450 actions for Sporting Charleroi
Processing team: Anderlecht
Found 9338 actions for Anderlecht
Processing team: Dender
Found 11536 actions for Dender
Processing team: Kortrijk
Found 10774 actions for Kortrijk
Processing team: Beerschot


In [4]:
print(filtered_df.head)

<bound method NDFrame.head of                           game_id  period_id           team_losing_ball          team_gaining_ball  seconds_after_loss action_type result  start_x  start_y   end_x   end_y
0       5oc8drrbruovbuiriyhdyiyok          1  1oyb7oym5nwzny8vxf03szd2h  b7jmo07lqav0wfe2mtlzyspak                 1.0          10      1   70.560   32.232  70.560  32.232
1       5oc8drrbruovbuiriyhdyiyok          1  1oyb7oym5nwzny8vxf03szd2h  b7jmo07lqav0wfe2mtlzyspak                 4.0           0      0   60.690   36.788  49.665  32.368
2       5oc8drrbruovbuiriyhdyiyok          1  1oyb7oym5nwzny8vxf03szd2h  b7jmo07lqav0wfe2mtlzyspak                 6.0           0      0   50.505   29.716  60.585  28.696
3       5oc8drrbruovbuiriyhdyiyok          1  1oyb7oym5nwzny8vxf03szd2h  b7jmo07lqav0wfe2mtlzyspak                 7.0           0      0   60.900   29.512  34.860  37.128
4       5oc8drrbruovbuiriyhdyiyok          1  1oyb7oym5nwzny8vxf03szd2h  b7jmo07lqav0wfe2mtlzyspak            