In [9]:
import pandas as pd
import numpy as np

# Read the event table that the cells below build their features from.
events = pd.read_csv(filepath_or_buffer="data/processed/events_ucl_2018_19.csv")

In [10]:
# Keep only the rows whose event type is 'Pass'. The .copy() makes the subset
# an independent frame, so columns added to it later do not touch `events`.
passes = events.loc[events['type'].eq('Pass')].copy()

In [11]:
# Per match and team, count the distinct possession ids in which that team
# logged at least one event.
# NOTE(review): this groups on `team`, i.e. the team attached to each event.
# If the data also carries a column naming the team that owns the possession,
# counting on that column may be what is intended — confirm against the CSV.
team_possessions = (
    events.groupby(['match_id', 'team'])['possession']
    .nunique()
    .reset_index(name='num_possessions')
)

# Share of the match's possession count attributed to each team, stored as a
# fraction (0-1), not a percentage. The per-match total comes from the
# built-in 'sum' transform, so no Python-level function is invoked per match.
team_possessions['possession_pct'] = (
    team_possessions['num_possessions']
    / team_possessions.groupby('match_id')['num_possessions'].transform('sum')
)

In [12]:
# Total event duration per (match, team, possession). Missing durations are
# skipped by the sum rather than propagating NaN.
possession_durations = (
    events
    .groupby(['match_id', 'team', 'possession'], as_index=False)['duration']
    .sum()
)

# Average of those per-possession totals for every (match, team) pair.
avg_possession_duration = (
    possession_durations
    .groupby(['match_id', 'team'], as_index=False)
    .agg(avg_possession_duration=('duration', 'mean'))
)

In [13]:
# Mean pass length for every (match, team) pair, returned as a flat frame
# with the key columns plus 'avg_pass_length'.
avg_pass_length = (
    passes.groupby(['match_id', 'team'])['pass_length']
    .mean()
    .rename('avg_pass_length')
    .reset_index()
)

In [14]:
# A pass is "long" when its length is strictly greater than this value
# (expressed in whatever units `pass_length` uses).
LONG_PASS_LENGTH_THRESHOLD = 30

# Flag long passes. A missing pass_length compares as False, so such rows are
# treated as not-long and still count in the denominator of the ratio below.
passes['is_long'] = passes['pass_length'] > LONG_PASS_LENGTH_THRESHOLD

# The mean of a boolean column is the fraction of True values, i.e. the share
# of each team's passes in a match that were flagged as long.
long_pass_ratio = (
    passes
    .groupby(['match_id', 'team'])['is_long']
    .mean()
    .reset_index(name='long_pass_ratio')
)

In [15]:
# A pass is flagged as forward when its angle lies within pi/4 of zero on
# either side (bounds inclusive); a missing angle evaluates to False.
# NOTE(review): assumes an angle of 0 points toward the attacking goal — confirm.
passes['is_forward'] = passes['pass_angle'].abs().le(np.pi / 4)

# Fraction of each team's passes in a match that carry the forward flag.
forward_pass_ratio = (
    passes.groupby(['match_id', 'team'], as_index=False)
    .agg(forward_pass_ratio=('is_forward', 'mean'))
)

In [16]:
from functools import reduce

# Feature tables to combine, each keyed by (match_id, team). The first entry
# is the left-most table of the join chain, so its rows decide which keys
# appear in the result.
dfs = [
    team_possessions[['match_id', 'team', 'possession_pct']],
    avg_possession_duration,
    avg_pass_length,
    long_pass_ratio,
    forward_pass_ratio
]


def _merge_team_features(left, right):
    """Left-join ``right`` onto ``left`` on (match_id, team).

    ``validate='one_to_one'`` makes pandas raise ``MergeError`` when either
    side holds duplicate keys, instead of silently multiplying rows.
    """
    return pd.merge(
        left,
        right,
        on=['match_id', 'team'],
        how='left',
        validate='one_to_one',
    )


# Fold the list pairwise into one wide table: one row per (match_id, team).
team_features = reduce(_merge_team_features, dfs)

In [17]:
# Summary statistics for the numeric columns of the merged feature table.
# A numeric match_id also shows up in the summary, although statistics of an
# identifier carry no meaning.
team_features.describe()

Unnamed: 0,match_id,possession_pct,avg_possession_duration,avg_pass_length,long_pass_ratio,forward_pass_ratio
count,2.0,2.0,2.0,2.0,2.0,2.0
mean,22912.0,0.5,10.070679,24.837697,0.272408,0.360832
std,0.0,0.029369,3.354483,2.324147,0.044227,0.084023
min,22912.0,0.479233,7.698701,23.194276,0.241135,0.301418
25%,22912.0,0.489617,8.88469,24.015986,0.256771,0.331125
50%,22912.0,0.5,10.070679,24.837697,0.272408,0.360832
75%,22912.0,0.510383,11.256668,25.659407,0.288044,0.390539
max,22912.0,0.520767,12.442656,26.481117,0.303681,0.420245
