In [None]:
import matplotlib.pyplot as plt
plt.style.use('default')
plt.rc('text', usetex=True)  # All figure text is rendered through LaTeX
plt.rcParams.update({'font.family': 'serif'})
plt.rcParams.update({'font.size': 16})
import numpy as np
import os
import pandas as pd
import seaborn as sb
# color_palette() only *returns* a palette; set_palette() is the call that applies it.
sb.set_palette("tab10")
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_io as tfio
import time
os.chdir('Change this to your directory!') # Change this to your directory!
# Seed every RNG this notebook draws from: TensorFlow for the dataset pipeline,
# and NumPy because pandas' .sample() falls back to NumPy's global RNG.
tf.random.set_seed(42)
np.random.seed(42)

# Preparations
First we need the data, right.

In [None]:
start = time.time()
# Building blocks for the CSV column names.
position_features = ['x','y']
pose_features = position_features + ['body']
vel_features = ['vx','vy']
heteroparam_features = [
    'dash_power_rate',
    'effort_min',
    'effort_max',
    'extra_stamina',
    'inertia_moment',
    'kick_rand',
    'kickable_margin',
    'player_decay'
]
# Input columns: ball state, then every player of both sides (l1..l11, r1..r11),
# then the acting player ("self").
feature_columns = [
    *(f"ball_{feature}" for feature in position_features + vel_features),
    *(f"{side}{unum}_{feature}" for side in ('l', 'r') for unum in range(1,12) for feature in pose_features + vel_features + heteroparam_features),
    *(f"self_{feature}" for feature in pose_features + vel_features + heteroparam_features)
]
# Output columns: the command type followed by its numeric parameters.
output_columns = [
    'playercommand_type',
    'dash_power',
    'dash_direction',
    'turn_moment',
    'kick_power',
    'kick_direction',
    'tackle_direction'
]
# One default per column, in file order: 0.0 for features, 'nop' for the command
# type and NaN for command parameters that are absent in a row.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
column_defaults = [
    *(0.0 for _ in feature_columns),
    'nop',
    *(np.nan for _ in output_columns[1:])
]
print(f"Finished in {time.time()-start} sec")

This specifies our batch size, and we fetch just one.

The dataset is already shuffled BY FRAMES, but each frame makes 10 datapoints (one for each of the 10 players commands issued) and these are grouped in contiguous windows of size 10.

Since we have 1400 matches, starting at the 1400th one, the probability that a given match is missing from a sequence of $n$ frames ($10n$ rows) is $(1399/1400)^n$.

Solving $1-(1399/1400)^n = 0.99$ gives $n \approx 6445$, so a given match has a $99\%$ chance of contributing at least one frame if we sample $10n = 64449$ rows.

Because of this, we need to shuffle the data using at least `shuffle_buffer_size=64449`.

In [None]:
start = time.time()
BATCH_SIZE = 100000 # 100k rows is around 0.16 % of the dataset
# Build the shuffled CSV pipeline first, then keep only its first batch.
shuffled_batches = tf.data.experimental.make_csv_dataset(
    'indiv_arch_dataset.csv.gz',
    batch_size=BATCH_SIZE,
    column_defaults=column_defaults,
    compression_type='GZIP',
    shuffle=True,
    shuffle_buffer_size=64449
)
dataset = shuffled_batches.take(1) # Dataset with only 1 batch
print(f"Finished in {time.time()-start} sec")

Convert the dataset into a Dataframe for analysis

In [None]:
start = time.time()
# The dataset holds a single batch, so its first element is the whole sample.
first_batch = next(dataset.as_numpy_iterator())
df = pd.DataFrame.from_dict(data=dict(first_batch.items()), dtype=np.float32)
print(f"Finished in {time.time()-start} sec")

#### **Important**: This allows us to use latex with column names that have '_' characters

In [None]:
# Escape every underscore so column names can be typeset by LaTeX (usetex is on).
df = df.rename(columns=lambda s: s.replace('_', '\\_'))

We now fix the normalization of speeds which have been wrongly calculated

In [None]:
start = time.time()
##
def _renormalize_velocity_pair(df, vx_col, vy_col, speed_max, rand, decay):
    """Re-normalize one (vx, vy) column pair of `df` in place.

    The components are first multiplied by `speed_max` (undoing the earlier
    normalization) and then divided by `decay * (speed_max + |v| * rand)`,
    where `|v|` is the row-wise norm of the de-normalized velocity.
    `decay` may be a scalar or a per-row Series.
    """
    df[vx_col] *= speed_max
    df[vy_col] *= speed_max
    vel_norms = np.sqrt(np.square(df[vx_col]) + np.square(df[vy_col]))
    max_noise_contrib = vel_norms * rand
    divisor = decay * (speed_max + max_noise_contrib)
    df[vx_col] /= divisor
    df[vy_col] /= divisor


def correct_vel_normalizations(df):
    """Fix the velocity normalization of ball, player and self columns in place.

    Column names are expected in their LaTeX-escaped form (every '_' written
    as '\\_'), i.e. after the notebook's column rename. A velocity pair is
    processed only when both components are present; if exactly one is
    present an error is printed and the pair is skipped; if neither is
    present the entity is silently ignored.

    The frame is mutated in place and nothing is returned, so calling this
    twice on the same frame applies the correction twice.

    Raises:
        KeyError: if a player's (or self's) velocity pair is present but the
            matching '..._player\\_decay' column is not.
    """
    cols = df.columns
    ball_speed_max = 3
    ball_rand = 0.05
    ball_decay = 0.94

    player_speed_max = 1.05
    player_rand = 0.1
    default_player_decay = 0.4
    player_decay_delta = 0.1

    # Ball: uses a fixed decay.
    if 'ball\\_vx' in cols and 'ball\\_vy' in cols:
        _renormalize_velocity_pair(
            df, 'ball\\_vx', 'ball\\_vy', ball_speed_max, ball_rand, ball_decay
        )
    elif 'ball\\_vx' in cols or 'ball\\_vy' in cols:
        print('ERROR: Cannot normalize ball velocities without both components! Skipping.')

    # Players: decay is derived per row from the player's own decay column.
    for side in ('l','r'):
        for unum in range(1,12):
            vx_str = f'{side}{unum}\\_vx'
            vy_str = f'{side}{unum}\\_vy'
            # Both underscores must be escaped to match the renamed columns.
            player_decay_str = f'{side}{unum}\\_player\\_decay'
            if vx_str in cols and vy_str in cols:
                pdecays = default_player_decay + player_decay_delta * df[player_decay_str]
                _renormalize_velocity_pair(
                    df, vx_str, vy_str, player_speed_max, player_rand, pdecays
                )
            elif vx_str in cols or vy_str in cols:
                print(f'ERROR: Cannot normalize {side}{unum} velocities without both components! Skipping.')

    # Self: same treatment as any other player.
    if 'self\\_vx' in cols and 'self\\_vy' in cols:
        pdecays = default_player_decay + player_decay_delta * df['self\\_player\\_decay']
        _renormalize_velocity_pair(
            df, 'self\\_vx', 'self\\_vy', player_speed_max, player_rand, pdecays
        )
    elif 'self\\_vx' in cols or 'self\\_vy' in cols:
        print('ERROR: Cannot normalize self velocities without both components! Skipping.')
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
# NOTE: this mutates `df` in place and is not idempotent -- re-running the cell
# applies the velocity correction a second time. Rebuild `df` before re-running.
correct_vel_normalizations(df)
print(f"Finished in {time.time()-start} sec")

In [None]:
# Print the per-column dtype / non-null summary, then preview the first rows.
df.info()
df.head()

# Feature Space Exploration

Let's get a random sample of the dataset to visualize stuff.
First, specify our features.

## Joint distributions
Since we normalized the field positions, we should see them in $[-1, 1]$

In [None]:
def plotjoint(x, y, xlabel=None, ylabel=None, alpha=1, rasterized=False):
    """Draw a joint scatter of `x` against `y` with histogram + KDE marginals.

    Args:
        x, y: the data for each axis.
        xlabel, ylabel: axis labels; when omitted, the `name` attribute of the
            corresponding input is used if it has one.
        alpha: opacity of the scatter points.
        rasterized: forwarded to the scatter call.

    Returns:
        The seaborn JointGrid holding the figure.
    """
    if xlabel is None:
        xlabel = getattr(x, 'name', None)
    if ylabel is None:
        ylabel = getattr(y, 'name', None)

    def _tiny_scatter(*points, **options):
        # Very small markers keep dense clouds readable.
        plt.scatter(*points, s=.1, alpha=alpha, rasterized=rasterized, **options)

    grid = sb.JointGrid(x=x, y=y)
    grid.set_axis_labels(xlabel, ylabel)
    grid.plot_joint(_tiny_scatter)
    # One marginal histogram per axis, each drawn on its own marginal Axes.
    for axis_kw, series, marginal_ax in (
        ('x', x, grid.ax_marg_x),
        ('y', y, grid.ax_marg_y),
    ):
        sb.histplot(ax=marginal_ax, kde=True, **{axis_kw: series})
    return grid

In [None]:
def plotjointgrouped(x_col, y_col, group_col, df, exclude=None, xlabel=None, ylabel=None, colors=None, alphas=None, rasterized=False, kde=True):
    """Joint scatter of `x_col` vs `y_col`, drawn separately per `group_col` value.

    Each group gets its own scatter layer on the joint axes and its own
    histograms on both marginal axes, all in the group's colour.

    Args:
        x_col, y_col: names of the columns to plot.
        group_col: name of the column to group by; group keys may be `bytes`
            (decoded as UTF-8) or any other type (converted with `str`).
        df: the DataFrame holding the three columns.
        exclude: group names to leave out of the plot (default: none).
        xlabel, ylabel: axis labels.
        colors: mapping group name -> colour; missing groups are drawn black.
        alphas: mapping group name -> opacity; missing groups use 0.1.
        rasterized: forwarded to the scatter calls.
        kde: whether the marginal histograms also draw a KDE curve.

    Returns:
        The seaborn JointGrid holding the figure.
    """
    # None defaults avoid sharing one mutable object between calls.
    exclude = () if exclude is None else exclude
    colors = {} if colors is None else colors
    alphas = {} if alphas is None else alphas

    p = sb.JointGrid(
        x = x_col,
        y = y_col,
        data = df
    )
    p.set_axis_labels(xlabel,ylabel)
    legends = []
    for name, group in df.groupby(group_col):
        # Group keys are bytes when they come straight from the TF dataset,
        # but may already be str; accept both.
        name = name.decode('utf8') if isinstance(name, bytes) else str(name)
        if name in exclude:
            continue
        legends.append(name)
        color = colors.get(name, 'black')
        alpha = alphas.get(name, 0.1)
        # Draw on the joint axes explicitly instead of relying on the
        # implicit "current axes".
        p.ax_joint.scatter(
            group[x_col],
            group[y_col],
            c=color,
            alpha=alpha,
            s=1,
            rasterized=rasterized
        )
        sb.histplot(
            x=group[x_col].values,
            ax=p.ax_marg_x,
            color=color,
            kde=kde
        )
        sb.histplot(
            y=group[y_col].values,
            ax=p.ax_marg_y,
            color=color,
            kde=kde
        )
    # markerscale enlarges the tiny scatter markers so they are visible in the legend.
    p.ax_joint.legend(legends, markerscale=10, loc='upper right', prop={'size': 10})
    return p

### Ball

In [None]:
start = time.time()
# (x column, y column, x label, y label, output file)
for x_col, y_col, x_label, y_label, filename in (
    ('ball\\_x', 'ball\\_y', 'Ball x', 'Ball y', 'ballx_bally.pdf'),
    ('ball\\_vx', 'ball\\_vy', 'Ball vx', 'Ball vy', 'ballvx_ballvy.pdf'),
):
    plotjoint(df[x_col], df[y_col], x_label, y_label).savefig(filename, format='pdf')
print(f"Finished in {time.time()-start} sec")

### Helios field players

In [None]:
start = time.time()
# Stack the per-player columns of the ten Helios field players (r2..r11)
# into one long Series per quantity.
xs = pd.concat([df[f'r{unum}\\_x'] for unum in range(2,12)])
ys = pd.concat([df[f'r{unum}\\_y'] for unum in range(2,12)])
# '\\_' rather than '\_': the single-backslash form is an invalid escape sequence.
bodies = pd.concat([df[f'r{unum}\\_body'] for unum in range(2,12)])
vxs = pd.concat([df[f'r{unum}\\_vx'] for unum in range(2,12)])
vys = pd.concat([df[f'r{unum}\\_vy'] for unum in range(2,12)])
print(f"Finished in {time.time()-start} sec")
print(vxs[1:20])

In [None]:
start = time.time()
# Each quantity paired with the label used whenever it appears on an axis.
helios_field_series = {
    'xs': (xs, 'helios\\_field\\_players\\_xs'),
    'ys': (ys, 'helios\\_field\\_players\\_ys'),
    'bodies': (bodies, 'helios\\_field\\_players\\_bodies'),
    'vxs': (vxs, 'helios\\_field\\_players\\_vxs'),
    'vys': (vys, 'helios\\_field\\_players\\_vys'),
}
for x_key, y_key in (
    ('xs', 'ys'), ('xs', 'bodies'), ('bodies', 'ys'), ('vxs', 'vys'),
    ('xs', 'vxs'), ('vxs', 'ys'), ('xs', 'vys'), ('vys', 'ys'),
):
    (x_data, x_label), (y_data, y_label) = helios_field_series[x_key], helios_field_series[y_key]
    plotjoint(x_data, y_data, x_label, y_label)
print(f"Finished in {time.time()-start} sec")

### Other field players 

In [None]:
start = time.time()
# Same stacking as for Helios, but for the left side's field players (l2..l11).
field_unums = range(2,12)
others_xs = pd.concat([df[f'l{unum}\\_x'] for unum in field_unums])
others_ys = pd.concat([df[f'l{unum}\\_y'] for unum in field_unums])
others_bodies = pd.concat([df[f'l{unum}\\_body'] for unum in field_unums])
others_vxs = pd.concat([df[f'l{unum}\\_vx'] for unum in field_unums])
others_vys = pd.concat([df[f'l{unum}\\_vy'] for unum in field_unums])
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
# Each quantity paired with the label used whenever it appears on an axis.
others_field_series = {
    'xs': (others_xs, 'others\\_field\\_players\\_xs'),
    'ys': (others_ys, 'others\\_field\\_players\\_ys'),
    'bodies': (others_bodies, 'others\\_field\\_players\\_bodies'),
    'vxs': (others_vxs, 'others\\_field\\_players\\_vxs'),
    'vys': (others_vys, 'others\\_field\\_players\\_vys'),
}
for x_key, y_key in (
    ('xs', 'ys'), ('xs', 'bodies'), ('bodies', 'ys'), ('vxs', 'vys'),
    ('xs', 'vxs'), ('vxs', 'ys'), ('xs', 'vys'), ('vys', 'ys'),
):
    (x_data, x_label), (y_data, y_label) = others_field_series[x_key], others_field_series[y_key]
    plotjoint(x_data, y_data, x_label, y_label)
print(f"Finished in {time.time()-start} sec")

### All field players

In [None]:
start = time.time()
# Pool both teams' field players, quantity by quantity, with a fresh 0..n-1 index.
all_xs, all_ys, all_bodies, all_vxs, all_vys = (
    pd.concat(pair, ignore_index=True)
    for pair in (
        (xs, others_xs),
        (ys, others_ys),
        (bodies, others_bodies),
        (vxs, others_vxs),
        (vys, others_vys),
    )
)
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
# Each quantity paired with the label used whenever it appears on an axis.
all_field_series = {
    'xs': (all_xs, 'all\\_field\\_players\\_xs'),
    'ys': (all_ys, 'all\\_field\\_players\\_ys'),
    'bodies': (all_bodies, 'all\\_field\\_players\\_bodies'),
    'vxs': (all_vxs, 'all\\_field\\_players\\_vxs'),
    'vys': (all_vys, 'all\\_field\\_players\\_vys'),
}
for x_key, y_key in (
    ('xs', 'ys'), ('xs', 'bodies'), ('bodies', 'ys'), ('vxs', 'vys'),
    ('xs', 'vxs'), ('vxs', 'ys'), ('xs', 'vys'), ('vys', 'ys'),
):
    (x_data, x_label), (y_data, y_label) = all_field_series[x_key], all_field_series[y_key]
    plotjoint(x_data, y_data, x_label, y_label)
print(f"Finished in {time.time()-start} sec")

### Helios players (including goalkeeper)

In [None]:
start = time.time()
# Append the r1 columns to each stacked Series. The names are rebound, so
# re-running this cell appends r1 again.
xs, ys, bodies, vxs, vys = (
    pd.concat((series, df[goalie_col]), ignore_index=True)
    for series, goalie_col in (
        (xs, 'r1\\_x'),
        (ys, 'r1\\_y'),
        (bodies, 'r1\\_body'),
        (vxs, 'r1\\_vx'),
        (vys, 'r1\\_vy'),
    )
)
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
index = xs.sample(100000).index
# Saved figures use one shared random subsample of 100k rows.
for x_data, y_data, x_label, y_label, filename in (
    (xs, ys, 'Helios x', 'Helios y', 'heliosx_heliosy.pdf'),
    (xs, bodies, 'Helios x', 'Helios body', 'heliosx_heliosbody.pdf'),
    (bodies, ys, 'Helios body', 'Helios y', 'heliosbody_heliosy.pdf'),
    (vxs, vys, 'Helios vx', 'Helios vy', 'heliosvx_heliosvy.pdf'),
):
    plotjoint(x_data[index], y_data[index], x_label, y_label, alpha=1).savefig(filename, format='pdf')
# The remaining figures use every row and are only displayed, not saved.
for x_data, y_data, x_label, y_label in (
    (xs, vxs, 'full\\_helios\\_field\\_players\\_xs', 'full\\_helios\\_field\\_players\\_vxs'),
    (vxs, ys, 'full\\_helios\\_field\\_players\\_vxs', 'full\\_helios\\_field\\_players\\_ys'),
    (xs, vys, 'full\\_helios\\_field\\_players\\_xs', 'full\\_helios\\_field\\_players\\_vys'),
    (vys, ys, 'full\\_helios\\_field\\_players\\_vys', 'full\\_helios\\_field\\_players\\_ys'),
):
    plotjoint(x_data, y_data, x_label, y_label)
print(f"Finished in {time.time()-start} sec")

### Other players (including goalkeeper)

In [None]:
start = time.time()
# Append the l1 columns to each stacked Series. The names are rebound, so
# re-running this cell appends l1 again.
others_xs, others_ys, others_bodies, others_vxs, others_vys = (
    pd.concat((series, df[goalie_col]), ignore_index=True)
    for series, goalie_col in (
        (others_xs, 'l1\\_x'),
        (others_ys, 'l1\\_y'),
        (others_bodies, 'l1\\_body'),
        (others_vxs, 'l1\\_vx'),
        (others_vys, 'l1\\_vy'),
    )
)
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
index = others_xs.sample(100000).index
# Saved figures use one shared random subsample of 100k rows.
for x_data, y_data, x_label, y_label, filename in (
    (others_xs, others_ys, "Adversaries x", "Adversaries y", 'othersx_othersy.pdf'),
    (others_xs, others_bodies, "Adversaries x", "Adversaries body", 'othersx_othersbody.pdf'),
    (others_bodies, others_ys, "Adversaries body", "Adversaries y", 'othersbody_othersy.pdf'),
    (others_vxs, others_vys, 'Adversaries vx', 'Adversaries vy', 'othersvx_othersvy.pdf'),
):
    plotjoint(x_data[index], y_data[index], x_label, y_label).savefig(filename, format='pdf')
# The remaining figures use every row and are only displayed, not saved.
for x_data, y_data, x_label, y_label in (
    (others_xs, others_vxs, 'Adversaries x', 'Adversaries vx'),
    (others_vxs, others_ys, "Adversaries vx", "Adversaries y"),
    (others_xs, others_vys, 'full\\_others\\_field\\_players\\_xs', 'full\\_others\\_field\\_players\\_vys'),
    (others_vys, others_ys, 'full\\_others\\_field\\_players\\_vys', 'full\\_others\\_field\\_players\\_ys'),
):
    plotjoint(x_data, y_data, x_label, y_label)
print(f"Finished in {time.time()-start} sec")

### All players (including goalkeeper)

In [None]:
start = time.time()
# Pool both teams (goalkeepers now included), quantity by quantity.
all_xs, all_ys, all_bodies, all_vxs, all_vys = (
    pd.concat(pair, ignore_index=True)
    for pair in (
        (xs, others_xs),
        (ys, others_ys),
        (bodies, others_bodies),
        (vxs, others_vxs),
        (vys, others_vys),
    )
)
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
index = all_xs.sample(100000).index
# Saved figures use one shared random subsample of 100k rows.
plotjoint(all_xs[index], all_ys[index], "All teams x" ,"All teams y").savefig('allx_ally.pdf', format='pdf')
plotjoint(all_xs[index], all_bodies[index], "All teams x","All teams body").savefig('allx_allbody.pdf', format='pdf')
plotjoint(all_bodies[index], all_ys[index], "All teams body", "All teams y").savefig('allbody_ally.pdf', format='pdf')
plotjoint(all_vxs[index], all_vys[index], 'full\\_all\\_field\\_players\\_vxs','full\\_all\\_field\\_players\\_vys').savefig('allvx_allvy.pdf', format='pdf')
# The remaining figures use every row and are only displayed, not saved.
plotjoint(all_xs, all_vxs, 'full\\_all\\_field\\_players\\_xs','full\\_all\\_field\\_players\\_vxs')
# The y axis here is the y position (all_ys), so it is labelled "y", not "vy".
plotjoint(all_vxs, all_ys, "All teams vx", "All teams y")
plotjoint(all_xs, all_vys, 'full\\_all\\_field\\_players\\_xs','full\\_all\\_field\\_players\\_vys')
plotjoint(all_vys, all_ys, 'full\\_all\\_field\\_players\\_vys','full\\_all\\_field\\_players\\_ys')
print(f"Finished in {time.time()-start} sec")

## Hetero Parameters 

Let's check their normalization individually

In [None]:
start = time.time()
# Human-readable axis label for each (LaTeX-escaped) heterogeneous parameter.
axis_labels = {
    'dash\\_power\\_rate': 'Dash Power Rate',
    'effort\\_min': 'Effort Minimum',
    'effort\\_max': 'Effort Maximum',
    'extra\\_stamina': 'Extra Stamina',
    'inertia\\_moment': 'Inertia Moment',
    'kick\\_rand': 'Kick Random',
    'kickable\\_margin': 'Kickable Margin',
    'player\\_decay': 'Player Decay',
}
for feature in heteroparam_features:
    # Column names in df are escaped, so escape the feature name the same way.
    feature = feature.replace('_', '\\_')
    per_player = [
        df[f"{side}{unum}\\_{feature}"]
        for side in ('l','r')
        for unum in range(1,12)
    ]
    values = pd.concat(per_player)
    p = sb.displot(values, kde=True, label=feature)
    p.set_axis_labels(axis_labels[feature], '')
    # Strip the backslashes again so the file name is plain.
    feature = feature.replace('\\', '')
    p.savefig(f'all_{feature}.pdf', format='pdf')
print(f"Finished in {time.time()-start} sec")

# Action Space Exploration

Now let's explore a bit about the actions in our dataset.

Kick and Tackle actions are rare, so we need to sample more datapoints.

In [None]:
start = time.time()
BATCH_SIZE = 1000000 # 1M rows is around 1.6 % of the dataset
# Only the ball state, the acting player's own state and the issued command are read.
columns = (
    [f'ball_{feature}' for feature in position_features + vel_features]
    + [f'self_{feature}' for feature in pose_features + vel_features + heteroparam_features]
    + list(output_columns)
)
# Defaults must line up with `columns`: the 4 ball entries at the front, then the
# trailing 20 entries (13 self features + 7 command outputs).
selected_defaults = column_defaults[:4] + column_defaults[-20:] # Ball + Self and Commands
unshuffled_batches = tf.data.experimental.make_csv_dataset(
    'indiv_arch_dataset.csv.gz',
    select_columns=columns,
    column_defaults=selected_defaults,
    batch_size=BATCH_SIZE,
    compression_type='GZIP',
    shuffle=False, # Batch size is too big for shuffle to make a difference in this analysis
    na_value=''
)
dataset2 = unshuffled_batches.take(1) # Dataset with only 1 batch
print(f"Finished in {time.time()-start} sec")

This one below can take more than an hour to load, even though we're consuming less memory because we select only some columns

In [None]:
start = time.time()
# The dataset holds a single batch, so its first element is the whole sample.
first_batch2 = next(dataset2.as_numpy_iterator())
df2 = pd.DataFrame.from_dict(data=dict(first_batch2.items()), dtype=np.float32)
print(f"Finished in {time.time()-start} sec")

#### **Important**: This allows us to use latex with column names that have '_' characters

In [None]:
# Escape every underscore so column names can be typeset by LaTeX (usetex is on).
df2 = df2.rename(columns=lambda s: s.replace('_', '\\_'))

In [None]:
start = time.time()
# NOTE: this mutates `df2` in place and is not idempotent -- re-running the cell
# applies the velocity correction a second time. Rebuild `df2` before re-running.
correct_vel_normalizations(df2)
print(f"Finished in {time.time()-start} sec")

In [None]:
# Print the per-column dtype / non-null summary, then preview the first rows.
df2.info()
df2.head()

## Action Distributions


In [None]:
start = time.time()
plt.box(False)
# One unit-wide bin centred on each of the four category positions 0..3.
bins = [-0.5, 0.5, 1.5, 2.5, 3.5]
total_commands = len(df2)
values, bins_edges, p = plt.hist(
    df2['playercommand\\_type'],
    bins=bins,
    rwidth=0.8
)
# Annotate each bar with its share of all commands, truncated to two decimals.
for i in range(len(bins)-1):
    text = f"{int(10000*values[i]/total_commands)/100}\\%"
    plt.text(
        bins_edges[i]+(0.32*5)/len(text),
        values[i]+10000,
        text
    )
plt.xlabel('Command')
# '\\#' is the LaTeX-escaped hash; a bare '\#' is an invalid Python escape sequence.
plt.ylabel('\\# per million')
plt.axline((0.0,-0.1),(1.0,-0.1),color='black',linewidth=1)
plt.plot()
plt.savefig('command_distribution.pdf', format='pdf', bbox_inches='tight')
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
colors = dict(dash='blue', turn='green', kick='gold', tackle='red')
# Kick and tackle get higher opacity than dash and turn.
command_alphas = {'dash':0.1, 'turn': 0.1, 'kick': 0.8, 'tackle': 1}
command_grid = plotjointgrouped(
    'self\\_x',
    'self\\_y',
    'playercommand\\_type',
    df=df2.sample(300000),
    xlabel='Helios x',
    ylabel='Helios y',
    colors=colors,
    alphas=command_alphas
)
command_grid.savefig('command_scatter.pdf', format='pdf', bbox_inches='tight')
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
colors = dict(dash='blue', turn='green', kick='gold', tackle='red')
# Every panel is drawn three times: tackle only, kick only, then both together.
# (commands to exclude, per-command opacity)
variants = (
    (['dash','turn','kick'], {'tackle': 1}),
    (['dash','turn','tackle'], {'kick': 1}),
    (['dash','turn'], {'kick':0.5, 'tackle': 0.5}),
)
# (x column, y column, x label, y label, output file per variant; None = display only)
panels = (
    ('self\\_x', 'self\\_y', 'Helios x', 'Helios y',
     ('tackle_scatter.pdf', 'kick_scatter.pdf', 'kick_tackle_scatter.pdf')),
    ('self\\_vx', 'self\\_vy', 'Helios vx', 'Helios vy',
     (None, None, 'kick_tackle_self_vels_scatter.pdf')),
    ('ball\\_vx', 'ball\\_vy', 'Ball vx', 'Ball vy',
     (None, None, 'kick_tackle_ballvels_scatter.pdf')),
)
for x_col, y_col, x_label, y_label, outputs in panels:
    for (excluded, alpha_map), output in zip(variants, outputs):
        grid = plotjointgrouped(
            x_col,
            y_col,
            'playercommand\\_type',
            df=df2,
            exclude=excluded,
            xlabel=x_label,
            ylabel=y_label,
            colors=colors,
            alphas=alpha_map
        )
        if output is not None:
            grid.savefig(output, format='pdf', bbox_inches='tight')
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
colors = dict(dash='blue', turn='green', kick='gold', tackle='red')
# (x column, y column, commands to exclude, x label, y label, opacities, output file)
dash_turn_specs = (
    ('self\\_x', 'self\\_y', ['turn','kick','tackle'], 'Helios x', 'Helios y',
     {'dash':0.1}, 'dash_scatter_os.pdf'),
    ('self\\_x', 'self\\_y', ['dash','kick','tackle'], 'Helios x', 'Helios y',
     {'turn':0.1}, 'turn_scatter_os.pdf'),
    ('self\\_x', 'self\\_y', ['kick','tackle'], 'Helios x', 'Helios y',
     {'dash':0.1, 'turn': 0.1}, 'dash_turn_scatter_os.pdf'),
    ('self\\_vx', 'self\\_vy', ['kick','tackle'], 'Helios vx', 'Helios vy',
     {'dash':0.1, 'turn': 0.1}, 'dash_turn_self_vel_scatter.pdf'),
    ('ball\\_vx', 'ball\\_vy', ['kick','tackle'], 'Ball vx', 'Ball vy',
     {'dash':0.1, 'turn': 0.1}, 'dash_turn_ball_vel_scatter.pdf'),
)
for x_col, y_col, excluded, x_label, y_label, alpha_map, output in dash_turn_specs:
    # Each figure draws its own fresh random subsample of 300k rows.
    grid = plotjointgrouped(
        x_col,
        y_col,
        'playercommand\\_type',
        df=df2.sample(300000),
        exclude=excluded,
        xlabel=x_label,
        ylabel=y_label,
        colors=colors,
        alphas=alpha_map
    )
    grid.savefig(output, format='pdf', bbox_inches='tight')
print(f"Finished in {time.time()-start} sec")

In [None]:
start = time.time()
colors = dict(dash='blue', turn='green', kick='gold', tackle='red')
# Separation of kick and tackle: add the player-minus-ball offset on each axis.
diff_df = (
    df2[['ball\\_x','ball\\_y','self\\_x','self\\_y','playercommand\\_type']]
    .copy()
    .assign(
        playerballxdiff=(df2['self\\_x'] - df2['ball\\_x']).values,
        playerballydiff=(df2['self\\_y'] - df2['ball\\_y']).values,
    )
)
diff_df.info()
posdiff_grid = plotjointgrouped(
    'playerballxdiff',
    'playerballydiff',
    'playercommand\\_type',
    df=diff_df,
    exclude=['dash','turn'],
    xlabel='Helios x - Ball x',
    ylabel='Helios y - Ball y',
    colors=colors,
    alphas={'kick':0.5, 'tackle': 0.5},
    kde=False
)
posdiff_grid.savefig('kick_tackle_posdiff_scatter.pdf', format='pdf')
print(f"Finished in {time.time()-start} sec")

## Action Parameters Distributions

First, let's look at the parameters themselves

In [None]:
start = time.time()
# Dash: joint distribution of power and direction.
plotjoint(
    df2['dash\\_power'],
    df2['dash\\_direction'],
    'Dash Power',
    'Dash Direction',
    alpha=1
).savefig('dash_power_dir_distribution.pdf', format='pdf')
# Turn: single parameter, so a plain histogram.
# ('\\#' is the LaTeX-escaped hash; a bare '\#' is an invalid Python escape sequence.)
sb.displot(
    df2['turn\\_moment']
).set_axis_labels(
    'Turn Moment',
    '\\# per million'
).savefig('turn_moment_distribution.pdf', format='pdf')
# Kick: joint distribution of power and direction.
plotjoint(
    df2['kick\\_power'],
    df2['kick\\_direction'],
    'Kick Power',
    'Kick Direction',
    alpha=1
).savefig('kick_power_dir_distribution.pdf', format='pdf')
# Tackle: single parameter, so a plain histogram.
sb.displot(
    df2['tackle\\_direction']
).set_axis_labels(
    'Tackle Direction',
    '\\# per million'
).savefig('tackle_dir_distribution.pdf', format='pdf')
print(f"Finished in {time.time()-start} sec")

Now, let's look at how they relate to the features

In [None]:
def _scatter_param_over_field(data, hue, palette, label, filename, **scatter_kwargs):
    """Scatter the acting player's position coloured by one command parameter.

    Plots 'self\\_x' against 'self\\_y' from `data`, colours the points by the
    `hue` column using `palette`, replaces the legend with a colorbar titled
    `label` spanning the min..max of `hue`, and saves the figure to
    `filename` as PDF. Extra keyword arguments go to `sb.scatterplot`.

    Returns the Axes that was drawn on.
    """
    plt.figure()
    ax = sb.scatterplot(
        x='self\\_x',
        y='self\\_y',
        data=data,
        hue=hue,
        palette=palette,
        **scatter_kwargs
    )
    ax.set_xlabel('Helios x')
    ax.set_ylabel('Helios y')
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    mappable = plt.cm.ScalarMappable(
        cmap=palette,
        norm=plt.Normalize(data[hue].min(), data[hue].max())
    )
    # The mappable is not attached to any Axes, so the Axes to take space
    # from must be named explicitly; newer Matplotlib refuses to guess it.
    ax.figure.colorbar(mappable, ax=ax, label=label)
    ax.figure.savefig(filename, format='pdf', bbox_inches='tight')
    return ax


start = time.time()
# Dash and turn are plotted from a 300k-row subsample; kick and tackle use every row.
dash_only = df2.sample(300000)
dash_only = dash_only[dash_only['playercommand\\_type'] == b'dash']
turn_only = df2.sample(300000)
turn_only = turn_only[turn_only['playercommand\\_type'] == b'turn']
kick_only = df2[df2['playercommand\\_type'] == b'kick']
tackle_only = df2[df2['playercommand\\_type'] == b'tackle']
# Dash
p = _scatter_param_over_field(
    dash_only, 'dash\\_power', "flare", 'Dash Power',
    'dash_power_selfpos_scatter.pdf', s=0.5
)
p = _scatter_param_over_field(
    dash_only, 'dash\\_direction', "crest", 'Dash Direction',
    'dash_dir_selfpos_scatter.pdf', s=0.5
)
# Turn
p = _scatter_param_over_field(
    turn_only, 'turn\\_moment', "crest", 'Turn Moment',
    'turn_moment_selfpos_scatter.pdf', s=0.5
)
# Kick
p = _scatter_param_over_field(
    kick_only, 'kick\\_power', "flare", 'Kick Power',
    'kick_power_selfpos_scatter.pdf', s=0.5
)
p = _scatter_param_over_field(
    kick_only, 'kick\\_direction', "crest", 'Kick Direction',
    'kick_dir_selfpos_scatter.pdf', s=0.5
)
# Tackle: uses the default marker size.
p = _scatter_param_over_field(
    tackle_only, 'tackle\\_direction', "crest", 'Tackle Direction',
    'tackle_dir_selfpos_scatter.pdf'
)
print(f"Finished in {time.time()-start} sec")