In [None]:
!pip install mplsoccer

In [None]:
import duckdb

import mplsoccer
from mplsoccer import Pitch, VerticalPitch

import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
import numpy as np
from scipy.stats import binned_statistic_2d

In [None]:
# Select every row whose type is 'pass' from the stories parquet file and
# materialise the DuckDB result as a pandas DataFrame.
pass_query = """
    SELECT * FROM '/content/drive/MyDrive/footly_data/stories.parquet'
    WHERE type = 'pass'
"""
df_stories = duckdb.sql(pass_query).df()

# Quick size check: (rows, columns).
df_stories.shape

In [None]:
# Get rid of the passes that ended out of play: keep every row whose
# ball_out value is not equal to True.
not_out_of_play = df_stories['ball_out'].ne(True)
df_stories_filtered = df_stories[not_out_of_play]
df_stories_filtered.shape

In [None]:
# Work on a copy so the filtered frame is left untouched, then rescale the
# coordinate columns to real pitch dimensions (105x68 meters): x columns are
# multiplied by 105/100 and y columns by 68/100.
df_move = df_stories_filtered.copy()

scale_by_column = {
    "start_x": 105 / 100,
    "start_y": 68 / 100,
    "end_x": 105 / 100,
    "end_y": 68 / 100,
}
for column, factor in scale_by_column.items():
    df_move[column] *= factor

df_move.head(3)

In [None]:
# Overwrite pass_length with the straight-line (Euclidean) distance between
# the start and end points, computed from the coordinates currently stored
# in df_move.
delta_x = df_move["end_x"] - df_move["start_x"]
delta_y = df_move["end_y"] - df_move["start_y"]
df_move["pass_length"] = np.sqrt(delta_x**2 + delta_y**2)

df_move.head(3)

In [None]:
import pyarrow
import fastparquet

In [None]:
# Frequency of each distinct value found in the "cross" column.
df_move.loc[:, "cross"].value_counts()

In [None]:
# Columns carried over into the reduced passes/crosses frame.
columns_to_keep = [
    'id',
    'start_x', 'start_y', 'end_x', 'end_y',
    'pass_length', 'successful', 'pass_high',
    'player_id', 'team_id', 'pass_recipient_id', 'player_position',
    'season', 'long_pass', 'possession_id', 'minute', 'cross',
]

# Project df_move onto those columns, in the order listed above.
df_done = df_move.loc[:, columns_to_keep]

# Preview the first rows to verify the selection.
df_done.head()

In [None]:
# Write the reduced passes frame to Parquet (index not stored), then report
# completion.
passes_output_path = '/content/drive/MyDrive/footly_data/passes_crosses_Done.parquet'
df_done.to_parquet(passes_output_path, index=False)

print("DataFrame saved successfully!")

In [None]:
# Minimal column set: identifier, start/end coordinates and team.
columns_to_keep_2 = ['id', 'start_x', 'start_y', 'end_x', 'end_y', 'team_id']

# Project df_move onto those columns, in the order listed above.
df_done_2 = df_move.loc[:, columns_to_keep_2]

# Preview the first two rows to verify the selection.
df_done_2.head(2)

In [None]:
# Write the small coordinates-only frame to Parquet (index not stored), then
# report completion.
moving_output_path = '/content/drive/MyDrive/footly_data/moving_small.parquet'
df_done_2.to_parquet(moving_output_path, index=False)

print("DataFrame saved successfully!")

In [None]:
# Draw a random 50% of df_move with a fixed seed (123) so the sample is
# repeatable, and renumber the sampled rows from zero.
df_smaller = (
    df_move
    .sample(frac=0.5, random_state=123)
    .reset_index(drop=True)
)

print(f"Original size: {len(df_move)}, Reduced size: {len(df_smaller)}")

In [None]:
# Build a 2D histogram of the sampled start positions on a custom 105x68
# pitch, using bins=(16, 12) and raw (non-normalised) counts.
pitch = Pitch(
    pitch_type='custom',
    pitch_length=105,
    pitch_width=68,
    line_color='black',
    line_zorder=2,
)
move = pitch.bin_statistic(
    df_smaller["start_x"],
    df_smaller["start_y"],
    statistic='count',
    bins=(16, 12),
    normalize=False,
)

# Figure layout with title and endnote areas; the heatmap goes on the pitch axes.
fig, ax = pitch.grid(
    grid_height=0.9,
    title_height=0.06,
    endnote_height=0.04,
    title_space=0,
    endnote_space=0,
    axis=False,
)
pcm = pitch.heatmap(move, cmap='Blues', edgecolor='grey', ax=ax['pitch'])

# Legend: a colorbar drawn on its own axes, whose left edge sits at x=1 in
# figure coordinates.
ax_cbar = fig.add_axes((1, 0.093, 0.03, 0.786))
cbar = plt.colorbar(pcm, cax=ax_cbar)
fig.suptitle('Moving actions 2D histogram', fontsize=30)
plt.show()

# Keep the "statistic" entry of the binned result for later use.
move_count = move["statistic"]

## Shots

In [None]:
# Select every row whose type is 'shot' from the stories parquet file and
# materialise the DuckDB result as a pandas DataFrame.
shot_query = """
    SELECT * FROM '/content/drive/MyDrive/footly_data/stories.parquet'
    WHERE type = 'shot'
"""
df_shots = duckdb.sql(shot_query).df()

# Quick size check: (rows, columns).
df_shots.shape

In [None]:
# Work on a copy of the shots frame and rescale its coordinate columns:
# x columns are multiplied by 105/100 and y columns by 68/100.
df_copy = df_shots.copy()

shot_scale_by_column = {
    "start_x": 105 / 100,
    "start_y": 68 / 100,
    "end_x": 105 / 100,
    "end_y": 68 / 100,
}
for column, factor in shot_scale_by_column.items():
    df_copy[column] *= factor

In [None]:
# Minimal column set for shots: identifier, start/end coordinates and team.
columns_to_keep_3 = ['id', 'start_x', 'start_y', 'end_x', 'end_y', 'team_id']

# Project df_copy onto those columns, in the order listed above.
df_done_3 = df_copy.loc[:, columns_to_keep_3]

# Preview the first two rows to verify the selection.
df_done_3.head(2)

In [None]:
# Write the reduced shots frame to Parquet (index not stored), then report
# completion.
shots_output_path = '/content/drive/MyDrive/footly_data/shots_small.parquet'
df_done_3.to_parquet(shots_output_path, index=False)

print("DataFrame saved successfully!")