In [None]:
import json
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import cv2

from matplotlib.gridspec import GridSpec

In [None]:
pd.set_option('display.max_columns', 999)
%matplotlib inline
mpl.rcParams['figure.facecolor'] = 'white'

In [None]:
# Team names; later cells compare them with the `teamId` column of the events
# table after team ids have been replaced by team names.
HOME_TEAM = "England"
AWAY_TEAM = "Germany"

# Pitch background. cv2.imread returns None (instead of raising) when the file
# cannot be read, and it returns channels in BGR order while matplotlib's
# imshow expects RGB, so fail early and convert once here.
PITCH_IMG_PATH = "./reasources/images/soccer-field-resized.png"
PITCH_IMG = cv2.imread(PITCH_IMG_PATH)
if PITCH_IMG is None:
    raise FileNotFoundError(f"Could not read pitch image: {PITCH_IMG_PATH}")
PITCH_IMG = cv2.cvtColor(PITCH_IMG, cv2.COLOR_BGR2RGB)

# Bounds used to map event coordinates onto the image.
# NOTE(review): presumably the pixel coordinates of the playing area inside
# PITCH_IMG - confirm against the image file.
PITCH_MAX_X = 570
PITCH_MAX_Y = 333
PITCH_MIN_X = 27
PITCH_MIN_Y = 10

In [None]:
def extract_json_from_html(html_path, save_output=False):
    """Extract the match-data JSON text embedded in a saved match HTML page.

    The page is expected to contain a ``require.config.params["args"] = {...};``
    assignment. The text from the opening of the object literal up to the first
    ``;`` is taken, the four known unquoted top-level keys are wrapped in double
    quotes, and the closing ``};`` is reduced to ``}`` so the result can be fed
    to ``json.loads``.

    Parameters
    ----------
    html_path : str
        Path of the saved HTML file (read as UTF-8).
    save_output : bool, default False
        When true, the extracted text is also written to ``f"{html_path}.txt"``.

    Returns
    -------
    str
        The JSON text.

    Raises
    ------
    ValueError
        If the ``require.config.params["args"]`` assignment is not found.
    """
    # Context manager so the handle is released even if reading fails.
    with open(html_path, 'r', encoding='utf-8') as html_file:
        html = html_file.read()

    regex_pattern = r'(?<=require\.config\.params\["args"\].=.)[\s\S]*?;'
    match = re.search(regex_pattern, html)
    if match is None:
        raise ValueError(
            f'no require.config.params["args"] assignment found in {html_path}'
        )
    data_txt = match.group(0)

    # add quotations for json parser
    # Only bare keys followed by ':' are quoted; an already quoted occurrence
    # inside the payload must not be turned into ""key"".
    unquoted_keys = (
        'matchId',
        'matchCentreData',
        'matchCentreEventTypeJson',
        'formationIdNameMappings',
    )
    key_pattern = r'(?<![\w"])(' + '|'.join(unquoted_keys) + r')(?=\s*:)'
    data_txt = re.sub(key_pattern, r'"\1"', data_txt)
    data_txt = data_txt.replace('};', '}')

    if save_output:
        # save json data to txt
        with open(f"{html_path}.txt", "wt", encoding='utf-8') as output_file:
            output_file.write(data_txt)

    return data_txt

In [None]:
def extract_data_from_dict(data):
    """Split the parsed match payload into events, players and team names.

    Only ``data["matchCentreData"]`` is read: its ``"events"`` entry and, for
    both ``"home"`` and ``"away"``, the ``"teamId"``, ``"name"`` and
    ``"players"`` entries.

    Parameters
    ----------
    data : dict
        Parsed match payload.

    Returns
    -------
    events_dict
        ``data["matchCentreData"]["events"]``, returned unchanged.
    players_df : pandas.DataFrame
        Home players followed by away players, with an added ``teamId``
        column. Each team's original row index is kept, so index labels
        repeat across the two teams.
    teams_dict : dict
        Maps each team's ``teamId`` to its ``name``.
    """
    match_centre = data["matchCentreData"]
    home, away = match_centre["home"], match_centre["away"]

    events_dict = match_centre["events"]
    teams_dict = {home["teamId"]: home["name"], away["teamId"]: away["name"]}

    # create players dataframe
    team_frames = []
    for team in (home, away):
        team_players = pd.DataFrame(team["players"])
        team_players["teamId"] = team["teamId"]
        team_frames.append(team_players)
    players_df = pd.concat(team_frames)

    return events_dict, players_df, teams_dict

In [None]:
# Saved match page; the embedded match JSON text is extracted from it.
match_html_path = './reasources/htmls/England 2-0 Germany - European Championship 2020 Live.html'
json_data_txt = extract_json_from_html(match_html_path)

In [None]:
# Parse the JSON text and split it into events, players and team names.
data = json.loads(json_data_txt)
events_dict, players_df, teams_dict = extract_data_from_dict(data)

In [None]:
# teamId -> team name
teams_dict

In [None]:
players_df.head()

In [None]:
# Quick look at the raw events; a later cell rebuilds `df` from `events_dict`
# before transforming it, so this frame is for inspection only.
df = pd.DataFrame(events_dict)
df.head()

In [None]:
df.columns

In [None]:
# Build the flat events table used by all later cells. It is rebuilt from
# `events_dict` every time, so re-running this cell is safe.
df = pd.DataFrame(events_dict)

# Pull the display names out of the nested {"displayName": ...} dicts.
df["eventType"] = df["type"].map(lambda value: value["displayName"])
df["outcomeType"] = df["outcomeType"].map(lambda value: value["displayName"])
df["half"] = df["period"].map(lambda value: value["displayName"])

# Rescale event coordinates to the pitch image bounds.
# NOTE(review): x is divided by 99.5 while y is divided by 100 - kept as in
# the original; confirm the asymmetry is intended.
for column in ("x", "endX", "blockedX"):
    df[column] = PITCH_MIN_X + df[column] * (PITCH_MAX_X - PITCH_MIN_X) / 99.5
for column in ("y", "endY", "blockedY", "goalMouthY"):
    df[column] = PITCH_MIN_Y + df[column] * (PITCH_MAX_Y - PITCH_MIN_Y) / 100

# "MM.SS"-style key (seconds / 100, not / 60): it is not decimal minutes and
# is only compared against other values built the same way.
df["time"] = df["minute"] + df["second"] / 100

# Translate team ids to names in the teamId column only. A frame-wide replace
# would also rewrite any other cell that happens to equal a team id.
df["teamId"] = df["teamId"].replace(teams_dict)

# Receiver of a successful pass = the player of the next event, provided that
# player belongs to the passer's team; otherwise NaN. Looking the next player
# up by column name (instead of a positional column index) and with shift()
# also covers a pass in the last row, which has no next event.
team_by_player = dict(zip(players_df["playerId"], players_df["teamId"]))
next_player = df["playerId"].shift(-1)
completed_pass = (df["eventType"] == "Pass") & (df["outcomeType"] == "Successful")
same_team = df["playerId"].map(team_by_player) == next_player.map(team_by_player)
df["reciverId"] = next_player.where(completed_pass & same_team)

df = df.drop(columns=["id", "period", "type", "minute", "second"])
# TODO change columns order
df.head()

In [None]:
def _first_substitution_time(team_name):
    """Return the ``time`` of the first 'SubstitutionOn' row of ``team_name`` in ``df``.

    Returns ``np.inf`` when the team has no such row, so that filters of the
    form ``df.time < <result>`` keep every event instead of this cell raising.
    """
    sub_times = df.loc[(df.eventType == 'SubstitutionOn') & (df.teamId == team_name), "time"]
    return float(sub_times.iloc[0]) if len(sub_times) else np.inf


FIRST_HOME_SUB = _first_substitution_time(HOME_TEAM)
FIRST_AWAY_SUB = _first_substitution_time(AWAY_TEAM)

In [None]:
def get_value_from_qualifiers(row, key):
    """Return the ``"value"`` of the first qualifier whose display name is ``key``.

    ``row`` is an iterable of qualifier dicts shaped like
    ``{"type": {"displayName": ...}, "value": ...}``. ``np.nan`` is returned
    when no qualifier matches.
    """
    matching_values = (
        qualifier["value"]
        for qualifier in row
        if qualifier["type"]["displayName"] == key
    )
    return next(matching_values, np.nan)

In [None]:
# One row per pass event, with the "Length" and "Zone" qualifier values lifted
# into their own columns.
pass_columns = ["teamId", "playerId", "x", "y", "endX", "endY", "outcomeType", "half", "time", "qualifiers"]
passes = df.loc[df["eventType"] == "Pass", pass_columns].copy()
for column, qualifier_name in (("length", "Length"), ("zone", "Zone")):
    passes[column] = passes["qualifiers"].apply(
        lambda qualifiers, name=qualifier_name: get_value_from_qualifiers(qualifiers, name)
    )
passes = passes.drop(columns=["qualifiers"])
passes

In [None]:
# Passes made by the home team.
is_home_pass = passes["teamId"] == HOME_TEAM
england_passes = passes.loc[is_home_pass]
england_passes

In [None]:
# Every England pass as a line from its start to its end point:
# green = successful, red = anything else.
fig, ax = plt.subplots(figsize=(8,9))
fig.set_facecolor("#313332")

ax.imshow(PITCH_IMG)
ax.axis("off")

pass_colors = np.where(england_passes["outcomeType"] == "Successful", "green", "red")
pass_segments = zip(
    england_passes["x"], england_passes["endX"],
    england_passes["y"], england_passes["endY"],
    pass_colors,
)
for start_x, end_x, start_y, end_y, color in pass_segments:
    ax.plot((start_x, end_x), (start_y, end_y), color=color)
plt.show()

England's goalkeeper passes

In [None]:
# playerId this notebook uses for England's goalkeeper.
ENGLAND_GOALKEEPER_ID = 110189
england_goalkeeper_passes = england_passes.loc[england_passes["playerId"].eq(ENGLAND_GOALKEEPER_ID)]
england_goalkeeper_passes.head()

In [None]:
# Goalkeeper passes only: green = successful, red = anything else.
fig, ax = plt.subplots(figsize=(8,9))
fig.set_facecolor("#313332")

ax.imshow(PITCH_IMG)
ax.axis("off")

for gk_pass in england_goalkeeper_passes.itertuples(index=False):
    line_color = "red"
    if gk_pass.outcomeType == "Successful":
        line_color = "green"
    ax.plot((gk_pass.x, gk_pass.endX), (gk_pass.y, gk_pass.endY), color=line_color)
plt.show()

In [None]:
# Mean pass-origin position per England player before the first England
# substitution. The mask is built from england_passes' own "time" column
# rather than from `df`, so it does not rely on index alignment between two
# different frames.
passes_before_first_sub = england_passes[england_passes["time"] < FIRST_HOME_SUB]
avg_position = passes_before_first_sub.groupby(["playerId"])[["x", "y"]].mean()

In [None]:
# Passer x receiver matrix of successful home-team passes before the first
# home substitution, joined with each passer's average position (x, y).
completed_before_sub = df[
    (df.teamId == HOME_TEAM)
    & (df.eventType == "Pass")
    & (df.outcomeType == "Successful")
    & (df.time < FIRST_HOME_SUB)
]
pair_counts = (
    completed_before_sub
    .groupby(['playerId', 'reciverId'])
    .size()
    .reset_index(name="Count")
)
pass_matrix = pair_counts.pivot(index="playerId", columns="reciverId", values="Count")
by_player = pass_matrix.merge(avg_position, how="right", on="playerId")
by_player

In [None]:
# Average position of each England player, drawn as a red disc with a white rim.
fig, ax = plt.subplots(figsize=(8,9))

fig.set_facecolor("#313332")

ax.imshow(PITCH_IMG)
ax.axis("off")

for _, player in by_player.iterrows():
    # facecolor/edgecolor instead of color + ec: `color` overrides both the
    # face and the edge colour, so the white rim was never drawn.
    player_circle = plt.Circle((player["x"], player["y"]), radius=7, facecolor="r", edgecolor="white")
    ax.add_patch(player_circle)

plt.show()

In [None]:
# Pass network: one line per pair of players, with width proportional to the
# number of successful passes exchanged in both directions, plus a disc at
# each player's average position.
fig, ax = plt.subplots(figsize=(8,9))

fig.set_facecolor("#313332")

ax.imshow(PITCH_IMG)
ax.axis("off")

positions = by_player[["x", "y"]]
players_id = positions.index

# Square the passer x receiver matrix on the players that have a position and
# treat missing combinations as 0. Without this, a player who passed but never
# received (or the reverse) raises KeyError, and NaN counts give NaN widths.
directed_counts = (
    by_player
    .drop(columns=["x", "y"])
    .reindex(index=players_id, columns=players_id)
    .fillna(0)
)
pair_totals = directed_counts + directed_counts.T

# Visit each unordered pair once; self-pairs and pairs without passes are skipped.
ordered_ids = list(players_id)
for position, player_id in enumerate(ordered_ids):
    for reciver_id in ordered_ids[position + 1:]:
        count = pair_totals.loc[player_id, reciver_id]
        if count <= 0:
            continue
        x_values = (positions.loc[player_id, "x"], positions.loc[reciver_id, "x"])
        y_values = (positions.loc[player_id, "y"], positions.loc[reciver_id, "y"])
        ax.plot(x_values, y_values, color="r", linewidth=count/4, zorder=1)

for _, player in positions.iterrows():
    # facecolor/edgecolor instead of color + ec, so the white rim is drawn.
    player_circle = plt.Circle((player["x"], player["y"]), radius=7, facecolor="r", edgecolor="white", zorder=2)
    ax.add_patch(player_circle)


plt.show()

In [None]:
# Shots by the home team. HOME_TEAM is used instead of a repeated "England"
# literal so this cell follows the constant like the other team filters do.
# `== True` is deliberate: it also treats missing isShot values as non-shots.
england_shots = df[(df.isShot == True) & (df.teamId == HOME_TEAM)]
england_shots

In [None]:

# Each England shot as a line from the shot location to x = PITCH_MAX_X at
# the shot's goalMouthY: green for goals, red otherwise.
fig, ax = plt.subplots(figsize=(8,9))
fig.set_facecolor("#313332")

ax.imshow(PITCH_IMG)
ax.axis("off")

for _, shot in england_shots.iterrows():
    scored = shot["eventType"] == "Goal"
    ax.plot(
        (shot["x"], PITCH_MAX_X),
        (shot["y"], shot["goalMouthY"]),
        color="green" if scored else "red",
    )
plt.show()

In [None]:
# Goal-mouth view: where England's shots (excluding "MissedShots") crossed the
# goal line; green = goal, red = anything else.
goal_img_path = "./reasources/images/goal.jpg"
goal_img = cv2.imread(goal_img_path)
if goal_img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError(f"Could not read goal image: {goal_img_path}")
# OpenCV loads BGR; matplotlib expects RGB.
goal_img = cv2.cvtColor(goal_img, cv2.COLOR_BGR2RGB)

fig, ax = plt.subplots()
fig.set_facecolor("#313332")

# Horizontal span of the goal in the rescaled y coordinates.
# NOTE(review): 31/68 presumably places the posts on a 68-unit-wide pitch, and
# the 4 / 12 below presumably fit goalMouthZ to the image - confirm.
pitch_height = PITCH_MAX_Y - PITCH_MIN_Y
a, b = int(PITCH_MIN_Y + pitch_height * 31/68), int(PITCH_MAX_Y - pitch_height * 31/68)
ax.imshow(goal_img, extent=[a - 3, b + 3, 0, 20])

shots_on_frame = england_shots[england_shots["eventType"] != "MissedShots"]
shots_x = shots_on_frame["goalMouthY"].tolist()
shots_y = (4 + shots_on_frame["goalMouthZ"] * 12 / 100).tolist()
colors = ["green" if event_type == "Goal" else "red" for event_type in shots_on_frame["eventType"]]

ax.scatter(shots_x, shots_y, c=colors)
ax.axis("off")
plt.show()

In [None]:
# All events of a single player, selected by playerId.
DECLAN_RICE = 332325
is_declan_rice = df["playerId"] == DECLAN_RICE
dr_events = df.loc[is_declan_rice]
dr_events["eventType"].unique()

In [None]:
idx_to_drop = dr_events[dr_events.eventType.isin(['Card', 'SubstitutionOff'])].index
dr_events.drop(idx_to_drop, inplace=True)
dr_events["eventCategory"] = dr_events.apply(lambda row: get_event_category(row.eventType, row.outcomeType), axis=1)
dr_events[["x", "y"]].describe()

In [None]:
fig, ax = plt.subplots(figsize=(8,9))

fig.set_facecolor("#313332")
markers = {"Defensive action": "o", "Action with ball": "X", "Challenge": "s", "Ball losses": "d"}

ax.imshow(PITCH_IMG)
ax.axis("off")
for key in markers.keys():
    ax.scatter(dr_events[dr_events.eventCategory == key]["x"], dr_events[dr_events.eventCategory == key]["y"], s=70, facecolors='none', edgecolors='r', alpha=0.4, marker=markers[key], label=key)
ax.legend(markers)

plt.show()

In [None]:
def get_event_category(event_type, event_outcome):
    if event_type in ['Clearance', 'BallRecovery', 'Interception', 'Save']:
        return "Defensive action"
    elif event_type in ['BallTouch', 'TakeOn'] or (event_type == 'Pass' and event_outcome == 'Successful'):
        return "Action with ball"
    elif event_type in ['Tackle', 'Challenge', 'Foul']:
        return "Challenge"
    else:
        return "Ball losses"

In [None]:
def plot_player_passes(ax: mpl.axes.Axes, df: pd.DataFrame, player_id: int):
    """Draw one player's passes over a pitch image on ``ax``.

    The axes title is the player's ``name`` looked up in the global
    ``players_df``. Successful passes are drawn blue, all others red.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    df : pandas.DataFrame
        Passes with ``playerId``, ``x``, ``y``, ``endX``, ``endY`` and
        ``outcomeType`` columns.
    player_id : int
        Player whose passes are drawn.

    Returns
    -------
    matplotlib.axes.Axes
        The same ``ax``.
    """
    # NOTE(review): with this extent the y axis increases upwards, unlike the
    # cells that call ax.imshow(PITCH_IMG) without an extent - confirm the
    # passes are not mirrored vertically relative to those plots.
    ax.set_xlim((0, PITCH_MAX_X))
    ax.set_ylim((0, PITCH_MAX_Y))
    ax.axis("off")
    pitch_image = plt.imread("./reasources/images/football_pitch_resized.png")
    ax.imshow(pitch_image, extent=[0, PITCH_MAX_X, 0, PITCH_MAX_Y])
    player_name = players_df.loc[players_df.playerId == player_id, "name"].item()
    ax.set_title(player_name)

    player_passes = df.loc[df["playerId"] == player_id]
    pass_rows = zip(
        player_passes["x"], player_passes["endX"],
        player_passes["y"], player_passes["endY"],
        player_passes["outcomeType"],
    )
    for start_x, end_x, start_y, end_y, outcome in pass_rows:
        line_color = "blue" if outcome == "Successful" else "red"
        ax.plot((start_x, end_x), (start_y, end_y), color=line_color)
    return ax

In [None]:
# England player ids ordered from most to fewest passes.
passes_per_player = england_passes.groupby(["playerId"]).size()
players_ids = passes_per_player.sort_values(ascending=False).index.to_list()
players_ids

In [None]:
# Small multiples: one pass map per England player, filled row by row in the
# order of `players_ids`.
fig = plt.figure(figsize=(8, 6), dpi=200)
nrows = 4
ncols = 3

gspec = GridSpec(ncols=ncols, nrows=nrows, figure=fig, hspace=0.3)
plot_counter = 0
for row in range(nrows):
    for col in range(ncols):
        # Row-major slot number. The previous `row + col * ncols` produced
        # indices 3 and 6 twice and never reached 10 and 11 on this 4 x 3 grid.
        player_idx = row * ncols + col
        if player_idx >= len(players_ids):
            # Fewer players than grid slots: leave the remaining slots empty.
            continue
        ax = plt.subplot(
                gspec[row, col],
                facecolor = "#EFE9E6"
            )
        player_id = players_ids[player_idx]
        plot_player_passes(ax, england_passes, player_id)