In [None]:
import sys
sys.path.insert(0, '../')


import config
import tqdm


import pandas as pd

from pandas import json_normalize
import json
from tqdm.auto import tqdm

import matplotlib.pyplot as plt
from matplotlib.patches import Arc

import numpy as np
import matplotlib.cm as cm
from scipy.ndimage.filters import gaussian_filter

import requests

from matplotlib.colors import Normalize
import matplotlib.patheffects as pe

from ast import literal_eval
plt.style.use('fivethirtyeight')

In [None]:
def FreeCompetitions(env='github'):
    """Load the StatsBomb open-data competitions file into a DataFrame.

        Args:
            env (str, optional): source of the data. 'github' downloads
                ``competitions.json`` from ``config.data_url``; 'local' reads
                it from ``config.data_dir``.

        Returns:
            comp_df (DataFrame): DataFrame built from the parsed JSON.

        Raises:
            ValueError: 'env' is neither 'github' nor 'local'.
    """
    # Reject unknown environments before touching the network or the disk.
    if env not in ('github', 'local'):
        raise ValueError(
            "'env' variable should be either 'local' or 'github'.")

    if env == 'local':
        local_file = config.data_dir + 'competitions.json'
        with open(local_file, encoding='utf-8') as handle:
            payload = json.load(handle)
    else:
        response = requests.get(url=config.data_url + "competitions.json")
        # Decode the body as UTF-8 regardless of what the server advertises.
        response.encoding = 'utf-8'
        payload = response.json()

    return pd.DataFrame(payload)


def FreeMatches(competitions, env='github'):
    """Function to retrieve matches from all competitions in competitions.

        One matches file is fetched per (competition_id, season_id) row,
        flattened with ``json_normalize`` and the results are stacked into a
        single DataFrame with a fresh 0..n-1 index.

        Args:
            competitions (DataFrame): df with competitions, must contain
                'competition_id' and 'season_id'. Any index is accepted
                (rows are read positionally).
            env (str, optional): 'github' or 'local', environment to retrieve data from.

        Returns:
            matches_df (DataFrame): DataFrame with all matches. Empty if
                'competitions' has no rows.

        Raises:
            ValueError: wrong value for 'env'.
    """
    if env not in ('github', 'local'):
        raise ValueError(
            "'env' variable should be either 'local' or 'github'.")

    # Nothing to fetch; pd.concat would raise on an empty list.
    if len(competitions) == 0:
        return pd.DataFrame()

    base = config.data_url if env == 'github' else config.data_dir

    # Iterate the column values directly: indexing with range(len(...)) is a
    # label lookup and breaks on filtered / re-indexed frames.
    id_pairs = zip(competitions['competition_id'], competitions['season_id'])

    frames = []
    for comp_id, season_id in tqdm(id_pairs, total=len(competitions)):
        location = base + f"/matches/{comp_id}/{season_id}.json"
        if env == 'github':
            response = requests.get(url=location)
            # Same explicit decoding as the other loaders in this file.
            response.encoding = 'utf-8'
            raw_matches = response.json()
        else:
            with open(location, encoding='utf-8') as json_file:
                raw_matches = json.load(json_file)
        frames.append(json_normalize(raw_matches))

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every iteration.
    matches_df = pd.concat(frames, ignore_index=True, sort=False)
    return matches_df


def get_matchFree(match_id, env='github'):
    """Load the event file of a single match as a flat DataFrame.

        Args:
            match_id (int): id of the game; used to build the file name
                ``events/<match_id>.json``.
            env (str, optional): 'github' downloads from ``config.data_url``,
                'local' reads from ``config.data_dir``.

        Returns:
            events (DataFrame): the normalized events, with an extra
                'match_id' column set to the requested id on every row.

        Raises:
            ValueError: 'env' is neither 'github' nor 'local'.
    """
    # Guard first so an invalid env never triggers any I/O.
    if env not in ('github', 'local'):
        raise ValueError(
            "'env' variable should be either 'local' or 'github'.")

    if env == 'github':
        response = requests.get(url=config.data_url + f"events/{match_id}.json")
        response.encoding = 'utf-8'
        payload = response.json()
    else:
        source_path = config.data_dir + f"events/{match_id}.json"
        with open(source_path, encoding='utf-8') as handle:
            payload = json.load(handle)

    match_events = pd.DataFrame(json_normalize(payload))
    # Tag every row with its match so frames can be concatenated later.
    match_events.loc[:, 'match_id'] = match_id
    return match_events


# Data from a list of matches (use get_matchFree)
def StatsBombFreeEvents(matchesdf, env='github'):
    """Function to create DataFrame with events from a set of matches.

        Calls ``get_matchFree`` once per row of 'matchesdf' and tags the
        returned events with that row's competition and season ids.

        Args:
            matchesdf (DataFrame): dataframe of matches. Must have columns 'match_id',
                'competition.competition_id', and 'season.season_id'.
            env (str, optional): 'github' or 'local', environment to retrieve data from.

        Returns:
            df (DataFrame): DataFrame with all events from all matches, with
                added 'competition_id' and 'season_id' columns. Empty if
                'matchesdf' has no rows.

        Raises:
            ValueError: wrong value for 'env' (raised by ``get_matchFree``).
    """
    # Nothing to fetch; pd.concat would raise on an empty list.
    if len(matchesdf) == 0:
        return pd.DataFrame()

    # Walk the three columns in lockstep instead of re-filtering the frame by
    # index label for every row: that was O(n^2) and, with duplicated index
    # labels, always picked the first matching row.
    rows = zip(matchesdf['match_id'].to_numpy(),
               matchesdf['competition.competition_id'].to_numpy(),
               matchesdf['season.season_id'].to_numpy())

    res = []
    for match_id, competition_id, season_id in tqdm(rows, total=len(matchesdf)):
        events = get_matchFree(match_id, env=env)
        events.loc[:, 'competition_id'] = competition_id
        events.loc[:, 'season_id'] = season_id
        res.append(events)
    df = pd.concat(res, sort=True)
    return df


def get_lineupsFree(match_id, env='github'):
    """Load the lineup file of a single match as a flat DataFrame.

        The JSON is normalized on its 'lineup' records, carrying 'team_id'
        and 'team_name' along as metadata columns.

        Args:
            match_id (int): id of the game; used to build the file name
                ``lineups/<match_id>.json``.
            env (str, optional): 'github' downloads from ``config.data_url``,
                'local' reads from ``config.data_dir``.

        Returns:
            events (DataFrame): one row per 'lineup' record, with an extra
                'match_id' column set to the requested id.

        Raises:
            ValueError: 'env' is neither 'github' nor 'local'.
    """
    # Guard first so an invalid env never triggers any I/O.
    if env not in ('github', 'local'):
        raise ValueError(
            "'env' variable should be either 'local' or 'github'.")

    if env == 'github':
        response = requests.get(url=config.data_url + f"lineups/{match_id}.json")
        response.encoding = 'utf-8'
        payload = response.json()
    else:
        source_path = config.data_dir + f"lineups/{match_id}.json"
        with open(source_path, encoding='utf-8') as handle:
            payload = json.load(handle)

    flattened = json_normalize(payload, 'lineup', ['team_id', 'team_name'])
    lineup_df = pd.DataFrame(flattened)
    # Tag every row with its match so frames can be concatenated later.
    lineup_df.loc[:, 'match_id'] = match_id
    return lineup_df


def StatsBombFreelineups(matchesdf, env='github'):
    """Function to create DataFrame with lineups from a set of matches.

        Calls ``get_lineupsFree`` once per row of 'matchesdf' and tags the
        returned lineup rows with that row's competition and season ids.

        Args:
            matchesdf (DataFrame): dataframe of matches. Must have columns 'match_id',
                'competition.competition_id', and 'season.season_id'.
            env (str, optional): 'github' or 'local', environment to retrieve data from.

        Returns:
            df (DataFrame): DataFrame with the lineups of all matches, with
                added 'competition_id' and 'season_id' columns. Empty if
                'matchesdf' has no rows.

        Raises:
            ValueError: wrong value for 'env' (raised by ``get_lineupsFree``).
    """
    # Nothing to fetch; pd.concat would raise on an empty list.
    if len(matchesdf) == 0:
        return pd.DataFrame()

    # Walk the three columns in lockstep instead of re-filtering the frame by
    # index label for every row: that was O(n^2) and, with duplicated index
    # labels, always picked the first matching row.
    rows = zip(matchesdf['match_id'].to_numpy(),
               matchesdf['competition.competition_id'].to_numpy(),
               matchesdf['season.season_id'].to_numpy())

    res = []
    for match_id, competition_id, season_id in rows:
        lineups = get_lineupsFree(match_id, env=env)
        lineups.loc[:, 'competition_id'] = competition_id
        lineups.loc[:, 'season_id'] = season_id
        res.append(lineups)
    df = pd.concat(res, sort=True)
    return df

In [None]:
# Fetch the competitions table over HTTP (env='github'). The result is not
# assigned; as the last expression of the cell it is presumably meant to be
# shown as the cell output.
FreeCompetitions(env='github')