# Inspect data on Google Drive
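This notebook guides you through inspecting the Wyscout data (competitions, seasons, matches, teams, players, and match actions) stored on your Google Drive account. It is intended to be run on Google Colab.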

In [0]:
# Install Python package
!pip install matplotsoccer numpy==1.15.4

In [0]:
# Import standard module
import os

# Import Pandas library
import pandas as pd

# Import matplotsoccer library
import matplotsoccer

# Import Google Colab function
from google.colab import drive

In [0]:
# Access your Google Drive account by mounting it at `mount_point`.
# The data directory used in the rest of the notebook is built from this mount point.
mount_point = '/content/gdrive'
drive.mount(mount_point)

In [0]:
# Build the data directory on the mounted drive: <mount_point>/My Drive/Wyscout
drive_folder = 'My Drive'
directory = 'Wyscout'
path = os.path.join(mount_point, drive_folder, directory)

# Inspect competitions

In [0]:
# Location of the HDF5 file that holds the competitions table
file_competitions = os.path.join(path, 'competitions.h5')

In [0]:
# Read the object stored under the 'competitions' key of the HDF5 file
df_competitions = pd.read_hdf(file_competitions, key='competitions')

In [0]:
# Display the competitions
df_competitions

# Inspect seasons

In [0]:
# Location of the HDF5 file that holds the seasons table
file_seasons = os.path.join(path, 'seasons.h5')

In [0]:
# Read the object stored under the 'seasons' key of the HDF5 file
df_seasons = pd.read_hdf(file_seasons, key='seasons')

In [0]:
# Display the seasons
df_seasons

# Inspect overview

In [0]:
# Left-join the seasons onto the competitions via competition_id,
# so every competition row is kept even when no season matches it.
df_overview = pd.merge(
    df_competitions,
    df_seasons,
    how='left',
    on='competition_id',
)

In [0]:
# Display the combined competitions/seasons overview
df_overview

# Inspect a season

In [0]:
# Season IDs noted for reference:
#   Eredivisie 2016/2017 - 2018/2019:     [10992, 181334, 185611]
#   Premier League 2016/2017 - 2018/2019: [10883, 181150, 185618]
#   Both competitions combined:           [10883, 10992, 181150, 181334, 185611, 185618]

# Season to inspect; it selects the `season-<season_id>` data directory read by the cells below.
season_id = 185611  # 2018/2019 Eredivisie season

## View matches

In [0]:
# HDF5 file with the matches of the selected season: <path>/season-<season_id>/matches.h5
# (a single os.path.join call suffices; wrapping it in a second one-argument join was a no-op)
file_matches = os.path.join(path, f'season-{season_id}', 'matches.h5')

In [0]:
# Read the object stored under the 'matches' key of the season's HDF5 file
df_matches = pd.read_hdf(file_matches, key='matches')

In [0]:
# Display the matches of the selected season
df_matches

## View teams

In [0]:
# HDF5 file with the teams of the selected season: <path>/season-<season_id>/teams.h5
# (a single os.path.join call suffices; wrapping it in a second one-argument join was a no-op)
file_teams = os.path.join(path, f'season-{season_id}', 'teams.h5')

In [0]:
# Load the teams table, then keep a single row per team_id.
# When a team_id occurs more than once, its last occurrence is the one retained.
teams_raw = pd.read_hdf(file_teams, key='teams')
df_teams = teams_raw.drop_duplicates(subset='team_id', keep='last')

In [0]:
# Display the de-duplicated teams
df_teams

## View players

In [0]:
# HDF5 file with the players of the selected season: <path>/season-<season_id>/players.h5
# (a single os.path.join call suffices; wrapping it in a second one-argument join was a no-op)
file_players = os.path.join(path, f'season-{season_id}', 'players.h5')

In [0]:
# Load the players table, then keep a single row per player_id.
# When a player_id occurs more than once, its last occurrence is the one retained.
players_raw = pd.read_hdf(file_players, key='players')
df_players = players_raw.drop_duplicates(subset='player_id', keep='last')

In [0]:
# Display the de-duplicated players
df_players

## View match actions

In [0]:
# HDF5 file with the match actions of the selected season: <path>/season-<season_id>/actions.h5
# (a single os.path.join call suffices; wrapping it in a second one-argument join was a no-op)
file_actions = os.path.join(path, f'season-{season_id}', 'actions.h5')

In [0]:
# Open the actions store read-only. HDFStore defaults to mode='a' (append), which opens the
# file for writing and creates it when it does not exist; this notebook only inspects the data.
# The store is left open because the following cells read from it; call store_actions.close()
# once you are done with it.
store_actions = pd.HDFStore(file_actions, mode='r')

In [0]:
# Collect the keys of the objects stored in the actions file
keys = store_actions.keys()

In [0]:
# Display the keys found in the actions store
keys

In [0]:
# Fetch the actions stored under one specific key of the store.
# NOTE(review): the key is hard-coded; presumably it belongs to the season selected via
# season_id — pick another entry from `keys` when inspecting a different season.
df_actions = store_actions['match_2760914']

In [0]:
# Display the actions that were just loaded
df_actions

## Add player names

In [0]:
# Lookup table from player_id (index) to the player's short name (value)
player_ids = df_players['player_id'].values
player_short_names = df_players['short_name'].values
mapping_players = pd.Series(data=player_short_names, index=player_ids)

In [0]:
# Translate each action's player_id into a name via the lookup table,
# then store the result as a new player_name column.
action_player_names = df_actions['player_id'].map(mapping_players)
df_actions['player_name'] = action_player_names

In [0]:
# Display the actions, now including the player_name column
df_actions

## Add team names

In [0]:
# Lookup table from team_id (index) to the team's short name (value)
team_ids = df_teams['team_id'].values
team_short_names = df_teams['short_name'].values
mapping_teams = pd.Series(data=team_short_names, index=team_ids)

In [0]:
# Translate each action's team_id into a name via the lookup table,
# then store the result as a new team_name column.
action_team_names = df_actions['team_id'].map(mapping_teams)
df_actions['team_name'] = action_team_names

In [0]:
# Display the actions, now including the team_name column
df_actions

## Visualize actions

In [0]:
# Take the first five actions as a small sample to visualize
df_actions_sample = df_actions.iloc[:5]

In [0]:
# Draw the sampled actions with matplotsoccer.
# The coordinate columns give each action's start and end location; the label columns are
# passed along under the titles time / type / player / team.
sample_locations = df_actions_sample[['start_x', 'start_y', 'end_x', 'end_y']]
sample_labels = df_actions_sample[['time_seconds', 'type_name', 'player_name', 'team_name']]

matplotsoccer.actions(
    location=sample_locations,
    action_type=df_actions_sample['type_name'],
    team=df_actions_sample['team_name'],
    result=df_actions_sample['result'],
    label=sample_labels,
    labeltitle=['time', 'type', 'player', 'team'],
    zoom=False
)