## Team Analysis - Everton FC 2017/2018

In this file we will analyse Everton FC's season. We will look at metrics like Goals Scored, Goals Conceded, Assists, Goal Conversion Rate, Points, Possession, Set Piece Performance and so on...

In [15]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
import pathlib
from bs4 import BeautifulSoup
import requests

In [16]:
# Importing data
# Event data for every match contained in the England events file (2017/18 season);
# the per-team subsets are extracted from this frame further down.
# Data is from wyscout/events/events_England.json

path = pathlib.Path().resolve().parents[0] / 'src' / 'wyscout' / 'events' / 'events_England.json'
# Be explicit about the encoding so the platform default cannot affect parsing.
with open(path, encoding='utf-8') as f:
    data = json.load(f)
events = pd.DataFrame(data)

# Keep only events that carry exactly two entries in `positions`
# (presumably start and end coordinates - confirm against the Wyscout docs).
# Mapping `len` over the single column avoids building a Series per row,
# which is what `apply(..., axis=1)` would do.
events = events.loc[events['positions'].map(len) == 2]

events.head(5)

Unnamed: 0,eventId,subEventName,tags,playerId,positions,matchId,eventName,teamId,matchPeriod,eventSec,subEventId,id
0,8,Simple pass,[{'id': 1801}],25413,"[{'y': 49, 'x': 49}, {'y': 78, 'x': 31}]",2499719,Pass,1609,1H,2.758649,85,177959171
1,8,High pass,[{'id': 1801}],370224,"[{'y': 78, 'x': 31}, {'y': 75, 'x': 51}]",2499719,Pass,1609,1H,4.94685,83,177959172
2,8,Head pass,[{'id': 1801}],3319,"[{'y': 75, 'x': 51}, {'y': 71, 'x': 35}]",2499719,Pass,1609,1H,6.542188,82,177959173
3,8,Head pass,[{'id': 1801}],120339,"[{'y': 71, 'x': 35}, {'y': 95, 'x': 41}]",2499719,Pass,1609,1H,8.143395,82,177959174
4,8,Simple pass,[{'id': 1801}],167145,"[{'y': 95, 'x': 41}, {'y': 88, 'x': 72}]",2499719,Pass,1609,1H,10.302366,85,177959175


In [17]:
# We want to extract the data of every team into its own dataframe so we can compare them.
# Import the teams data so we can look up the team names by their Wyscout id.
path = pathlib.Path().resolve().parents[0] / 'src' / 'wyscout' / 'teams.json'
# Be explicit about the encoding so the platform default cannot affect parsing.
with open(path, encoding='utf-8') as f:
    data = json.load(f)
teams = pd.DataFrame(data)

teams.head(5)

Unnamed: 0,city,name,wyId,officialName,area,type
0,Newcastle upon Tyne,Newcastle United,1613,Newcastle United FC,"{'name': 'England', 'id': '0', 'alpha3code': '...",club
1,Vigo,Celta de Vigo,692,Real Club Celta de Vigo,"{'name': 'Spain', 'id': '724', 'alpha3code': '...",club
2,Barcelona,Espanyol,691,Reial Club Deportiu Espanyol,"{'name': 'Spain', 'id': '724', 'alpha3code': '...",club
3,Vitoria-Gasteiz,Deportivo Alav\u00e9s,696,Deportivo Alav\u00e9s,"{'name': 'Spain', 'id': '724', 'alpha3code': '...",club
4,Valencia,Levante,695,Levante UD,"{'name': 'Spain', 'id': '724', 'alpha3code': '...",club


In [18]:
def _team_events_and_name(team_id, name_column='officialName'):
    """Return ``(team_events, team_name)`` for one Wyscout team id.

    ``team_events`` holds the rows of the global ``events`` frame whose
    ``teamId`` equals ``team_id``, with a constant ``teamName`` column
    inserted at position 0. ``team_name`` is the value of ``name_column``
    in the global ``teams`` frame for the row whose ``wyId`` equals
    ``team_id``.

    Raises:
        KeyError: if ``teams`` has no row with that ``wyId``.
    """
    name_rows = teams.loc[teams['wyId'] == team_id, name_column]
    if name_rows.empty:
        raise KeyError(f"no team with wyId {team_id} in teams")
    # Read the stored value directly instead of going through the display
    # formatter `to_string`, which is meant for printing, not extraction.
    team_name = str(name_rows.iloc[0])

    # Explicit copy: the new column must not touch the shared `events` frame.
    team_events = events.loc[events['teamId'] == team_id].copy()
    team_events.insert(0, 'teamName', team_name)
    return team_events, team_name


# Bournemouth 1
bournemouth_id = 1659
bournemouth_events, bournemouth_name = _team_events_and_name(bournemouth_id)

# Arsenal 2
arsenal_id = 1609
arsenal_events, arsenal_name = _team_events_and_name(arsenal_id)

# Brighton & Hove Albion 3
brighton_hove_albion_id = 1651
brighton_hove_albion_events, brighton_hove_albion_name = _team_events_and_name(brighton_hove_albion_id)

# Burnley 4
burnley_id = 1646
burnley_events, burnley_name = _team_events_and_name(burnley_id)

# West Brom 5
# NOTE(review): labelled by 'city' rather than 'officialName' - the reason is
# not evident from this notebook; confirm it is intentional.
west_brom_id = 1627
west_brom_events, west_brom_name = _team_events_and_name(west_brom_id, name_column='city')

# Chelsea 6
chelsea_id = 1610
chelsea_events, chelsea_name = _team_events_and_name(chelsea_id)

# Crystal Palace 7
crystal_palace_id = 1628
crystal_palace_events, crystal_palace_name = _team_events_and_name(crystal_palace_id)

# Everton 8
everton_id = 1623
everton_events, everton_name = _team_events_and_name(everton_id)

# Huddersfield Town 9
huddersfield_town_id = 1673
huddersfield_town_events, huddersfield_town_name = _team_events_and_name(huddersfield_town_id)

# Leicester City 10
leicester_city_id = 1631
leicester_city_events, leicester_city_name = _team_events_and_name(leicester_city_id)

# Liverpool 11
liverpool_id = 1612
liverpool_events, liverpool_name = _team_events_and_name(liverpool_id)

# Manchester City 12
manchester_city_id = 1625
manchester_city_events, manchester_city_name = _team_events_and_name(manchester_city_id)

# Manchester United 13
manchester_united_id = 1611
# Backward-compatible alias: United's id used to live under this misleading name.
manchester_city_events_id = manchester_united_id
manchester_united_events, manchester_united_name = _team_events_and_name(manchester_united_id)

# Newcastle United 14
newcastle_united_id = 1613
newcastle_united_events, newcastle_united_name = _team_events_and_name(newcastle_united_id)

# Southampton 15
southampton_id = 1619
southampton_events, southampton_name = _team_events_and_name(southampton_id)

# Stoke City 16
stoke_city_id = 1639
stoke_city_events, stoke_city_name = _team_events_and_name(stoke_city_id)

# Swansea 17
# NOTE(review): labelled by 'city' rather than 'officialName' - confirm intentional.
swansea_id = 10531
swansea_events, swansea_name = _team_events_and_name(swansea_id, name_column='city')

# Tottenham 18
tottenham_id = 1624
tottenham_events, tottenham_name = _team_events_and_name(tottenham_id)

# Watford 19
watford_id = 1644
watford_events, watford_name = _team_events_and_name(watford_id)

# West Ham United 20
west_ham_united_id = 1633
west_ham_united_events, west_ham_united_name = _team_events_and_name(west_ham_united_id)

# `team_names` and `team_ids` are parallel lists: position i of one belongs to
# position i of the other, so both must keep exactly the same team order.
team_names = [bournemouth_name, arsenal_name, brighton_hove_albion_name, burnley_name, west_brom_name, chelsea_name,
              crystal_palace_name, everton_name, huddersfield_town_name, leicester_city_name, liverpool_name,
              manchester_city_name, manchester_united_name, newcastle_united_name, southampton_name, stoke_city_name,
              tottenham_name, watford_name, swansea_name, west_ham_united_name]

team_ids = [bournemouth_id, arsenal_id, brighton_hove_albion_id, burnley_id, west_brom_id, chelsea_id,
            crystal_palace_id, everton_id, huddersfield_town_id, leicester_city_id, liverpool_id,
            manchester_city_id, manchester_united_id, newcastle_united_id, southampton_id, stoke_city_id,
            tottenham_id, watford_id, swansea_id, west_ham_united_id]

In [19]:
# Import the match data so we can derive stats about each team's performance.
path = pathlib.Path().resolve().parents[0] / 'src' / 'wyscout' / 'matches' / 'matches_England.json'
# Be explicit about the encoding so the platform default cannot affect parsing.
with open(path, encoding='utf-8') as f:
    data = json.load(f)
matches = pd.DataFrame(data)

matches.head(5)

Unnamed: 0,status,roundId,gameweek,teamsData,seasonId,dateutc,winner,venue,wyId,label,date,referees,duration,competitionId
0,Played,4405654,38,"{'1646': {'scoreET': 0, 'coachId': 8880, 'side...",181150,2018-05-13 14:00:00,1659,Turf Moor,2500089,"Burnley - AFC Bournemouth, 1 - 2","May 13, 2018 at 4:00:00 PM GMT+2","[{'refereeId': 385705, 'role': 'referee'}, {'r...",Regular,364
1,Played,4405654,38,"{'1628': {'scoreET': 0, 'coachId': 8357, 'side...",181150,2018-05-13 14:00:00,1628,Selhurst Park,2500090,"Crystal Palace - West Bromwich Albion, 2 - 0","May 13, 2018 at 4:00:00 PM GMT+2","[{'refereeId': 381851, 'role': 'referee'}, {'r...",Regular,364
2,Played,4405654,38,"{'1609': {'scoreET': 0, 'coachId': 7845, 'side...",181150,2018-05-13 14:00:00,1609,The John Smith's Stadium,2500091,"Huddersfield Town - Arsenal, 0 - 1","May 13, 2018 at 4:00:00 PM GMT+2","[{'refereeId': 384965, 'role': 'referee'}, {'r...",Regular,364
3,Played,4405654,38,"{'1651': {'scoreET': 0, 'coachId': 8093, 'side...",181150,2018-05-13 14:00:00,1612,Anfield,2500092,"Liverpool - Brighton & Hove Albion, 4 - 0","May 13, 2018 at 4:00:00 PM GMT+2","[{'refereeId': 385704, 'role': 'referee'}, {'r...",Regular,364
4,Played,4405654,38,"{'1644': {'scoreET': 0, 'coachId': 93112, 'sid...",181150,2018-05-13 14:00:00,1611,Old Trafford,2500093,"Manchester United - Watford, 1 - 0","May 13, 2018 at 4:00:00 PM GMT+2","[{'refereeId': 381853, 'role': 'referee'}, {'r...",Regular,364


In [20]:
# We can now check how many matches each team won, lost and drew, and how many
# goals they scored and conceded.

# Build the league table: one row per team id, one integer counter per stat,
# all starting at 0. The counters are then filled from the matches dataframe.
stat_columns = ['games_played', 'wins', 'draws', 'losses', 'goals_for',
                'goals_against', 'goal_difference', 'points']
stats = pd.DataFrame(0, index=team_ids, columns=stat_columns)

# `team_names` is parallel to `team_ids`, so the names line up with the index.
# A throwaway list is used here on purpose: rebinding `teams` would clobber
# the teams dataframe that the earlier cells rely on.
stats.insert(0, 'team', [str(name) for name in team_names])

# `teamsData` maps a team id (as a string key) to that team's record for the
# match. The order of the two entries is NOT home/away - e.g. the sample row
# "Huddersfield Town - Arsenal" lists Arsenal's key first - so both sides are
# handled symmetrically instead of being labelled home and away.
for teams_data in matches['teamsData']:
    first, second = list(teams_data.values())[:2]

    for own, opponent in ((first, second), (second, first)):
        team_id = own['teamId']
        scored = own['score']
        conceded = opponent['score']

        stats.at[team_id, 'games_played'] += 1
        stats.at[team_id, 'goals_for'] += scored
        stats.at[team_id, 'goals_against'] += conceded

        if scored > conceded:
            stats.at[team_id, 'wins'] += 1
            stats.at[team_id, 'points'] += 3
        elif scored < conceded:
            stats.at[team_id, 'losses'] += 1
        else:
            stats.at[team_id, 'draws'] += 1
            stats.at[team_id, 'points'] += 1

# Goal difference = goals scored - goals conceded
stats['goal_difference'] = stats['goals_for'] - stats['goals_against']

# Sort by points; goal difference and then goals scored break ties so that
# teams level on points appear in a deterministic order.
stats = stats.sort_values(by=['points', 'goal_difference', 'goals_for'], ascending=False)
stats

Unnamed: 0,team,games_played,wins,draws,losses,goals_for,goals_against,goal_difference,points
1625,Manchester City FC,38,32,4,2,106,27,79,100
1611,Manchester United FC,38,25,6,7,68,28,40,81
1624,Tottenham Hotspur FC,38,23,8,7,74,36,38,77
1612,Liverpool FC,38,21,12,5,84,38,46,75
1610,Chelsea FC,38,21,7,10,62,38,24,70
1609,Arsenal FC,38,19,6,13,74,51,23,63
1646,Burnley FC,38,14,12,12,36,39,-3,54
1623,Everton FC,38,13,10,15,44,58,-14,49
1631,Leicester City FC,38,12,11,15,56,60,-4,47
1613,Newcastle United FC,38,12,8,18,39,47,-8,44


In [50]:
# Rank the table by goals scored, highest first.
# Everton comes out 11th in the league for goals scored.
stats = stats.sort_values('goals_for', ascending=False)
stats

Unnamed: 0,team,games_played,wins,draws,losses,goals_for,goals_against,goal_difference,points
1625,Manchester City FC,38,32,4,2,106,27,79,100
1612,Liverpool FC,38,21,12,5,84,38,46,75
1624,Tottenham Hotspur FC,38,23,8,7,74,36,38,77
1609,Arsenal FC,38,19,6,13,74,51,23,63
1611,Manchester United FC,38,25,6,7,68,28,40,81
1610,Chelsea FC,38,21,7,10,62,38,24,70
1631,Leicester City FC,38,12,11,15,56,60,-4,47
1633,West Ham United FC,38,10,12,16,48,68,-20,42
1628,Crystal Palace FC,38,11,11,16,45,55,-10,44
1659,AFC Bournemouth,38,11,11,16,45,61,-16,44


In [52]:
# Rank the table by goals conceded, fewest first.
# Everton comes out 15th in the league for goals conceded.
stats = stats.sort_values('goals_against', ascending=True)
stats

Unnamed: 0,team,games_played,wins,draws,losses,goals_for,goals_against,goal_difference,points
1625,Manchester City FC,38,32,4,2,106,27,79,100
1611,Manchester United FC,38,25,6,7,68,28,40,81
1624,Tottenham Hotspur FC,38,23,8,7,74,36,38,77
1612,Liverpool FC,38,21,12,5,84,38,46,75
1610,Chelsea FC,38,21,7,10,62,38,24,70
1646,Burnley FC,38,14,12,12,36,39,-3,54
1613,Newcastle United FC,38,12,8,18,39,47,-8,44
1609,Arsenal FC,38,19,6,13,74,51,23,63
1651,Brighton & Hove Albion FC,38,9,13,16,34,54,-20,40
1628,Crystal Palace FC,38,11,11,16,45,55,-10,44


In [69]:
# Now we will look at the number of shots taken by each team compared to the
# number of goals scored from those shots.
# NOTE: only events whose subEventName is "Shot" are counted, so the goal
# totals here are lower than goals_for in the league table (presumably because
# penalties, free kicks and own goals are recorded as other event types -
# TODO confirm).

# Explicit copy so that adding the "Goal" column below does not raise
# SettingWithCopyWarning on a slice of `events`.
all_shots = events.loc[events["subEventName"] == "Shot"].copy()

# A shot counts as a goal when its tag list contains the tag with id 101.
# Stored as int so the per-team sum and the ratio below are numeric.
all_shots["Goal"] = all_shots["tags"].apply(lambda tags: int({'id': 101} in tags))

shots = (
    all_shots.groupby("teamId")
    .agg(Shots=("id", "count"), Goals=("Goal", "sum"))
    .reset_index()
)

# Replace the numeric team ids with readable team names.
team_labels = {'1659': 'Bournemouth', '1609': 'Arsenal', '1651': 'Brighton & Hove Albion', '1646': 'Burnley',
               '1627': 'West Bromwich Albion', '1610': 'Chelsea', '1628': 'Crystal Palace', '1623': 'Everton',
               '1673': 'Huddersfield Town', '1631': 'Leicester City', '1612': 'Liverpool',
               '1625': 'Manchester City', '1611': 'Manchester United', '1613': 'Newcastle United',
               '1619': 'Southampton', '1639': 'Stoke City', '10531': 'Swansea City',
               '1624': 'Tottenham Hotspur', '1644': 'Watford', '1633': 'West Ham United'}
shots['teamId'] = shots['teamId'].astype(str).replace(team_labels)

# The ratio of goals scored to shots taken puts Everton 8th in the league,
# which means they are quite efficient with their shots.
shots['ratio'] = shots['Goals'] / shots['Shots']
shots = shots.sort_values(by=['ratio'], ascending=False)
shots = shots.reset_index(drop=True)  # rank order becomes the new index
shots

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_shots["Goal"] = all_shots.tags.apply(lambda x: 1 if {'id':101} in x else 0).astype(object)


Unnamed: 0,teamId,Shots,Goals,ratio
0,Manchester City,603,94,0.155887
1,Manchester United,466,64,0.137339
2,West Ham United,328,45,0.137195
3,Liverpool,600,78,0.13
4,Arsenal,538,69,0.128253
5,Leicester City,375,47,0.125333
6,Tottenham Hotspur,568,67,0.117958
7,Everton,320,37,0.115625
8,Chelsea,550,55,0.1
9,Bournemouth,413,41,0.099274


### Conclusion
By looking at the goals scored, the goals conceded and the effectiveness of the offence, we can see that the offence is performing okay but the defence needs improvement. We placed 11th in the league on goals scored and 8th on shot effectiveness, but only 15th on goals conceded, so the defence is the area we most need to improve right now.