In [1]:
import pandas as pd
import numpy as np
from IPython.display import JSON

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the team-level and player-level tables. Both paths are relative to the
# notebook's working directory.
# NOTE(review): the recorded output of this cell is a FileNotFoundError for the
# team CSV — confirm the directory the notebook is meant to be started from.
team_csv = "./TeamData/team_data.csv"
player_csv = "./Player Data/player_stats.csv"
df_team = pd.read_csv(team_csv)
df_player = pd.read_csv(player_csv)

FileNotFoundError: [Errno 2] No such file or directory: './TeamData/team_data.csv'

In [None]:
# Show the column names of the team-level table.
df_team.columns

In [None]:
# Show the column names of the player-level table.
df_player.columns

<strong>What we need to figure out:</strong>
- Overview: Game played?, Goal scored?, Yellow Card?, Red Cards?
- Card Stats
- Teams with the most Shots, Corners, Fouls, Yellow Cards, Red Cards.
- Frequent Scoreline (First Half, Fulltime, Extra Time).
- Individual: Top Goals, Top Assists, Top Clean Sheets, Most Games

# Overview

In [None]:
# Tournament-wide headline numbers derived from the per-team table.
# The per-team game counts are summed and halved — presumably because each
# match is counted once for each of its two teams (confirm against the data).
games_played = df_team['games'].sum() / 2
goals_scored = df_team['goals'].sum()
total_yellow_card = df_team['cards_yellow'].sum()
total_red_card = df_team['cards_red'].sum()
# Average over the teams actually present in the table instead of dividing by
# a hard-coded 32, so the figure stays correct if rows are missing or added.
players_age_average = df_team['avg_age'].mean()

print("GAMES PLAYED: ", games_played)
print("GOALS SCORED: ", goals_scored)
print("YELLOW CARD: ", total_yellow_card)
print("RED CARD: ", total_red_card)
print("PLAYERS AGE AVERAGE: ", players_age_average)

# Card Stats

## Number of Yellow and Red Cards in Worldcup 2022

In [None]:
# Stacked bar chart: yellow cards per team with red cards stacked on top.
team = df_team['team']
yellow_cards = df_team['cards_yellow']
red_cards = df_team['cards_red']
width = 0.8

fig, ax = plt.subplots(figsize=(16, 10))
ax.bar(team, yellow_cards, width, label='Yellow Cards', color='yellow')
ax.bar(team, red_cards, width, bottom=yellow_cards, label='Red Cards', color='red')
ax.set_ylabel('Amounts of Cards', fontsize=14)
ax.set_title('Number of Yellow and Red Cards in Worldcup 2022', fontsize=20, fontweight='bold')
ax.legend(fontsize=12)
plt.xticks(team, rotation=90, fontsize=12)
plt.yticks(np.arange(0,20,2))  # fixed ticks at 0, 2, ..., 18

# Annotate every bar by its position on the x axis. The two columns are paired
# with zip so the yellow count is taken positionally; looking it up as
# yellow_cards[index] is a label lookup and only works while df_team keeps its
# default 0..n-1 index.
for index, (yellow, red) in enumerate(zip(yellow_cards, red_cards)):
    # Yellow count sits near the bottom of the bar.
    ax.text(x=index, y=1, s=f"{yellow}", ha='center')
    # Red count sits just above the top of the stacked bar.
    ax.text(x=index, y=red + yellow + 0.5, s=f"{red}", size=14, ha='center')

plt.show()

## Number of cards (yellow + red) per position

In [None]:
# Per-position totals of yellow and red cards, plus the number of player rows.
card_columns = ['player', 'position', 'cards_yellow', 'cards_red']
card_aggregations = {'cards_yellow':'sum', 'cards_red':'sum', 'player':'count'}
cards_per_position = df_player[card_columns].groupby('position').agg(card_aggregations)
cards_per_position

In [None]:
# Rebuild the per-position card totals, give the positions readable names and
# order them by combined (yellow + red) card count, smallest first.
position_names = {'GK':'Goalkeeper', 'DF':'Defender', 'MF':'Midfielder', 'FW':'Forward'}
cards_per_position = (
    df_player[['player', 'position', 'cards_yellow', 'cards_red']]
    .groupby('position')
    .agg({'cards_yellow':'sum', 'cards_red':'sum', 'player':'count'})
    .rename(index=position_names)
    .assign(**{'Total Cards': lambda frame: frame.cards_yellow + frame.cards_red})
    .sort_values(by=['Total Cards'])
)

positions = cards_per_position.index
total_cards_per_position = cards_per_position['Total Cards']

fig, ax = plt.subplots(figsize=(16,12))
ax.barh(positions, total_cards_per_position)
# Hide the top, bottom, left and right axis
ax.spines[['top', 'bottom', 'left', 'right']].set_visible(False)
ax.set_title('Number of Yellow and Red Cards per position', fontsize=20, fontweight='bold')
plt.xticks(fontsize=12)
plt.yticks(fontsize=15)

# Write each bar's length just past its right-hand end.
for bar in ax.patches:
    bar_length = bar.get_width()
    ax.text(x=bar_length + 0.2, y=bar.get_y() + 0.35, s=str(bar_length), fontsize=15)

plt.show()

# Goal and Assists

## Amount of Goals and Assists in Worldcup 2022

In [None]:
# Per-team goals and assists, ordered by their combined total (largest first)
# and re-indexed 0..n-1 so row position matches bar position.
df_goal_assist = (
    df_team[['team', 'goals', 'assists']]
    .assign(goals_and_assists=lambda frame: frame.goals + frame.assists)
    .sort_values(by=['goals_and_assists'], ascending=False)
    .reset_index(drop=True)
)

team = df_goal_assist['team']
goals = df_goal_assist['goals']
assists = df_goal_assist['assists']
width = 0.8

fig, ax = plt.subplots(figsize=(16, 10))
ax.bar(team, goals, width, label='Goals', color='limegreen')
ax.bar(team, assists, width, bottom=goals, label='Assists', color='lime')
# Hide the top, bottom, left and right axis
ax.spines[['top', 'bottom', 'left', 'right']].set_visible(False)
ax.set_title('Amount of Goals and Assists in Worldcup 2022', fontsize=20, fontweight='bold')
ax.legend(fontsize=12)
plt.xticks(team, rotation=90, fontsize=12)
# The y axis carries no ticks or labels; the values are written on the bars.
ax.tick_params(left=False, labelleft=False)

# Put each value in the vertical middle of its own bar segment.
for index, (goal, assist) in enumerate(zip(goals, assists)):
    ax.text(x=index, y=goal / 2, s=f"{goal}", ha='center')
    ax.text(x=index, y=goal + assist / 2, s=f"{assist}", ha='center')

plt.show()

## TOP 10 Goalscorer

In [None]:
# The ten players with the most goals, highest first.
top_scorers = df_player[['player', 'goals']].sort_values(by='goals', ascending=False).head(10)

top_scorers.plot(x='player', kind='bar', figsize=(16,12), width=0.8,
                 color='chartreuse', edgecolor='forestgreen')
plt.title('TOP 10 Goalscorer', loc='left', size=15, fontweight='bold')
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel(None)
plt.legend([])

# Write each value inside its bar, at 90% of the bar height.
for bar_position, goal_count in enumerate(top_scorers['goals']):
    plt.text(x=bar_position, y=goal_count * 0.9, s=f"{goal_count}", fontsize=16, ha='center')

plt.show()

## TOP 10 Assists

In [None]:
# The ten players with the most assists, highest first.
top_assisters = df_player[['player', 'assists']].sort_values(by='assists', ascending=False).head(10)

top_assisters.plot(x='player', kind='bar', figsize=(16,12), width=0.8,
                   color='chartreuse', edgecolor='forestgreen')
plt.title('TOP 10 Assists Player', loc='left', size=15, fontweight='bold')
plt.xticks(rotation=45, fontsize=14)
plt.yticks(np.arange(0,4,1),fontsize=14)  # integer ticks 0..3
plt.xlabel(None)
plt.legend([])

# Write each value inside its bar, at 90% of the bar height.
for bar_position, assist_count in enumerate(top_assisters['assists']):
    plt.text(x=bar_position, y=assist_count * 0.9, s=f"{assist_count}", fontsize=16, ha='center')

plt.show()

## Goals by position

In [None]:
def func_goal(x, y):
    """Build the text for one pie slice from its percentage.

    x is the slice's share in percent and y holds the values of all slices.
    The percentage is converted back to an absolute number against the sum of
    y and rounded to the nearest integer; the result is the percentage with
    one decimal on the first line and the goal count in brackets on the second.
    """
    total = np.sum(y)
    absolute = int(round(x / 100. * total))
    label_template = "{:.1f}%\n({:d} Goals)"
    return label_template.format(x, absolute)


# Inputs for the pie: total goals per position, largest share first.
df_goals_per_position = (
    df_player[['position', 'goals']]
    .groupby('position')
    .sum()
    .sort_values(by='goals', ascending=False)
)
goals_per_position = df_goals_per_position['goals'].tolist()
position_names = {'GK':'Goalkeeper', 'DF':'Defender', 'MF':'Midfielder', 'FW':'Forward'}
my_labels = df_goals_per_position.rename(index=position_names).index.tolist()
my_colors = ['seagreen', 'mediumseagreen', 'springgreen', 'mediumspringgreen']
my_explode = [0.1, 0, 0, 0]  # only the first (largest) slice is pulled out


# Pie chart; every slice is labelled with its percentage and absolute count.
fig, ax = plt.subplots(figsize=(12, 8))
ax.pie(
    goals_per_position,
    explode=my_explode,
    labels=my_labels,
    colors=my_colors,
    autopct=lambda pct: func_goal(pct, goals_per_position),
    startangle=10,
    shadow=True,
)
ax.set_title('Goals by position', loc='left', size=16, fontweight='bold')


plt.show()

## Assists by position

In [None]:
def func_assist(x, y):
    """Build the text for one pie slice from its percentage.

    x is the slice's share in percent and y holds the values of all slices.
    The percentage is converted back to an absolute number against the sum of
    y and rounded to the nearest integer; the result is the percentage with
    one decimal on the first line and the assist count in brackets on the
    second.
    """
    total = np.sum(y)
    absolute = int(round(x / 100. * total))
    label_template = "{:.1f}%\n({:d} Assists)"
    return label_template.format(x, absolute)

# Inputs for the pie: total assists per position, largest share first.
# The frame gets its own name so the goals-per-position frame built for the
# goals pie is not overwritten with assist data.
df_assists_per_position = df_player[['position', 'assists']].groupby('position').sum().sort_values(by='assists', ascending=False)
assists_per_position = df_assists_per_position.assists.tolist()
my_labels = df_assists_per_position.rename(index={'GK':'Goalkeeper', 'DF':'Defender', 'MF':'Midfielder', 'FW':'Forward'}).index.tolist()
my_colors = ['seagreen', 'mediumseagreen', 'springgreen', 'mediumspringgreen']
my_explode = [0.1, 0, 0, 0]  # only the first (largest) slice is pulled out

# Pie chart. The wedges must be sized from the assist totals: the labels are
# ordered by assists and func_assist converts each percentage back to a count
# against assists_per_position, so drawing goals_per_position here would give
# wedge sizes, labels and counts that do not belong together.
fig, ax = plt.subplots(figsize=(12, 8))
ax.pie(assists_per_position, explode=my_explode, labels=my_labels, colors=my_colors, autopct=lambda x: func_assist(x, assists_per_position), startangle=15, shadow=True)
ax.set_title('Assists by position', loc='left', size=16, fontweight='bold')


plt.show()

# Passes Attempted

In [None]:
# Read the per-player passing table; the path is relative to the notebook's
# working directory.
df_player_passing = pd.read_csv("./Player Data/player_passing.csv")

In [None]:
# The ten players with the highest `passes` value, highest first.
top_passers = df_player_passing[['player', 'passes']].sort_values(by='passes', ascending=False).head(10)

top_passers.plot(x='player', kind='bar', figsize=(16,12), width=0.8,
                 color='chartreuse', edgecolor='forestgreen')
plt.title('TOP 10 Passed Attempted', loc='left', size=15, fontweight='bold')
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel(None)
plt.legend([])

# Write each value inside its bar, at 90% of the bar height.
for bar_position, pass_count in enumerate(top_passers['passes']):
    plt.text(x=bar_position, y=pass_count * 0.9, s=f"{pass_count}", fontsize=16, ha='center')

plt.show()

In [None]:
# Only players whose `passes` value exceeds 100 are ranked, then the ten with
# the highest `passes_pct` are kept, highest first.
frequent_passers = df_player_passing.loc[df_player_passing.passes > 100, ['player', 'passes_pct']]
most_accurate_passers = frequent_passers.sort_values(by='passes_pct', ascending=False).head(10)

most_accurate_passers.plot(x='player', kind='bar', figsize=(16,12), width=0.8,
                           color='chartreuse', edgecolor='forestgreen')
plt.title('TOP 10 Accurate Passes Attempted', loc='left', size=15, fontweight='bold')
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel(None)
plt.legend([])

# Write each percentage inside its bar, at 90% of the bar height.
for bar_position, accuracy in enumerate(most_accurate_passers['passes_pct']):
    plt.text(x=bar_position, y=accuracy * 0.9, s=f"{accuracy}%", fontsize=16, ha='center')

plt.show()

# Player Age

## List of the 10 oldest players

In [3]:
# Ten oldest players. 'age' is a "<years>-<days>" string and is sorted as text.
# NOTE(review): text sorting only orders ages correctly if the day part is
# zero-padded to a fixed width — confirm against the data.
# .copy() makes the top-10 slice an independent frame before columns are added.
df_old_players = df_player[['player', 'age']].sort_values(by='age', ascending=False).head(10).copy()

# Split the age string once and convert both parts to numbers.
old_age_parts = df_old_players['age'].str.split('-', expand=True)
df_old_players['age_years'] = pd.to_numeric(old_age_parts[0])
df_old_players['age_days'] = pd.to_numeric(old_age_parts[1])
name = df_old_players.player
age = df_old_players['age_years']

fig, ax = plt.subplots(figsize=(16,10))
ax.bar(name, age, color='chartreuse', edgecolor='forestgreen')
ax.set_title('The oldest players', loc='left', size=16, fontweight='bold')
ax.spines[['top', 'bottom', 'left', 'right']].set_visible(False) # Hide the top, bottom, left and right axis
ax.invert_yaxis()  # y axis is flipped, so the bars extend downwards from the top
plt.xticks(rotation=45, fontsize=14)
plt.tick_params(left = False, labelleft = False)

# Annotate each bar with that player's own years and days. The years and days
# are paired row by row; running a separate loop over all days first would
# leave `day` at the last player's value and label every bar with it.
for index, (year, day) in enumerate(zip(df_old_players['age_years'], df_old_players['age_days'])):
    ax.text(x=index, y=year * 0.95, s=f"{year} years\n{day} days", fontsize=12, ha='center')


plt.show()

NameError: name 'df_player' is not defined

## List of the 10 youngest players

In [None]:
# Ten youngest players. 'age' is a "<years>-<days>" string and is sorted as text.
# NOTE(review): text sorting only orders ages correctly if the day part is
# zero-padded to a fixed width — confirm against the data.
# .copy() makes the top-10 slice an independent frame before columns are added.
df_young_players = df_player[['player', 'age']].sort_values(by='age', ascending=True).head(10).copy()

# Split the age string once and convert both parts to numbers.
young_age_parts = df_young_players['age'].str.split('-', expand=True)
df_young_players['age_years'] = pd.to_numeric(young_age_parts[0])
df_young_players['age_days'] = pd.to_numeric(young_age_parts[1])
name = df_young_players.player
age = df_young_players['age_years']

fig, ax = plt.subplots(figsize=(16,10))
ax.bar(name, age, color='chartreuse', edgecolor='forestgreen')
ax.set_title('The youngest players', loc='left', size=16, fontweight='bold')
ax.spines[['top', 'bottom', 'left', 'right']].set_visible(False) # Hide the top, bottom, left and right axis
ax.invert_yaxis()  # y axis is flipped, so the bars extend downwards from the top
plt.xticks(rotation=45, fontsize=14)
plt.tick_params(left = False, labelleft = False)

# Annotate each bar with that player's own years and days. The years and days
# are paired row by row; running a separate loop over all days first would
# leave `day` at the last player's value and label every bar with it.
for index, (year, day) in enumerate(zip(df_young_players['age_years'], df_young_players['age_days'])):
    ax.text(x=index, y=year * 0.95, s=f"{year} years\n{day} days", fontsize=12, ha='center')


plt.show()

## Average age by position

In [None]:
# Display each team together with its avg_age value.
df_team[['team', 'avg_age']]

In [None]:
def leapdays(y1, y2):
    """Return the number of leap years in the half-open range [y1, y2).

    Multiples of 4 are counted, multiples of 100 are removed again and
    multiples of 400 are added back (the Gregorian leap-year rule). Both
    bounds are shifted down by one so that y1 itself is included and y2 is
    excluded. This is the same count the standard library offers as
    calendar.leapdays.
    """
    y1 -= 1
    y2 -= 1
    return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400)


# Example call with concrete years. The cell used to pass the names y1 and y2,
# which are not defined at notebook level, so it could only raise a NameError.
leapdays(2000, 2022)

In [None]:
# Display the rows whose team is 'Argentina' (player, position, team, age and
# birth year), ordered by position.
df_player.loc[df_player['team'] == 'Argentina', ['player' ,'position', 'team', 'age', 'birth_year']].sort_values(by='position')

In [None]:
# Age of every Argentina player expressed in days.
player_argentina = df_player.loc[df_player['team'] == 'Argentina', ['player' ,'position', 'team', 'age', 'birth_year']].copy()

# 'age' is a "<years>-<days>" string: split it once and convert both parts.
argentina_age_parts = player_argentina['age'].str.split('-', expand=True)
player_argentina['age_year'] = pd.to_numeric(argentina_age_parts[0])
player_argentina['age_day'] = pd.to_numeric(argentina_age_parts[1])

# Full years count as 365 days each, plus the leftover days, plus one day per
# leap year between the birth year and 2022.
# birth_year is read from this frame itself. It used to come from
# df_player_age, which is only created in a later cell, so the cell failed
# when the notebook was run top to bottom.
player_argentina['age_days'] = (
    player_argentina['age_year'] * 365
    + player_argentina['age_day']
    + player_argentina['birth_year'].apply(lambda x: leapdays(x, 2022))
)
player_argentina

In [None]:
# Sum of all ages in days plus two fixed amounts, divided by 26 and then by 365
# to express the result in years.
# NOTE(review): 13235 and 11192 look like the ages in days of two squad members
# that are missing from player_argentina, and 26 like the squad size — confirm.
(player_argentina['age_days'].sum() + 13235 + 11192) / 26 / 365

In [None]:
# Independent copy of the identity and age columns of the player table.
age_columns = ['player' ,'position', 'team', 'age', 'birth_year']
df_player_age = df_player.loc[:, age_columns].copy()
df_player_age

## Average age of team in Worldcup 2022

## Age range in all teams in Worldcup 2022