# Importing libraries

In [1]:
import pandas as pd
import json
import numpy as np

# Read parquet file

In [2]:
file_path = "../data/eng_1516_events.parquet"

In [3]:
df = pd.read_parquet(file_path)

### Saving column names

In [4]:
column_list = df.columns.tolist()

### Utility functions

In [5]:
players_df = df[["player_id", "player"]].drop_duplicates().dropna()

In [6]:
def add_name_players(metric_df: pd.DataFrame, players_df: pd.DataFrame):
    """Add the players names to metric dataframe

    Parameters
    ----------
    metric_df : pd.DataFrame
        player_id, metric
    players_df : pd.DataFrame
        player_id, player_name
    """
    return pd.merge(left=metric_df, right=players_df, on="player_id")

In [7]:
def head_col(col_name: str, df: pd.DataFrame) -> pd.DataFrame:
    return df[~df[col_name].isna()].dropna(axis=1)

In [8]:
def check_column(partial_name: str, column_list: list[str]) -> None:
    for col in column_list:
        if partial_name in col:
            print(col)

In [9]:
check_column("tactics", column_list)

tactics


# Task 1

## Which player attempted the most shots?

In [10]:
player_shot_df = (
    df[df["type"] == "Shot"]
    .groupby(by=["player_id", "player"])
    .size()
    .reset_index(name="shot_count")
    .sort_values(by="shot_count", ascending=False, ignore_index=True)
)

In [11]:
player_shot_df.head()

Unnamed: 0,player_id,player,shot_count
0,10955.0,Harry Kane,158
1,3237.0,Sergio Leonel Agüero del Castillo,118
2,10960.0,Jamie Vardy,118
3,3289.0,Romelu Lukaku Menama,116
4,5458.0,Odion Jude Ighalo,111


## Which team created the most expected goals?

In [12]:
team_xg_df = (
    df.groupby(by=["team_id", "team"])
    .agg({"shot_statsbomb_xg": "sum"})
    .reset_index()
    .sort_values(by="shot_statsbomb_xg", ascending=False)
)

In [13]:
team_xg_df.head()

Unnamed: 0,team_id,team,shot_statsbomb_xg
1,22,Leicester City,66.282171
0,1,Arsenal,65.136126
12,36,Manchester City,64.555055
14,38,Tottenham Hotspur,62.356203
3,24,Liverpool,58.373657


In [14]:
most_xg_team = int(team_xg_df.iloc[0].team_id)

## How many different players were awarded a yellow card?

In [15]:
yellow_count = df[
    (df["foul_committed_card"] == "Yellow Card") | 
    (df["bad_behaviour_card"] == "Yellow Card")]["player_id"].nunique()
yellow_count

365

## Which Liverpool player assisted the most shots with their right foot?

In [16]:
chosen_team = df[df["team"] == "Liverpool"]["team_id"].unique()[0]
chosen_team

np.int64(24)

I took into consideration both the goals and the shots that did not convert to goals.

In [17]:
cb_assist_p_df = (
    df[
        ((df["pass_shot_assist"]) | (df["pass_goal_assist"]))
        & (df["pass_body_part"] == "Right Foot")
        & (df["team_id"] == chosen_team)                        # Chosen team id
    ]
    .groupby(by=["player_id", "player"])
    .size()
    .reset_index(name="shot_assist_count")
    .sort_values(by="shot_assist_count", ascending=False)
)

In [18]:
cb_assist_p_df.head()

Unnamed: 0,player_id,player,shot_assist_count
2,3473.0,James Philip Milner,42
6,3501.0,Philippe Coutinho Correia,41
10,3535.0,Roberto Firmino Barbosa de Oliveira,31
13,4090.0,Adam David Lallana,27
15,4590.0,Nathaniel Edwin Clyne,25


## Which Liverpool player applied the most counterpressures?

In [19]:
counterpress_player_df = (
    df[(df["counterpress"]) & (df["team_id"] == chosen_team)]
    .groupby(by=["player_id", "player"])
    .size()
    .reset_index(name="counterpressure_count")
    .sort_values(by="counterpressure_count", ascending=False)
)

In [20]:
counterpress_player_df.head()

Unnamed: 0,player_id,player,counterpressure_count
18,7780.0,Lucas Pezzini Leiva,257
2,3473.0,James Philip Milner,252
4,3493.0,Emre Can,248
10,3535.0,Roberto Firmino Barbosa de Oliveira,240
13,4090.0,Adam David Lallana,211


# Task 2

## Aerial Wins

In [21]:
def get_aerial_wins(df: pd.DataFrame) -> pd.DataFrame:
    """Extract the number of aerial wins for player.

    Parameters
    ----------
    df : pd.DataFrame
        Event data

    Returns
    -------
    pd.DataFrame
        DataFrame with player_id, aerial_win_count as columns
    """
    return (
        df[
            (df["clearance_aerial_won"])
            | (df["miscontrol_aerial_won"])
            | (df["pass_aerial_won"])
            | (df["shot_aerial_won"])
        ]
        .groupby(by=["player_id","player"])
        .size()
        .reset_index(name="aerial_win_count")
        .sort_values(by="aerial_win_count", ascending=False, ignore_index=True)
    )

In [22]:
aerial_win_players = get_aerial_wins(df=df)

### Best 5 players according to Aerial Wins

In [23]:
aerial_win_players.head(5)

Unnamed: 0,player_id,player,aerial_win_count
0,3454.0,Troy Deeney,278
1,19362.0,Rudy Gestede,239
2,3669.0,Virgil van Dijk,203
3,10958.0,Chris Smalling,199
4,3523.0,Craig Dawson,157


## Aerial Win Percentage

In [24]:
def get_aerial_wins_perc(df: pd.DataFrame) -> pd.DataFrame:
    """First, extract the Aerial Win count with the method 'get_aerial_wins'.
    Then, extract the number of lost aerial duels using the feature 'duel_type'
    with value "Aerial Lost" (id = 10); I did not use the feature 'is_aerial_won' = False
    because it is set to false also for all the events, even those which are not duels.
    Eventually, compute the percentage.

    Parameters
    ----------
    df : pd.DataFrame
        Event data

    Returns
    -------
    pd.DataFrame
        DataFrame with player_id, aerial_win_percentage
    """
    # Get the Aerial wins for each player with the get_aerial_wins method
    aerial_wins_df = get_aerial_wins(df=df)
    # Get only the aerial lost events
    aerial_lost_df = (
        df[
            (df["duel_type"] == "Aerial Lost")  # Duel of type "Aerial Lost"
        ]
        .groupby(by="player_id")
        .size()
        .reset_index(name="aerial_lost_count")
    )
    # Join the two dataframes with aerial wins and lost counts
    aerial_w_l_df = pd.merge(
        left=aerial_wins_df, right=aerial_lost_df, how="inner", on="player_id"
    )
    # Compute the percentage
    aerial_w_l_df["aerial_total_count"] = (
        aerial_w_l_df["aerial_win_count"] + aerial_w_l_df["aerial_lost_count"]
    )
    aerial_w_l_df["aerial_win_percentage"] = (
        aerial_w_l_df["aerial_win_count"] / aerial_w_l_df["aerial_total_count"]
    )

    aerial_w_l_df = aerial_w_l_df.drop(
        columns=["aerial_win_count", "aerial_lost_count", "aerial_total_count"]
    )

    return aerial_w_l_df.sort_values(
        by="aerial_win_percentage", ascending=False, ignore_index=True
    )

In [25]:
aerial_win_perc_players = get_aerial_wins_perc(df=df)

In [26]:
aerial_win_perc_players.head()

Unnamed: 0,player_id,player,aerial_win_percentage
0,4428.0,Mathieu Debuchy,0.875
1,3101.0,Vincent Kompany,0.863636
2,42946.0,Guangtai Jiang,0.833333
3,3471.0,Dejan Lovren,0.807143
4,4826.0,Yohan Benalouane,0.8


## Long Balls

Make sure that the completed passes have the outcome feature equal to null.

In [27]:
def get_long_balls(df: pd.DataFrame) -> pd.DataFrame:
    """Extract the completed passes with length more than 35 yards.
    The completed passes are those with the outcome equal to null, as specified on
    https://statsbomb.com/wp-content/uploads/2022/08/Working-with-R.pdf.

    Parameters
    ----------
    df : pd.DataFrame
        Event data

    Returns
    -------
    pd.DataFrame
        DataFrame with player_id, long_balls
    """
    return (
        df[
            (df["type"] == "Pass")  # Pass id
            & (df["pass_length"] >= 35)
            & (df["pass_outcome"].isna())
        ]
        .groupby(by=["player_id","player"])
        .size()
        .reset_index(name="long_balls")
        .sort_values(by="long_balls", ascending=False, ignore_index=True)
    )

In [28]:
long_balls_players = get_long_balls(df=df)

In [29]:
long_balls_players.head(5)

Unnamed: 0,player_id,player,long_balls
0,3522.0,Heurelho da Silva Gomes,387
1,3815.0,Kasper Schmeichel,359
2,3461.0,Petr Čech,316
3,20005.0,Toby Alderweireld,315
4,3262.0,Łukasz Fabiański,296


## Final Pass

*Final Pass* is defined as the ability to create goal-scoring opportunities for teammates by passing the ball.

I used the following metrics to assess the players' Final Pass capabilities:
- Assists.
- xG Assisted.
- Key passes.
- Key passes under pressure.
- Progressive passes.



### Assist

In [30]:
def get_assist(df: pd.DataFrame) -> pd.DataFrame:
    """Get the number of goals assisted.

    Parameters
    ----------
    df : pd.DataFrame
        Event data

    Returns
    -------
    pd.DataFrame
        DataFrame with player_id, assists
    """
    return (
        df[df["pass_goal_assist"]==True]
        .groupby(by=["player_id", "player"])
        .size()
        .reset_index(name="assists")
        .sort_values(by="assists", ascending=False, ignore_index=False)
    )

In [31]:
assist_players = get_assist(df=df)

In [32]:
assist_players.head()

Unnamed: 0,player_id,player,assists
70,3496.0,Mesut Özil,19
68,3491.0,Dimitri Payet,12
3,3043.0,Christian Dannemann Eriksen,12
65,3473.0,James Philip Milner,11
132,3831.0,Dušan Tadić,11


### xG assisted

In [33]:
def get_xga(df: pd.DataFrame) -> pd.DataFrame:
    """Compute the xG of the shots assisted by each player.
    Use the 'pass_assisted_shot_id' feature to have the corresponding shot for each assist.

    Parameters
    ----------
    df : pd.DataFrame
        Event data

    Returns
    -------
    pd.DataFrame
        DataFrame with player_id, xg_assist
    """
    support_df = pd.merge(
        left=df,
        right=df,
        left_on="pass_assisted_shot_id",
        right_on="id",
        how="inner",
        suffixes=["_assist", "_shot"],
    )

    return (
        support_df.groupby(by=["player_id_assist", "player_assist"])
        .agg({"shot_statsbomb_xg_shot": "sum"})
        .reset_index()
        .rename(
            columns={
                "player_id_assist": "player_id",
                "player_assist": "player",
                "shot_statsbomb_xg_shot": "xg_assist",
            }
        )
        .sort_values(by="xg_assist", ascending=False, ignore_index=True)
    )

In [34]:
xga_players = get_xga(df=df)

In [35]:
xga_players.head(10)

Unnamed: 0,player_id,player,xg_assist
0,3496.0,Mesut Özil,13.824563
1,3491.0,Dimitri Payet,10.874015
2,3814.0,Riyad Mahrez,9.525558
3,3089.0,Kevin De Bruyne,9.401009
4,3043.0,Christian Dannemann Eriksen,8.674473
5,3585.0,Erik Lamela,8.5885
6,3454.0,Troy Deeney,8.246785
7,3478.0,Francesc Fàbregas i Soler,8.067358
8,3385.0,Alexis Alejandro Sánchez Sánchez,7.617406
9,3307.0,Marc Albrighton,7.513881


### Key Passes

In [36]:
def get_key_passes(df: pd.DataFrame) -> pd.DataFrame:
    """Extract the passes that lead to a shot or a goal, called key passes.

    Parameters
    ----------
    df : pd.DataFrame
        Event data dataframe

    Returns
    -------
    pd.DataFrame
        Dataframe with player_id and the number of key passes
    """
    return (
        df[
            (df["pass_goal_assist"]) | (df["pass_shot_assist"])
        ]
        .groupby(by=["player_id", "player"])
        .size()
        .reset_index(name="key_passes")
        .sort_values(by="key_passes", ascending=False, ignore_index=True)
    )

In [37]:
key_passes_players = get_key_passes(df=df)

In [38]:
key_passes_players.head(10)

Unnamed: 0,player_id,player,key_passes
0,3496.0,Mesut Özil,19
1,3491.0,Dimitri Payet,12
2,3043.0,Christian Dannemann Eriksen,12
3,3473.0,James Philip Milner,11
4,3831.0,Dušan Tadić,11
5,3064.0,David Josué Jiménez Silva,10
6,3814.0,Riyad Mahrez,10
7,3089.0,Kevin De Bruyne,9
8,3726.0,Gerard Deulofeu Lázaro,9
9,4275.0,Ross Barkley,8


### Under pressure key passes

In [39]:
def get_under_pressure_key_passes(df: pd.DataFrame) -> pd.DataFrame:
    """Extract the passes that lead to a shot or a goal, called key passes,
    considering only those played under pressure.

    Parameters
    ----------
    df : pd.DataFrame
        Event data dataframe

    Returns
    -------
    pd.DataFrame
        Dataframe with player_id and the number of key passes under pressure
        
    """
    return (
        df[
            ((df["pass_goal_assist"]) | (df["pass_shot_assist"]))
            & (df["under_pressure"] == True)
        ]
        .groupby(by=["player_id", "player"])
        .size()
        .reset_index(name="under_pressure_key_passes")
        .sort_values(by="under_pressure_key_passes", ascending=False, ignore_index=True)
    )

In [40]:
under_pressure_key_passes_players = get_under_pressure_key_passes(df=df)

In [41]:
under_pressure_key_passes_players.head(10)

Unnamed: 0,player_id,player,under_pressure_key_passes
0,3831.0,Dušan Tadić,4
1,3094.0,Bamidele Alli,4
2,11062.0,Graziano Pellè,4
3,10960.0,Jamie Vardy,3
4,4090.0,Adam David Lallana,3
5,3604.0,Olivier Giroud,3
6,3289.0,Romelu Lukaku Menama,3
7,3325.0,Marko Arnautović,3
8,4275.0,Ross Barkley,2
9,3647.0,Alex Iwobi,2


### Progressive passes

In [42]:
def get_progressive_passes(df: pd.DataFrame) -> pd.DataFrame:
    """Completed open-play passes that move at least 25% closer to the goal from its origin.
    Exclude passes from the defending 40% of the pitch.

    Parameters
    ----------
    df : pd.DataFrame
        Statsbomb Event data

    Returns
    -------
    pd.DataFrame
    """
    df_copy = df.copy()
    df_copy = df_copy.dropna(subset='location')
    passes = df_copy[
        (df_copy["type"] == "Pass")
        & (df_copy["pass_outcome"].isna())  # Only completed passes
        & (df_copy["pass_type"].isna())     # Only open-play passes
    ]
    # Excludes passes from the defending 40% of the pitch
    x_not_def = 120 * 0.4
    passes_not_def = passes[passes["location"].apply(lambda x: x[0] > x_not_def)]
    # Compute distance from the opposition goal for both origin and destination of the pass
    passes_not_def["dist_origin"] = passes_not_def["location"].apply(
        lambda loc: np.sqrt(np.square(120 - loc[0]) + np.square(40 - loc[1]))
    )
    passes_not_def["dist_dest"] = passes_not_def["pass_end_location"].apply(
        lambda loc: np.sqrt(np.square(120 - loc[0]) + np.square(40 - loc[1]))
    )
    # Compute the ratio between dist_dest and dist_origin
    passes_not_def["ratio_dist"] = (
        passes_not_def["dist_dest"] / passes_not_def["dist_origin"]
    )
    passes_not_def["is_progressive"] = passes_not_def["ratio_dist"] < 0.75
    # Progressive passes only those passes that approach the opposition goal line of the 25%
    prog_passes = passes_not_def[passes_not_def["is_progressive"]]

    return (
        prog_passes.groupby(by=["player_id","player"])
        .size()
        .reset_index(name="prog_pass")
        .sort_values(by="prog_pass", ascending=False, ignore_index=True)
    )

In [43]:
prog_pass_players = get_progressive_passes(df=df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passes_not_def["dist_origin"] = passes_not_def["location"].apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passes_not_def["dist_dest"] = passes_not_def["pass_end_location"].apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passes_not_def["ratio_dist"] = (
A value is trying to be set on a 

In [44]:
prog_pass_players.head(10)

Unnamed: 0,player_id,player,prog_pass
0,3478.0,Francesc Fàbregas i Soler,265
1,3496.0,Mesut Özil,259
2,3633.0,Gareth Barry,181
3,3385.0,Alexis Alejandro Sánchez Sánchez,178
4,3831.0,Dušan Tadić,168
5,3043.0,Christian Dannemann Eriksen,165
6,3473.0,James Philip Milner,156
7,3517.0,Aaron Ramsey,154
8,4751.0,Michael Carrick,154
9,3472.0,Willian Borges da Silva,152


## Box Positioning

*Box Positioning* is defined as the ability to move into goal-scoring
positions in valuable pitch locations by clever positioning in and around the opponent
penalty area at the right time.

I considered the number of ball receipts in the penalty box and in the Zone 14, i.e., the zone located in the middle of the pitch immediately outside the penalty area appears crucial for goal scoring (Taylor et al., 2002), that indicates how well players are positioned near the opposition goal to receive the passes. Then, the number of non-penalty expected goals indicates how dangerous players are positioned when they shot the ball.

### Penalty box receipts

In [45]:
def get_penalty_box_receipts(df: pd.DataFrame) -> pd.DataFrame:
    succ_ball_rec = df[
        (df["ball_receipt_outcome"].isna())  # Successful receipt
        & (df["type"] == "Ball Receipt*")
    ]
    
    pen_box_receipts = succ_ball_rec[
        succ_ball_rec["location"].apply(
            lambda loc: loc[0] >= 102 and loc[1] >= 18 and loc[1] >= 62
    )]

    return (
        pen_box_receipts.groupby(by=["player_id", "player"])
        .size()
        .reset_index(name="penalty_receipts")
        .sort_values(by="penalty_receipts", ascending=False, ignore_index=True)
    )

In [46]:
penalty_box_receipts_players = get_penalty_box_receipts(df=df)

In [47]:
penalty_box_receipts_players.head()

Unnamed: 0,player_id,player,penalty_receipts
0,6821.0,Jesús Navas González,151
1,7110.0,Bacary Sagna,112
2,3650.0,Daryl Janmaat,106
3,3091.0,Moussa Sissoko,105
4,3523.0,Craig Dawson,101


### Zone 14 receipts

In [48]:
def get_zone14_receipts(df: pd.DataFrame) -> pd.DataFrame:
    zone_14_x1 = (120 / 6) * 4
    zone_14_x2 = (120 / 6) * 5
    zone_14_y1 = 80 / 3
    zone_14_y2 = (80 / 3) * 2
    # Consider only the events in the Zone 14
    succ_ball_rec = df[
        (df["ball_receipt_outcome"].isna())  # Successful receipt
        & (df["type"] == "Ball Receipt*")
    ]
    zone_14_receipts_df = succ_ball_rec[
        succ_ball_rec["location"].apply(
            lambda loc: loc[0] >= zone_14_x1 and
                        loc[0] <= zone_14_x2 and
                        loc[1] >= zone_14_y1 and
                        loc[1] <= zone_14_y2
        )
    ]

    return (
        zone_14_receipts_df.groupby(by=["player_id", "player"])
        .size()
        .reset_index(name="zone14_receipts")
        .sort_values(by="zone14_receipts", ascending=False, ignore_index=True)
    )

In [49]:
zone14_receipts_players = get_zone14_receipts(df=df)

In [50]:
zone14_receipts_players.head()

Unnamed: 0,player_id,player,zone14_receipts
0,3289.0,Romelu Lukaku Menama,216
1,3454.0,Troy Deeney,190
2,5458.0,Odion Jude Ighalo,181
3,3496.0,Mesut Özil,163
4,4275.0,Ross Barkley,154


### Non-penalty xG

In [51]:
def get_tot_players_statsbomb_openplayxg(df: pd.DataFrame) -> pd.DataFrame:
    """Compute the total number of open-play expected goals of each player.

    Parameters
    ----------
    df : pd.DataFrame
        The Statsbomb event data

    Returns
    -------
    pd.DataFrame
        player_id, tot_npxg
    """
    return (
        df[df['shot_type'] == "Open Play"]
        .groupby(by=["player_id", "player"])
        .agg({"shot_statsbomb_xg": "sum"})
        .reset_index()
        .rename(columns={"shot_statsbomb_xg": "tot_npxg"})
        .sort_values(by="tot_npxg", ascending=False, ignore_index=True)
    )

In [52]:
openplay_xg_players = get_tot_players_statsbomb_openplayxg(df=df)

In [53]:
openplay_xg_players.head(10)

Unnamed: 0,player_id,player,tot_npxg
0,10955.0,Harry Kane,17.708696
1,10960.0,Jamie Vardy,17.55159
2,5458.0,Odion Jude Ighalo,16.919278
3,3289.0,Romelu Lukaku Menama,16.268007
4,3237.0,Sergio Leonel Agüero del Castillo,13.709017
5,3337.0,Jermain Defoe,12.426376
6,3604.0,Olivier Giroud,12.234203
7,5198.0,Diego da Silva Costa,11.809204
8,3629.0,Sadio Mané,11.268578
9,3385.0,Alexis Alejandro Sánchez Sánchez,10.700351


### Final Pass and Box Positioning evaluation

All the metrics to assess Final Pass and Box Positioning capabilities are rescaled between 0 and 1 and averaged to get an unique metric, called *fin_pass_box_pos*.

In [54]:
def join_metrics(metric_df_list: list[pd.DataFrame]) -> pd.DataFrame:
    """Method to join together all the Final Pass and Box Positioning metrics.

    Parameters
    ----------
    metric_df_list : list[pd.DataFrame]
        List of the Final Pass and Box Positioning metrics

    Returns
    -------
    Optional[pd.DataFrame]
        A DataFrame with the player id and all the corresponding Final Pass and Box Positioning metrics
    """
    if len(metric_df_list) < 2:
        print("Add more metrics.")
        return pd.DataFrame()

    df_to_ret = metric_df_list[0]

    for i in range(1, len(metric_df_list)):
        df_to_ret = pd.merge(df_to_ret, metric_df_list[i], on="player_id", how="inner", suffixes=[f"_{i-1}", f"_{i}"])

    return df_to_ret

In [55]:
# List of the Final Pass and Box Positioning metrics
metric_df_list = [
    assist_players,
    xga_players,
    key_passes_players,
    under_pressure_key_passes_players,
    prog_pass_players,
    penalty_box_receipts_players,
    zone14_receipts_players,
    openplay_xg_players,
]

In [56]:
# Join the dataframe with the single metrics
finalpass_boxpos_metrics = join_metrics(metric_df_list=metric_df_list)
# Save the player ids to later concatenate them with the rescaled metrics
player_id = finalpass_boxpos_metrics[["player_id", "player_0"]]

finalpass_boxpos_metrics = finalpass_boxpos_metrics.drop(columns="player_id")
finalpass_boxpos_metrics = finalpass_boxpos_metrics.select_dtypes(exclude="object")
# Metrics rescaling
scaled_finalpass_boxpos_metrics = (
    finalpass_boxpos_metrics - finalpass_boxpos_metrics.min()
) / (finalpass_boxpos_metrics.max() - finalpass_boxpos_metrics.min())
# Get a unique metric that is the average of the other ones
fin_pass_box_pos = (
    scaled_finalpass_boxpos_metrics.mean(axis=1)
    .reset_index(name="fin_pass_box_pos")
    .drop(columns="index")
)
fin_pass_box_pos = pd.concat((fin_pass_box_pos, player_id), axis=1)
fin_pass_box_pos = fin_pass_box_pos.rename(columns={"player_0": "player"})
fin_pass_box_pos = fin_pass_box_pos.sort_values(
    by="fin_pass_box_pos", ascending=False, ignore_index=True
)

### The top 5 Premier League 2015/2016 players according to Final Pass and Box Positioning capabilities

In [57]:
fin_pass_box_pos.head(10)

Unnamed: 0,fin_pass_box_pos,player_id,player
0,0.717673,3831.0,Dušan Tadić
1,0.595171,3473.0,James Philip Milner
2,0.588152,3814.0,Riyad Mahrez
3,0.585368,3094.0,Bamidele Alli
4,0.566833,3289.0,Romelu Lukaku Menama
5,0.5526,3454.0,Troy Deeney
6,0.541477,4275.0,Ross Barkley
7,0.48575,11062.0,Graziano Pellè
8,0.474861,3585.0,Erik Lamela
9,0.474573,10960.0,Jamie Vardy
