In [None]:
import sys
!{sys.executable} -m pip install nbformat

In [3]:
import plotly.express as px
import matplotlib as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast


biwenger = pd.read_csv("../data/biwenger.csv", index_col=[0])
biwenger["value"] = biwenger.value.str.replace(',', '').astype(int)
lineups = pd.read_csv("../data/lineups.csv", sep=";", index_col=[0])
positions = ["Goalkeeper", "Defender", "Midfielder", "Forward"]

In [12]:
th_props = [
  ('font-size', '11px'),
  ('text-align', 'center'),
  ('font-weight', 'bold'),
  ('color', '#6d6d6d'),
  ('background-color', '#f7f7f9')
]

td_props = [
  ('font-size', '11px')
]

styles = [
  dict(selector="th", props=th_props),
  dict(selector="td", props=td_props)
]

green_cm = sns.light_palette("#44ab42", as_cmap=True)

def color_text_distribution(value, mean, std, positive=True):
    if value <= mean-(1.5*std):
        color = 'red' if positive else 'green'
    elif value >= mean+(1.5*std):
        color = 'green' if positive else 'red'
    else:
        color = 'black'
    return 'color: %s' % color

def top_players_table(players, position, k=15):
    selected_columns = ["name", "games", "points", "value", "cards", "points_per_million", "points_per_game", "pct_lineup"]
    top = players.loc[players.position == position, selected_columns].sort_values("pct_lineup", ascending=False).head(k)

    card_stats = players.loc[players.position == position, "cards"].describe()
    ppg_stats = players.loc[players.position == position, "points_per_game"].describe()
    ppm_stats = players.loc[players.position == position, "points_per_million"].describe()

    return (top.style
               .applymap(lambda x: "font-weight: bold", subset=['name'])
               .background_gradient(cmap=green_cm, subset=['pct_lineup'])
               .applymap(lambda x: color_text_distribution(x, card_stats["mean"], card_stats["std"], positive=False), subset=['cards'])
               .applymap(lambda x: color_text_distribution(x, ppg_stats["mean"], ppg_stats["std"]), subset=['points_per_game'])
               .applymap(lambda x: color_text_distribution(x, ppm_stats["mean"], ppm_stats["std"]), subset=['points_per_million'])
               .set_caption('Top {0}s:'.format(position))
               .format({'points_per_game': "{:.3}", 'points_per_million': "{:.4}"})
               .set_table_styles(styles))

def bubble_chart(players, position):
    top = players.loc[players.position == position]
    fig = px.scatter(top, x="points_per_game", y="points_per_million", size="pct_lineup", color="value", hover_name="name")
    fig.update_xaxes(title_text='Points per Game')
    fig.update_yaxes(title_text='Points per Million')
    fig.show()

# Position Analysis

In [13]:
position_stats = []
for position in positions:
    # Points average and sum
    str_points = lineups["{0}_points".format(position.lower())]
    point_sum = str_points.apply(ast.literal_eval).apply(np.sum).mean()
    point_avg = str_points.apply(ast.literal_eval).apply(np.mean).mean()
    
    # Value average and sum
    str_value = lineups["{0}_values".format(position.lower())]
    value_sum = str_value.apply(ast.literal_eval).apply(np.sum).mean()
    value_avg = str_value.apply(ast.literal_eval).apply(np.mean).mean()
    value_pct = value_sum/lineups["total_value"]
    
    position_stats.append({
        "position": position,
        "sum_points": point_sum,
        "avg_points": point_avg,
        "sum_value": value_sum,
        "avg_value": value_avg,
        "pct_value": value_pct.mean()
    })
    
position_stats = pd.DataFrame(position_stats)
position_stats

Unnamed: 0,avg_points,avg_value,pct_value,position,sum_points,sum_value
0,154.409,3376700.0,0.060189,Goalkeeper,154.409,3376700.0
1,133.3055,2322210.0,0.165572,Defender,533.222,9288840.0
2,147.763667,3179542.0,0.340049,Midfielder,886.582,19077250.0
3,224.929,8180720.0,0.437461,Forward,674.787,24542160.0


# Player Analysis

In [14]:
player_appearances = []
for _, row in lineups.iterrows():
    for position in positions:
        str_players = row["{0}s".format(position.lower())]
        for player in ast.literal_eval(str_players):
            player_appearances.append({"name": player})

player_counts = pd.DataFrame(player_appearances).groupby("name").size().reset_index(name='lineups')
players = pd.merge(player_counts, biwenger, how='left', on='name')

players["points_per_million"] = players["points"] / players["value"] * 1000000
players["pct_lineup"] = players["lineups"] / lineups.shape[0] * 100
players["points_per_game"] = players["points"] / players["games"]

## Goalkeepers

In [15]:
top_players_table(players, "Goalkeeper")

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
62,David Soria,37,178,3580000.0,0,49.72,4.81,12.2
171,Oblak,37,196,6640000.0,0,29.52,5.3,11.3
175,Pacheco,35,173,3650000.0,3,47.4,4.94,10.6
13,Asenjo,32,157,2790000.0,0,56.27,4.91,10.4
151,Masip,35,150,2900000.0,1,51.72,4.29,8.8
220,Ter Stegen,35,167,4870000.0,0,34.29,4.77,8.7
69,Diego López,0,0,2990000.0,0,0.0,,8.1
55,Cuéllar,34,148,2260000.0,5,65.49,4.35,7.4
202,Rulli,27,117,2050000.0,3,57.07,4.33,4.7
226,Vaclík,33,128,3090000.0,1,41.42,3.88,4.4


In [None]:
bubble_chart(players, "Goalkeeper")

## Defenders

In [17]:
top_players_table(players, "Defender")

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
51,Cote,35,179,3220000.0,9,55.59,5.11,12.2
182,Piqué,35,227,8410000.0,6,26.99,6.49,11.2
200,Rubén Peña,31,174,2560000.0,4,67.97,5.61,10.3
121,Jordi Alba,36,200,6940000.0,6,28.82,5.56,9.6
168,Nacho Martínez,35,166,2320000.0,10,71.55,4.74,9.4
34,Calero,36,166,3060000.0,7,54.25,4.61,9.3
133,Laguardia,36,170,3360000.0,12,50.6,4.72,9.2
117,Jesús Navas,32,193,6410000.0,8,30.11,6.03,8.8
143,Mandi,35,158,2460000.0,10,64.23,4.51,8.8
103,Hugo Mallo,35,154,3140000.0,11,49.04,4.4,8.6


In [None]:
bubble_chart(players, "Defender")

## Midfielders

In [18]:
top_players_table(players, "Midfielder")

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
36,Canales,32,214,6730000.0,7,31.8,6.69,18.4
120,Jordan,36,201,4660000.0,10,43.13,5.58,17.0
42,Cazorla,35,215,6890000.0,2,31.2,6.14,16.4
35,Campaña,36,194,4200000.0,12,46.19,5.39,16.0
176,Parejo,36,255,11420000.0,6,22.33,7.08,15.4
186,Rakitic,34,195,5550000.0,7,35.14,5.74,13.4
162,Moi Gómez,36,149,440000.0,3,338.6,4.14,12.7
230,Vidal,33,174,3070000.0,7,56.68,5.27,12.6
118,Joaquín,30,175,2630000.0,5,66.54,5.83,12.6
2,Alcaraz,34,176,3260000.0,13,53.99,5.18,12.5


In [None]:
bubble_chart(players, "Midfielder")

## Forwards

In [19]:
top_players_table(players, "Forward")

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
157,Messi,34,428,22840000.0,3,18.74,12.6,42.6
122,Jorge Molina,38,244,6410000.0,2,38.07,6.42,19.0
20,Ben Yedder,35,246,8340000.0,4,29.5,7.03,17.4
21,Benzema,36,263,10750000.0,1,24.47,7.31,15.1
44,Charles,34,185,2540000.0,4,72.83,5.44,13.5
112,Jaime Mata,34,209,6500000.0,10,32.15,6.15,12.9
140,Luis Suárez,33,262,11570000.0,5,22.64,7.94,12.4
95,Griezmann,37,244,11130000.0,5,21.92,6.59,9.9
24,Borja Iglesias,37,239,9760000.0,5,24.49,6.46,9.9
65,Dembélé,29,179,4060000.0,1,44.09,6.17,8.8


In [None]:
bubble_chart(players, "Forward")