In [25]:
import plotly.express as px
import matplotlib as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast


def human_format(num):
    """Format a number compactly with a K/M/B/T magnitude suffix.

    The value is first rounded to three significant digits and then divided
    by 1000 until it drops below 1000 or the largest suffix is reached.

    Args:
        num: Any value accepted by ``'{:.3g}'.format`` (int or float).

    Returns:
        str: For example ``'154'``, ``'-2.5K'`` or ``'3.38M'``. Magnitudes
        beyond the trillions keep the ``'T'`` suffix (``1e15`` -> ``'1000T'``).
        Non-finite inputs come back as ``'inf'``, ``'-inf'`` or ``'nan'``
        with no suffix.
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    num = float('{:.3g}'.format(num))

    # Infinity never falls below 1000 however often it is divided, so scaling
    # it would loop forever; NaN has no magnitude at all. Return both as-is.
    if num != num or abs(num) == float('inf'):
        return '{:f}'.format(num)

    magnitude = 0
    # Stop at the last suffix so very large values cannot index past the list.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0

    # '{:f}' always prints six decimals; strip the padding zeros and then a
    # dangling decimal point ('154.000000' -> '154').
    digits = '{:f}'.format(num).rstrip('0').rstrip('.')
    return '{}{}'.format(digits, suffixes[magnitude])


# Per-player table; the first CSV column is used as the index. It is later
# merged on its "name" column with the per-player lineup counts.
biwenger = pd.read_csv("../data/players.csv", index_col=[0])
# Lineup table (semicolon-separated); the first CSV column is used as the index.
lineups = pd.read_csv("../data/lineups_base.csv", sep=";", index_col=[0])

# Position labels. They are compared against players.position and, lower-cased,
# used to build lineup column names ("<position>_points", "<position>_values",
# "<position>s").
positions = ["Goalkeeper", "Defender", "Midfielder", "Forward"]

In [26]:
# CSS (property, value) pairs applied to header cells of the styled tables.
th_props = [
    ('font-size', '11px'),
    ('text-align', 'center'),
    ('font-weight', 'bold'),
    ('color', '#6d6d6d'),
    ('background-color', '#f7f7f9'),
]

# CSS (property, value) pairs applied to body cells.
td_props = [('font-size', '11px')]

# Table-style records: one per selector, each pointing at its property list.
styles = [
    {"selector": selector, "props": props}
    for selector, props in (("th", th_props), ("td", td_props))
]

# Seaborn light-palette colormap based on the green #44ab42; used as the
# background gradient of the pct_lineup column in top_players_table.
green_cm = sns.light_palette("#44ab42", as_cmap=True)

def color_text_distribution(value, mean, std, positive=True):
    """Return a CSS ``color`` declaration flagging values far from the mean.

    A value at or below ``mean - 1.5 * std`` gets the "low" colour, a value
    at or above ``mean + 1.5 * std`` gets the "high" colour, and anything in
    between is black. The low bound is checked first.

    Args:
        value: The cell value to classify.
        mean: Centre of the reference distribution.
        std: Standard deviation of the reference distribution.
        positive: When True, high values are green and low values red;
            when False the two colours are swapped.

    Returns:
        str: ``'color: red'``, ``'color: green'`` or ``'color: black'``.
    """
    band = 1.5 * std
    low_color, high_color = ('red', 'green') if positive else ('green', 'red')

    if value <= mean - band:
        return 'color: %s' % low_color
    if value >= mean + band:
        return 'color: %s' % high_color
    return 'color: %s' % 'black'

def top_players_table(players, position, k=15):
    """Build a styled table of the ``k`` most-picked players of one position.

    Rows are the players whose ``position`` equals ``position``, ordered by
    ``pct_lineup`` descending and cut to the first ``k``. The ``value`` column
    is rewritten with ``human_format``. ``cards``, ``points_per_game`` and
    ``points_per_million`` are coloured by ``color_text_distribution`` against
    the mean/std of *all* players of that position (not just the top ``k``);
    for ``cards`` a high value is treated as bad.

    Args:
        players: DataFrame with at least the columns ``position``, ``name``,
            ``games``, ``points``, ``value``, ``cards``,
            ``points_per_million``, ``points_per_game`` and ``pct_lineup``.
        position: Position label to filter on.
        k: Number of rows to keep.

    Returns:
        The pandas Styler for the selected rows.
    """
    # NOTE(review): newer pandas releases deprecate Styler.applymap in favour
    # of Styler.map — confirm against the pandas version this notebook targets.
    shown_columns = ["name", "games", "points", "value", "cards", "points_per_million", "points_per_game", "pct_lineup"]
    in_position = players.position == position

    def outlier_color(column, positive=True):
        """Return a per-cell styler comparing against the position-wide stats of ``column``."""
        summary = players.loc[in_position, column].describe()
        return lambda cell: color_text_distribution(cell, summary["mean"], summary["std"], positive=positive)

    top = (players.loc[in_position, shown_columns]
                  .sort_values("pct_lineup", ascending=False)
                  .head(k))

    cards_color = outlier_color("cards", positive=False)
    ppg_color = outlier_color("points_per_game")
    ppm_color = outlier_color("points_per_million")

    top["value"] = top["value"].apply(human_format)

    styled = top.style
    styled = styled.applymap(lambda x: "font-weight: bold", subset=['name'])
    styled = styled.background_gradient(cmap=green_cm, subset=['pct_lineup'])
    styled = styled.applymap(cards_color, subset=['cards'])
    styled = styled.applymap(ppg_color, subset=['points_per_game'])
    styled = styled.applymap(ppm_color, subset=['points_per_million'])
    styled = styled.set_caption('Top {0}s:'.format(position))
    styled = styled.format({'points_per_game': "{:.3}", 'points_per_million': "{:.4}"})
    return styled.set_table_styles(styles)

def bubble_chart(players, position):
    """Show a Plotly scatter of the players of one position.

    Points per game is on the x axis and points per million on the y axis;
    marker size follows ``pct_lineup``, marker colour follows ``value`` and
    the hover title is the player ``name``. The figure is displayed with
    ``fig.show()``; nothing is returned.

    Args:
        players: DataFrame with the columns ``position``, ``name``, ``value``,
            ``points_per_game``, ``points_per_million`` and ``pct_lineup``.
        position: Position label to filter on.
    """
    same_position = players.loc[players.position == position]

    encodings = dict(
        x="points_per_game",
        y="points_per_million",
        size="pct_lineup",
        color="value",
        hover_name="name",
    )
    fig = px.scatter(same_position, **encodings)

    fig.update_xaxes(title_text='Points per Game')
    fig.update_yaxes(title_text='Points per Million')
    fig.show()

# Position Analysis

In [38]:
# One summary row per position, averaged over every lineup.
summary_rows = []
for position in positions:
    column_prefix = position.lower()

    # Cells are Python-literal strings; parse each column once and reuse it
    # for both the per-lineup sum and the per-lineup mean.
    points_parsed = lineups["{0}_points".format(column_prefix)].apply(ast.literal_eval)
    point_sum = points_parsed.apply(np.sum).mean()
    point_avg = points_parsed.apply(np.mean).mean()

    values_parsed = lineups["{0}_values".format(column_prefix)].apply(ast.literal_eval)
    value_sum = values_parsed.apply(np.sum).mean()
    value_avg = values_parsed.apply(np.mean).mean()

    # Average summed value of this position relative to each lineup's total
    # value, then averaged across lineups.
    value_share = (value_sum / lineups["total_value"]).mean()

    summary_rows.append(dict(
        position=position,
        sum_points=human_format(point_sum),
        avg_points=human_format(point_avg),
        sum_value=human_format(value_sum),
        avg_value=human_format(value_avg),
        pct_value="{0}%".format(int(value_share*100)),
    ))

position_stats = pd.DataFrame(summary_rows)
position_stats

Unnamed: 0,avg_points,avg_value,pct_value,position,sum_points,sum_value
0,154,3.38M,6%,Goalkeeper,154,3.38M
1,133,2.32M,16%,Defender,533,9.29M
2,148,3.18M,34%,Midfielder,887,19.1M
3,225,8.18M,43%,Forward,675,24.5M


# Player Analysis

In [28]:
# One record per (lineup, position, player) appearance. Each "<position>s"
# cell is a Python-literal string that is parsed into the player names.
player_appearances = [
    {"name": player}
    for _, row in lineups.iterrows()
    for position in positions
    for player in ast.literal_eval(row["{0}s".format(position.lower())])
]

# Count appearances per player name, then attach the player attributes.
appearance_frame = pd.DataFrame(player_appearances)
player_counts = appearance_frame.groupby("name").size().reset_index(name='lineups')
players = pd.merge(player_counts, biwenger, how='left', on='name')

# Derived metrics used by the tables and charts.
total_lineups = lineups.shape[0]
players["points_per_million"] = players["points"] / players["value"] * 1000000
players["pct_lineup"] = players["lineups"] / total_lineups * 100
players["points_per_game"] = players["points"] / players["games"]

## Goalkeepers

In [29]:
# Most-picked goalkeepers by pct_lineup; k is left at its default of 15.
top_players_table(players, "Goalkeeper")

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
62,David Soria,37,178,3.69M,0,48.24,4.81,12.2
171,Oblak,37,196,6.86M,0,28.57,5.3,11.3
175,Pacheco,35,173,3.91M,3,44.25,4.94,10.6
13,Asenjo,32,157,3.15M,0,49.84,4.91,10.4
151,Masip,35,150,3.12M,1,48.08,4.29,8.8
220,Ter Stegen,35,167,5.39M,0,30.98,4.77,8.7
69,Diego López,38,160,3.15M,1,50.79,4.21,8.1
55,Cuéllar,34,148,2.54M,5,58.27,4.35,7.4
202,Rulli,27,117,2.21M,3,52.94,4.33,4.7
226,Vaclík,33,128,3.23M,1,39.63,3.88,4.4


In [30]:
# Goalkeepers: points per game vs. points per million (size = pct_lineup, colour = value).
bubble_chart(players, "Goalkeeper")

## Defenders

In [31]:
# Most-picked defenders by pct_lineup, keeping up to 20 rows.
top_players_table(players, "Defender", k=20)

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
51,Cote,35,179,3.23M,9,55.42,5.11,12.2
182,Piqué,35,227,8.5M,6,26.71,6.49,11.2
200,Rubén Peña,31,174,2.7M,4,64.44,5.61,10.3
121,Jordi Alba,36,200,7.26M,6,27.55,5.56,9.6
168,Nacho Martínez,35,166,2.41M,10,68.88,4.74,9.4
34,Calero,36,166,3.1M,7,53.55,4.61,9.3
133,Laguardia,36,170,3.39M,12,50.15,4.72,9.2
143,Mandi,35,158,2.56M,10,61.72,4.51,8.8
117,Jesús Navas,32,193,6.46M,8,29.88,6.03,8.8
103,Hugo Mallo,35,154,3.2M,11,48.12,4.4,8.6


In [32]:
# Defenders: points per game vs. points per million (size = pct_lineup, colour = value).
bubble_chart(players, "Defender")

## Midfielders

In [33]:
# Most-picked midfielders by pct_lineup, keeping up to 20 rows.
top_players_table(players, "Midfielder", k=20)

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
36,Canales,32,214,7.21M,7,29.68,6.69,18.4
120,Jordan,36,201,4.85M,10,41.44,5.58,17.0
42,Cazorla,35,215,6.94M,2,30.98,6.14,16.4
35,Campaña,36,194,4.24M,12,45.75,5.39,16.0
176,Parejo,36,255,11.2M,6,22.81,7.08,15.4
186,Rakitic,34,195,5.58M,7,34.95,5.74,13.4
230,Vidal,33,174,2.53M,7,68.77,5.27,12.6
118,Joaquín,30,175,2.79M,5,62.72,5.83,12.6
2,Alcaraz,34,176,3.32M,13,53.01,5.18,12.5
146,Marc Roca,35,173,3.76M,9,46.01,4.94,12.5


In [34]:
# Midfielders: points per game vs. points per million (size = pct_lineup, colour = value).
bubble_chart(players, "Midfielder")

## Forwards

In [35]:
# Most-picked forwards by pct_lineup, keeping up to 20 rows.
top_players_table(players, "Forward", k=20)

Unnamed: 0,name,games,points,value,cards,points_per_million,points_per_game,pct_lineup
157,Messi,34,428,22.8M,3,18.76,12.6,42.6
122,Jorge Molina,38,244,6.49M,2,37.6,6.42,19.0
20,Ben Yedder,35,246,7.63M,4,32.24,7.03,17.4
21,Benzema,36,263,10.9M,1,24.08,7.31,15.1
44,Charles,34,185,2.62M,4,70.61,5.44,13.5
112,Jaime Mata,34,209,6.77M,10,30.87,6.15,12.9
140,Luis Suárez,33,262,11.8M,5,22.28,7.94,12.4
95,Griezmann,37,244,11.7M,5,20.84,6.59,9.9
24,Borja Iglesias,37,239,9.85M,5,24.26,6.46,9.9
65,Dembélé,29,179,4.2M,1,42.62,6.17,8.8


In [36]:
# Forwards: points per game vs. points per million (size = pct_lineup, colour = value).
bubble_chart(players, "Forward")