In [None]:
import plotly.express as px
import matplotlib as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast


def human_format(num):
    """Format a number compactly with a K/M/B/T magnitude suffix.

    The value is rounded to three significant digits, then divided by 1000
    until it drops below 1000 or the largest suffix is reached. Trailing
    zeros and a dangling decimal point are stripped from the result.

    Args:
        num: A number accepted by ``'{:.3g}'.format`` (int or float).

    Returns:
        str: For example ``'1.5K'``, ``'1.23M'``, ``'-2.5M'`` or ``'0'``.
        Magnitudes of 1e15 and above stay expressed in trillions
        (``'1000T'``) instead of indexing past the suffix list.
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    num = float('{:.3g}'.format(num))
    magnitude = 0
    # Stop at the last suffix: without the bound, values >= 1e15 raise
    # IndexError and infinity never leaves the loop.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), suffixes[magnitude])


# Position labels iterated over by the analysis cells.
positions = ["Goalkeeper", "Defender", "Midfielder", "Forward"]

# Player table: default (comma) separator, first column used as the index.
biwenger = pd.read_csv(
    "../data/players.csv",
    index_col=[0],
)

# Lineups table: semicolon-separated, first column used as the index.
lineups = pd.read_csv(
    "../data/lineups_base.csv",
    sep=";",
    index_col=[0],
)

In [None]:
# CSS (property, value) pairs applied to table header cells.
th_props = [
    ('font-size', '11px'),
    ('text-align', 'center'),
    ('font-weight', 'bold'),
    ('color', '#6d6d6d'),
    ('background-color', '#f7f7f9'),
]

# CSS (property, value) pairs applied to table data cells.
td_props = [('font-size', '11px')]

# Table styles in the selector/props form handed to Styler.set_table_styles.
styles = [
    {"selector": css_selector, "props": css_props}
    for css_selector, css_props in (("th", th_props), ("td", td_props))
]

# Light-to-green (#44ab42) colormap; passed as `cmap` to the pct_lineup background gradient.
green_cm = sns.light_palette("#44ab42", as_cmap=True)

def color_text_distribution(value, mean, std, positive=True, n_std=1.5):
    """Return a CSS ``color`` rule that flags values far from the mean.

    Args:
        value: The cell value being styled.
        mean: Mean of the reference distribution.
        std: Standard deviation of the reference distribution.
        positive: If True, high values are green and low values red; if
            False the two colours are swapped.
        n_std: Number of standard deviations away from ``mean`` at which a
            value is flagged. Defaults to 1.5, the previously hard-coded
            threshold.

    Returns:
        str: ``'color: green'``, ``'color: red'`` or ``'color: black'``.
    """
    spread = n_std * std
    # The low bound is checked first, so when std == 0 a value equal to the
    # mean falls into the "low" branch.
    if value <= mean - spread:
        color = 'red' if positive else 'green'
    elif value >= mean + spread:
        color = 'green' if positive else 'red'
    else:
        # Inside the band, or not comparable (e.g. NaN): no highlight.
        color = 'black'
    return 'color: %s' % color

def top_players_table(players, position, k=15):
    """Build a styled table of the ``k`` most-selected players at a position.

    Args:
        players: DataFrame containing at least the columns ``position``,
            ``name``, ``games``, ``points``, ``value``, ``cards``,
            ``points_per_million``, ``points_per_game`` and ``pct_lineup``.
        position: Label compared against ``players.position`` to select rows.
        k: Number of rows kept after sorting by ``pct_lineup`` descending.

    Returns:
        pandas Styler for the selected rows: bold names, a green gradient on
        ``pct_lineup``, and red/green text on ``cards``, ``points_per_game``
        and ``points_per_million`` for values far from the position mean.
    """
    selected_columns = ["name", "games", "points", "value", "cards", "points_per_million", "points_per_game", "pct_lineup"]

    # Filter once; both the table and the reference statistics use this subset.
    at_position = players.loc[players.position == position]

    # Copy so the formatted "value" column is written to an independent frame.
    top = (at_position[selected_columns]
           .sort_values("pct_lineup", ascending=False)
           .head(k)
           .copy())

    # Reference distributions cover every player at the position, not only the top k.
    card_stats = at_position["cards"].describe()
    ppg_stats = at_position["points_per_game"].describe()
    ppm_stats = at_position["points_per_million"].describe()

    top["value"] = top.value.apply(human_format)

    # NOTE(review): Styler.applymap is deprecated in favour of Styler.map from
    # pandas 2.1; switch once the minimum supported pandas version allows it.
    return (top.style
               .applymap(lambda x: "font-weight: bold", subset=['name'])
               .background_gradient(cmap=green_cm, subset=['pct_lineup'])
               # More cards is worse, hence positive=False.
               .applymap(lambda x: color_text_distribution(x, card_stats["mean"], card_stats["std"], positive=False), subset=['cards'])
               .applymap(lambda x: color_text_distribution(x, ppg_stats["mean"], ppg_stats["std"]), subset=['points_per_game'])
               .applymap(lambda x: color_text_distribution(x, ppm_stats["mean"], ppm_stats["std"]), subset=['points_per_million'])
               .set_caption('Top {0}s:'.format(position))
               .format({'points_per_game': "{:.3}", 'points_per_million': "{:.4}"})
               .set_table_styles(styles))

def bubble_chart(players, position):
    """Display a bubble chart of the players at the given position.

    Points per game is plotted on the x axis and points per million on the
    y axis; bubble size follows ``pct_lineup``, colour follows ``value`` and
    the hover label is the player ``name``. The figure is shown, not returned.
    """
    at_position = players.loc[players.position == position]
    scatter_options = dict(
        x="points_per_game",
        y="points_per_million",
        size="pct_lineup",
        color="value",
        hover_name="name",
    )
    (px.scatter(at_position, **scatter_options)
       .update_xaxes(title_text='Points per Game')
       .update_yaxes(title_text='Points per Million')
       .show())

# Position Analysis

In [None]:
# One summary record per position; collected in a list and converted to a
# DataFrame once at the end.
position_records = []
for position in positions:
    prefix = position.lower()

    # Points: each cell is a stringified list, parsed once and reused for
    # both the per-lineup sum and the per-lineup mean.
    points = lineups["{0}_points".format(prefix)].apply(ast.literal_eval)
    point_sum = points.apply(np.sum).mean()
    point_avg = points.apply(np.mean).mean()

    # Values: same layout and treatment as the points column.
    values = lineups["{0}_values".format(prefix)].apply(ast.literal_eval)
    value_sum = values.apply(np.sum).mean()
    value_avg = values.apply(np.mean).mean()
    # NOTE(review): this divides the *average* position value by each
    # lineup's total value; confirm whether a per-lineup share
    # (sum for that lineup / its total) was intended instead.
    value_pct = value_sum/lineups["total_value"]

    position_records.append({
        "position": position,
        "sum_points": human_format(point_sum),
        "avg_points": human_format(point_avg),
        "sum_value": human_format(value_sum),
        "avg_value": human_format(value_avg),
        # int() truncates the percentage toward zero.
        "pct_value": "{0}%".format(int(value_pct.mean()*100))
    })

position_stats = pd.DataFrame(position_records)
position_stats

# Player Analysis

In [None]:
# Lineup column holding the stringified player list for each position
# (the lower-cased position label plus "s").
position_columns = ["{0}s".format(label.lower()) for label in positions]

# One {"name": ...} record for every player listed in every lineup.
player_appearances = []
for _, lineup in lineups.iterrows():
    for column in position_columns:
        player_appearances.extend(
            {"name": player_name} for player_name in ast.literal_eval(lineup[column])
        )

# Appearance count per player name.
player_counts = (
    pd.DataFrame(player_appearances)
    .groupby("name")
    .size()
    .reset_index(name='lineups')
)

# Left-join the counts with the player table and add the derived metrics.
players = (
    player_counts
    .merge(biwenger, how='left', on='name')
    .assign(
        points_per_million=lambda df: df["points"] / df["value"] * 1000000,
        pct_lineup=lambda df: df["lineups"] / lineups.shape[0] * 100,
        points_per_game=lambda df: df["points"] / df["games"],
    )
)

## Goalkeepers

In [None]:
# Table of the most-selected goalkeepers (default k=15 rows).
top_players_table(players, "Goalkeeper")

In [None]:
# Points-per-game vs. points-per-million bubble chart for goalkeepers.
bubble_chart(players, "Goalkeeper")

## Defenders

In [None]:
# Table of the 20 most-selected defenders.
top_players_table(players, "Defender", k=20)

In [None]:
# Points-per-game vs. points-per-million bubble chart for defenders.
bubble_chart(players, "Defender")

## Midfielders

In [None]:
# Table of the 20 most-selected midfielders.
top_players_table(players, "Midfielder", k=20)

In [None]:
# Points-per-game vs. points-per-million bubble chart for midfielders.
bubble_chart(players, "Midfielder")

## Forwards

In [None]:
# Table of the 20 most-selected forwards.
top_players_table(players, "Forward", k=20)

In [None]:
# Points-per-game vs. points-per-million bubble chart for forwards.
bubble_chart(players, "Forward")