# Top Scorers vs Top Play-makers
In this snippet, I use Bundesliga data, fetched with the wikipedia library, to compare the impact of forwards against midfielders. The final bar chart shows their corresponding scores, while the color of each bar indicates the ratio of those scores to the total score of that team.

In [9]:
import numpy as np
import pandas as pd
import wikipedia as wp
import warnings
import re

# Suppress warnings raised after this point (no category or module filter is given).
warnings.filterwarnings('ignore')

In [2]:
def table_seeker(df, keyword):
    """Return the position of the first table with a column matching ``keyword``.

    ``df`` is a positional sequence of tables, each exposing ``columns``
    (in this file it receives the list returned by ``pd.read_html``).
    A column matches when ``keyword in column_label`` is true; labels that
    do not support the ``in`` operator (they raise ``TypeError``) are skipped.

    Returns ``None`` when no table matches.
    """
    for position, table in enumerate(df):
        for label in table.columns:
            try:
                matched = keyword in label
            except TypeError:
                # e.g. an integer column label cannot be searched
                continue
            if matched:
                return position
    return None

In [15]:
def table_scraper(page):
    """Scrape the standings, top-scorer and top-assist tables of one page.

    Parameters
    ----------
    page : str
        Wikipedia page title; its first four characters are parsed as the
        year stored in the ``year`` column of every returned table.

    Returns
    -------
    tuple
        ``(top_scorers, top_assist, total_goals)``. Each table has its
        column headers cut at the first ``"["`` and a ``year`` column added.
        ``top_assist`` is an empty frame when no table has a column
        containing "Assists".

    Raises
    ------
    TypeError
        When no table with a "GF" or "Goal" column is found: ``table_seeker``
        then returns ``None``, which cannot index the list of tables.
    """
    # Local import keeps this definition self-contained.
    from io import StringIO

    html = wp.page(page).html()
    # pandas >= 2.1 deprecates passing literal HTML to read_html; a file-like
    # wrapper is accepted by every version.
    df = pd.read_html(StringIO(html))

    total_goals = df[table_seeker(df, "GF")]
    top_scorers = df[table_seeker(df, "Goal")]
    try:
        top_assist = df[table_seeker(df, "Assists")]
    except TypeError:
        # No assists table on this page: fall back to an empty frame.
        top_assist = pd.DataFrame()

    # Parse the year once instead of once per table.
    year = pd.to_datetime(page[:4]).year
    for table in (total_goals, top_scorers, top_assist):
        # Drop footnote markers such as "[a]" from the headers.
        table.columns = [header.split("[")[0] for header in table.columns]
        table["year"] = year
    return top_scorers, top_assist, total_goals

In [16]:
# Season labels "2014–15" ... "2019–20" (note the en dash in the label).
pages = [f"20{y}–{y+1}" for y in range(14,20)]

# Scraped tables per season, plus the seasons whose table came back empty.
ttl_df = []
missed_ttl = []
ts_df = []
missed_ts = []
ta_df = []
missed_ta = []

for page in pages:
    # Only the scrape itself is guarded, so an unrelated error in the
    # bookkeeping below cannot be mistaken for a scraping failure.
    try:
        top_scorers, top_assist, total_goals = table_scraper(page + " Bundesliga")
    except TypeError as err:
        # Name the failing season and the reason instead of printing an
        # anonymous message.
        print(f"Could not scrape {page!r}: {err}")
        continue

    ttl_df.append(total_goals)
    ts_df.append(top_scorers)
    ta_df.append(top_assist)

    if len(total_goals) == 0:
        missed_ttl.append(page)
    if len(top_assist) == 0:
        missed_ta.append(page)
    if len(top_scorers) == 0:
        missed_ts.append(page)

In [17]:
def grouper(df, col="Goals"):
    """Aggregate ``col`` per ``(year, Club)`` pair.

    Groups ``df`` by its ``year`` and ``Club`` columns and returns a frame
    indexed by ``(year, Club)`` with two columns: the sum of ``col`` and the
    minimum ``year`` within each group.

    The reducers are named by string. Passing NumPy callables such as
    ``np.nansum`` relied on pandas substituting its own ``sum``/``min``,
    which pandas 2.1 deprecated; the string aliases keep that behaviour.
    """
    return df.groupby(["year", "Club"]).agg({col: "sum", "year": "min"})

In [18]:
def merging_dfs(df1, df2):
    """Left-join ``df2`` onto ``df1`` by index and drop incomplete rows.

    Rows of ``df1`` without a partner in ``df2`` get missing values from the
    join and are therefore removed by the final ``dropna``.
    """
    joined = df1.merge(df2, how="left", left_index=True, right_index=True)
    return joined.dropna()

In [19]:
# Per-(year, Club) totals from the scorer tables and from the assist tables.
scorer_totals = grouper(pd.concat(ts_df))
assist_totals = grouper(pd.concat(ta_df), col="Assists")

# Both inputs carry a "year" column, so the merge yields year_x / year_y.
DF = merging_dfs(scorer_totals, assist_totals).drop(columns="year_y")
DF.columns = ["Goals", "Year", "Assists"]
# The year is still part of the index, so the renamed copy can be dropped
# before the index is turned back into ordinary columns.
DF = DF.drop(columns="Year").reset_index()

In [20]:
# Stack the per-season standings tables into one frame.
total_df = pd.concat(ttl_df)
# Label the second column "Club" (presumably the team-name column; verify
# against the scraped tables).
second_column = total_df.columns[1]
total_df = total_df.rename(columns={second_column: "Club"})
# Keep only the text before the first " (" in each club entry.
total_df["Club"] = [entry.partition(" (")[0] for entry in total_df["Club"]]
# Index by (year, Club) so a single season/club row can be looked up.
total_df = total_df.set_index(["year", total_df.columns[1]])

In [21]:
# Hand-entered assist values for the rows labelled 0-4.
# They are written through .loc so the assignment reaches DF itself: the
# chained form ``DF.Assists[i] = v`` assigns into an intermediate Series,
# which pandas warns about and which leaves DF unchanged under Copy-on-Write.
manual_assists = {0: 9, 1: 33, 2: 11, 3: 9, 4: 9}
for row_label, assists in manual_assists.items():
    DF.loc[row_label, "Assists"] = assists

In [22]:
# Keep only the leading run of digits of each entry, then convert to numbers;
# anything that cannot be parsed is coerced to NaN.
# The pattern is a raw string ('\d' in a normal string is an invalid escape
# sequence) and expand=False makes extract return a Series, not a DataFrame.
leading_digits = DF["Assists"].map(str).str.extract(r"(\d*)", expand=False)
DF["Assists"] = pd.to_numeric(leading_digits, errors='coerce')

In [23]:
# Short codes as written here, keyed by code.
_clubs_by_code = {
    'BIE': 'Arminia Bielefeld',
    'B04': 'Bayer Leverkusen',
    'BAY': 'Bayern Munich',
    'DOR': 'Borussia Dortmund',
    'BMG': 'Borussia Mönchengladbach',
    'SGE': 'Eintracht Frankfurt',
    'RBL': 'RB Leipzig',
    'S04': 'Schalke 04',
    'UNB': 'Union Berlin',
    'STU': 'VfB Stuttgart',
    'WOB': 'VfL Wolfsburg',
    'SVW': 'Werder Bremen',
}
# Lookups are done by club name, so expose the inverted mapping name -> code.
team_codes = {club: code for code, club in _clubs_by_code.items()}

In [24]:
# "<code> <year>" label for every row; a club missing from team_codes
# raises KeyError here.
club_codes = pd.Series([team_codes[club] for club in DF["Club"]], index=DF.index)
DF["Name"] = club_codes + " " + DF["year"].astype(str)

# "GF" value of the matching (year, Club) row of the standings.
DF["Total"] = [
    total_df.loc[(season, club), "GF"]
    for season, club in zip(DF["year"], DF["Club"])
]

# The club name is no longer needed once the label exists.
DF.drop(columns=["Club"], inplace=True)

In [25]:
# Each tally divided by the "Total" column of the same row.
for stat in ("Goals", "Assists"):
    DF[f"{stat}_ratio"] = DF[stat] / DF["Total"]

### Plotting

In [26]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.axes_grid1 import make_axes_locatable

%matplotlib notebook

In [27]:
# matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later removed the old names; use whichever name this installation offers.
_deep_style = 'seaborn-v0_8-deep'
plt.style.use(_deep_style if _deep_style in plt.style.available else 'seaborn-deep')

In [28]:
# Colormaps: cmap1 (autumn) colours the goal bars, cmap2 (winter) the assist bars.
# pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in
# matplotlib 3.7 and removed afterwards.
cmap1 = plt.get_cmap('autumn')
cmap2 = plt.get_cmap('winter')

# Scale behind the goals colorbar, spanning the observed Goals_ratio range.
# (It previously reused the Assists_ratio limits, a copy-paste slip.)
color_pick1 = mpl.cm.ScalarMappable(
    cmap=cmap1,
    norm=mpl.colors.Normalize(vmin=DF["Goals_ratio"].min(),
                              vmax=DF["Goals_ratio"].max()))

# Scale behind the assists colorbar, spanning the observed Assists_ratio range.
color_pick2 = mpl.cm.ScalarMappable(
    cmap=cmap2,
    norm=mpl.colors.Normalize(vmin=DF["Assists_ratio"].min(),
                              vmax=DF["Assists_ratio"].max()))

In [29]:
# Paired bars per row: goals at the row position, assists one bar width to the right.
ax = plt.figure(figsize=(10, 6))  # NOTE: this holds the Figure, despite the name
width = 0.38
bars = plt.bar(x=DF.index, height=DF["Goals"], width=width)
plt.bar(x=DF.index + width, height=DF["Assists"], width=width)


<IPython.core.display.Javascript object>

<BarContainer object of 20 artists>

In [30]:
# Colour each bar by its ratio. This relies on the axes' children starting
# with the goal bars followed by the assist bars, in row order.
artists = plt.gca().get_children()
row_count = len(DF.index)
for row in DF.index:
    artists[row].set_color(cmap1(DF.at[row, "Goals_ratio"]))
    artists[row + row_count].set_color(cmap2(DF.at[row, "Assists_ratio"]))

In [31]:
# Print the value of every goals bar and assists bar just below its top edge.
label_style = dict(ha='center', rotation=0, color='w', fontsize=4.5, weight='bold')
current_axes = plt.gca()
for position, goal_bar in enumerate(bars):
    left_edge = goal_bar.get_x()
    goals = goal_bar.get_height()
    assists = DF["Assists"][position]
    current_axes.text(left_edge + 0.17, goals - 2, goals, **label_style)
    current_axes.text(left_edge + width + 0.17, assists - 2, assists, **label_style)

In [32]:
# Give both axes an empty list of tick positions.
current_axes = plt.gca()
current_axes.set_xticks([])
current_axes.set_yticks([])

[]

In [33]:

# Remove the tick marks and the left/bottom tick labels.
# tick_params takes booleans: on current matplotlib the string 'off' is just
# a truthy value, so it would switch these on rather than off.
plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=False)

# Hide the frame around the plot.
for spine in plt.gca().spines.values():
    spine.set_visible(False)

# Set the y-axis limits
plt.ylim(top=50, bottom=0)

# Plot the graph title
title_fdict={'fontsize': 9,
 'fontweight': 'semibold' ,
 'color': 'black'}

plt.gca().set_title('Who is the BOSS!\nA comparison of goal scorers and play-makers', fontdict=title_fdict)


Text(0.5, 1.0, 'Who is the BOSS!\nA comparison of goal scorers and play-makers')

In [34]:
# Write each row's "Name" label vertically near the bottom of its goals bar.
x = DF["Name"]
caption_style = dict(ha='center', va='bottom', rotation=90, color='w',
                     fontsize=4.5, weight='bold')
for goal_bar, caption in zip(bars, x):
    plt.gca().text(goal_bar.get_x() + 0.20, .5, caption, **caption_style)

In [35]:
# Append a narrow axes on each side of the plot and draw one colorbar in each.
divider = make_axes_locatable(plt.gca())
strip_options = dict(size='2%', pad=0.08)
cax1 = divider.append_axes('left', **strip_options)
cax2 = divider.append_axes('right', **strip_options)

color_bar1 = plt.colorbar(color_pick1, cax=cax1, orientation="vertical")
color_bar2 = plt.colorbar(color_pick2, cax=cax2, orientation="vertical")

In [36]:
# Each colorbar title is tinted with its colormap's colour at the largest ratio.
goal_title_color = cmap1(DF["Goals_ratio"].max())
assist_title_color = cmap2(DF["Assists_ratio"].max())

# Key order is kept as-is, since the properties are applied in dict order.
font_dict = dict(
    fontsize='smaller',
    fontweight='bold',
    color=goal_title_color,
    verticalalignment='top',
    position=(-1, 10),
    rotation=90,
)
font_dict2 = dict(
    fontsize='smaller',
    fontweight='bold',
    color=assist_title_color,
    position=(1.8, 10),
    rotation=-90,
)

# Titles of the two vertically oriented colorbars.
color_bar1.ax.set_title("Goal_Ratio", fontdict=font_dict, y=0.95)
color_bar2.ax.set_title("Assists_Ratio", fontdict=font_dict2, loc='left',
                        y=0.8, pad=0.2)

Text(1.8, 0.8, 'Assists_Ratio')

In [37]:
# left-sided color bar configuration
cbar_ftick ={'fontsize': 'smaller',
        'fontweight': 'ultralight',
        'verticalalignment': 'center',
        'color': 'y',
             'horizontalalignment': 'left'}
color_bar1.ax.set_axis_on()
# fontdict is passed by keyword: current matplotlib accepts it only that way,
# and older releases accept the keyword as well.
# NOTE(review): the labels are fixed text (0 ... 0.5 ... 1), not derived from
# the colorbar's value range or tick count — confirm this is intended.
color_bar1.ax.set_yticklabels([0,"","",0.5,"","",1], fontdict=cbar_ftick)
# Labels are drawn on the right-hand side of the left colorbar only.
color_bar1.ax.tick_params(direction='out',pad=2 , length=1,
                         labeltop=False,labelbottom=False,labelleft=False,labelright=True,labelrotation=0 , width=1, colors='r')

In [38]:
# right-side color bar configuration
cbar_ftick2 ={'fontsize': 'smaller',
        'fontweight': 'ultralight',
        'verticalalignment': 'center',
        'color': cmap2(DF["Assists_ratio"].max()),
             'horizontalalignment': 'right'}
color_bar2.ax.set_axis_on()
# fontdict is passed by keyword: current matplotlib accepts it only that way,
# and older releases accept the keyword as well.
# NOTE(review): the labels are fixed text (0 ... 0.5 ... 1), not derived from
# the colorbar's value range or tick count — confirm this is intended.
color_bar2.ax.set_yticklabels([0,"","",0.5,"","",1], fontdict=cbar_ftick2)
# Ticks and labels are drawn on the left-hand side of the right colorbar only.
color_bar2.ax.tick_params(direction='out', left=True, right=False,pad=2 , length=1,
                         labelleft=True,labelright=False,labelrotation=0 , width=1)

In [39]:
# Explanatory note added to the figure at position (0.17, 0.05).
txt = """Note: The bar chart shows achieved goals and assists for the players in Top_scorer tables for each season
while the color of each bar implies the ratio of these score relative to total goals for each team."""

caption_fdict = dict(
    fontsize='x-small',
    fontweight='light',
    verticalalignment='bottom',
    color='black',
)
plt.gcf().text(.17, 0.05, txt, ha='left', fontdict=caption_fdict)

Text(0.17, 0.05, 'Note: The bar chart shows achieved goals and assists for the players in Top_scorer tables for each season\nwhile the color of each bar implies the ratio of these score relative to total goals for each team.')