In [43]:
import pandas as pd
import numpy as np
from unidecode import unidecode

# Descriptive position group -> abbreviated position codes belonging to it.
# Built from (group, codes) pairs; the insertion order of the groups is kept
# because lookups iterate over the groups in order.
POS_DICT = dict([
    ('Goalkeeper', ['GK']),
    ('Full Back', ['LB', 'LB5', 'RB', 'RB5']),
    ('Wing Back', ['LWB', 'RWB']),
    ('Center Back', ['CB', 'CB3', 'RCB', 'LCB', 'RCB3', 'LCB3']),
    ('Defensive Midfielder', ['DMF', 'RDMF', 'LDMF']),
    ('Central Midfielder', ['LCMF', 'LCMF3', 'RCMF', 'RCMF3']),
    ('Attacking Midfielder', ['AMF', 'LAMF', 'RAMF']),
    ('Winger', ['LW', 'RW']),
    ('Wing Forward', ['LWF', 'RWF']),
    ('Striker', ['CF']),
])



In [44]:
# Raw statistics workbook to load. NOTE: this is an absolute Windows path, so
# the cell only runs on a machine that has the file at this exact location.
file_path = 'C:/Users/githk/Projects/footballindex/Sample Data/raw stats/GRE1.xlsx'
df = pd.read_excel(io=file_path)

In [45]:
# Lower-case every column label; the following cells address columns by
# their lower-case names.
df.columns = df.columns.map(lambda label: label.lower())

In [46]:
# Rename three columns in a single in-place call. The three mappings are
# independent (no new label is the source of another mapping), so one call
# gives the same result as renaming them one after another.
df.rename(
    columns={
        'penalty conversion, %': 'penalties conversion, %',
        'goal conversion, %': 'goals conversion, %',
        'penalties taken': 'penalties',
    },
    inplace=True,
)

In [47]:
# Convert minutes played to how many 90s each player has played.
# A falsy minutes value (e.g. 0) is stored as the sentinel -1 rather than 0.

df['90s'] = df['minutes played'].map(
    lambda minutes: -1 if not minutes else minutes / 90
)

In [48]:
# Derive a "<name> per 90" column for each absolute-value column by dividing
# it by the number of 90s played.

non_per_90s_cols = ['padj sliding tackles', 'padj interceptions', 'penalties', 'clean sheets']
new_per_90_cols = [f'{col} per 90' for col in non_per_90s_cols]
for col, per_90_col in zip(non_per_90s_cols, new_per_90_cols):
    df[per_90_col] = df[col] / df['90s']

In [49]:
# Turn success-percentage columns into absolute "successful ... per 90" columns.

success_keywords = ['won', 'accurate', 'successful', 'on target', 'conversion']
percentage_cols = [col for col in df.columns if '%' in col]
for col in percentage_cols:
    for keyword in success_keywords:
        if keyword not in col:
            continue
        # Dropping the keyword and the ', %' suffix from the label leaves the
        # name of the per-90 column the percentage is applied to.
        base_col = col.replace(keyword, '').replace(', %', '').strip() + ' per 90'
        col_name = col.replace(', %', '') + ' per 90'
        df[col_name] = df[base_col] * df[col] / 100


In [50]:
# Convert per 90 columns to absolute ("extrp") columns.

# Select on ' per 90' rather than on the bare substring '90': the helper
# column '90s' (number of 90s played) also contains '90', but it is not a
# per-90 rate, and extrapolating it produced a meaningless '90s extrp' column
# (minutes played squared / 8100).
per_90_cols = [col for col in df.columns if ' per 90' in col]
col_names = [col.replace(' per 90', '') + ' extrp' for col in per_90_cols]
for col, col_name in zip(per_90_cols, col_names):
    # rate per 90 minutes * (minutes played / 90) = total over the minutes played
    df[col_name] = df[col] * df['minutes played'] / 90

In [51]:
def element_wise_mapping(position):
    '''
    Map an abbreviated position to its descriptive position group.

    position [str]: The abbreviated position, e.g. 'LW'.

    returns [str, int]: The POS_DICT key whose list of abbreviations contains
    the position exactly, or -1 if the position is not included in any of the
    dictionary's values.
    '''
    for key, abbreviations in POS_DICT.items():
        # Exact membership in the list of abbreviations. Testing the position
        # against each abbreviation string is a substring check, which maps
        # 'LW'/'RW' to 'Wing Back' (via 'LWB'/'RWB') and '' to 'Goalkeeper'.
        if position in abbreviations:
            return key
    return -1

In [52]:
# Map the position abbreviations to their complete descriptions.
# 1. Split the comma-separated 'position' string into one column per position.
positions_df = df['position'].str.split(', ', expand=True)
# 2. Translate every non-missing abbreviation.
mapped_positions_df = positions_df.applymap(element_wise_mapping, na_action='ignore')
# 3. Collapse each row to the distinct descriptions it contains.
unique_pos_list_df = (
    mapped_positions_df
    .stack()
    .groupby(level=0)
    .apply(lambda row_positions: row_positions.unique())
    .rename('position')
    .to_frame()
)
# 4. Spread the distinct descriptions over columns 'position0', 'position1', ...
descriptive_positions = pd.DataFrame(
    unique_pos_list_df['position'].tolist(),
    index=unique_pos_list_df.index,
).add_prefix('position')

In [53]:
# Append the descriptive position columns to the main frame, aligned on the index.
df = pd.concat([df, descriptive_positions], axis='columns')

In [54]:
# Create the ratio of assists to xA per 90 mins.
# Rows whose 'xa per 90' is not greater than 0 get the sentinel -1 instead of
# the inf/NaN a plain division by zero would produce, matching how
# 'goals to xg per 90' is guarded in this notebook.

df['assists to xa per 90'] = (df['assists per 90'] / df['xa per 90']).where(df['xa per 90'] > 0, -1)

In [55]:
# Create the ratio of assists to xA.
# Rows whose 'xa' is 0 get the sentinel -1 instead of the inf/NaN a plain
# division by zero would produce, matching how 'goals to xg' is guarded in
# this notebook.

df['assists to xa'] = (df['assists'] / df['xa']).where(df['xa'] != 0, -1)

In [56]:
# Minutes played per unit of xA; rows with a falsy 'xa' (e.g. 0) get the sentinel -1.
df['minutes per xa'] = df.apply(
    lambda player: -1 if not player['xa'] else player['minutes played'] / player['xa'],
    axis=1,
)

In [57]:
# Minutes played per assist; rows with a falsy 'assists' (e.g. 0) get the sentinel -1.
df['minutes per assist'] = df.apply(
    lambda player: -1 if not player['assists'] else player['minutes played'] / player['assists'],
    axis=1,
)

In [58]:
# Goals to xG per 90 mins: a ratio greater than one means the player is a
# clinical finisher. Rows whose 'xg per 90' is not greater than 0 get the
# sentinel -1.
df['goals to xg per 90'] = df.apply(
    lambda player: -1 if not (player['xg per 90'] > 0) else player['goals per 90'] / player['xg per 90'],
    axis=1,
)

In [59]:
# Goals to xG over the absolute totals; rows with a falsy 'xg' (e.g. 0) get the sentinel -1.
df['goals to xg'] = df.apply(
    lambda player: -1 if not player['xg'] else player['goals'] / player['xg'],
    axis=1,
)

In [60]:
# Store each player name passed through unidecode as a separate searchable column.
df['searchable_player_name'] = df['player'].map(unidecode)

In [61]:
# Write the enriched frame to a CSV file in the current working directory.
df.to_csv(path_or_buf='./greek-superleague-groups.csv')