In [3]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [6]:
# Load the player table from the project's data directory into a DataFrame.
players = pd.read_csv('../data/players.csv')

In [7]:
# Preview the first rows of the freshly loaded table.
players.head()

Unnamed: 0,player_id,player_name,mob,dob,yob,positions,body_type,work_rate,preferred_foot,age,...,penalties,composure,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes
0,20801,C. Ronaldo dos Santos Aveiro,Feb,5,1985,"['LW', 'ST']",C. Ronaldo,High / Low,Right,31,...,85,86,22,31,23,7,11,15,14,11
1,158023,Lionel Messi,Jun,24,1987,['RW'],Messi,Medium / Medium,Left,29,...,74,94,13,28,26,6,11,15,14,8
2,190871,Neymar da Silva Santos Jr.,Feb,5,1992,['LW'],Neymar,High / Medium,Right,24,...,81,80,21,24,33,9,9,15,15,11
3,176580,Luis Suárez,Jan,24,1987,['ST'],Normal,High / Medium,Right,29,...,85,83,30,45,38,27,25,31,33,37
4,167495,Manuel Neuer,Mar,27,1986,['GK'],Normal,Medium / Medium,Right,30,...,47,70,10,10,11,89,90,95,91,89


In [8]:
def format_positions(positions):
    """Split the text form of a position list into its individual entries.

    The ``positions`` column arrives from the CSV as text such as
    ``"['LW', 'ST']"``. The square brackets are removed, the remainder is
    split on commas and every piece is stripped of surrounding whitespace.
    The single quotes around each code are deliberately kept, so the example
    above yields ``["'LW'", "'ST'"]``.

    Parameters
    ----------
    positions : str or list of str
        Raw cell value. A list (i.e. a value that has already been parsed) is
        accepted as well, so the cell that applies this function can be
        re-run without raising ``AttributeError``.

    Returns
    -------
    list of str
        A new list with one whitespace-stripped entry per position.
    """
    if isinstance(positions, list):
        # Already parsed on an earlier run: only re-strip, never re-split.
        pieces = positions
    else:
        pieces = positions.replace('[', '').replace(']', '').split(',')
    return [piece.strip() for piece in pieces]

# Swap the raw text column for the parsed per-player lists; mod_pos keeps a
# handle on the parsed Series as well.
players['positions'] = mod_pos = players['positions'].apply(format_positions)

In [9]:
# Collect every distinct position code, in order of first appearance.
# Iterating the column directly avoids the per-row Series that iterrows()
# builds, and the companion set makes each membership test constant time.
pos_set = []
seen_positions = set()
for positions in players['positions']:
    for pos in positions:
        if pos not in seen_positions:
            seen_positions.add(pos)
            pos_set.append(pos)

In [10]:
# Position codes grouped by broad role. The codes carry their single quotes
# because format_positions strips only brackets and whitespace.
forwards = ["'ST'", "'LW'", "'RW'", "'CF'"]
midfielders = ["'CM'", "'LM'", "'RM'", "'CDM'", "'CAM'"]
defenders = ["'CB'", "'RB'", "'LB'", "'RWB'", "'LWB'"]
goalkeepers = ["'GK'"]

# Flatten the four groups into a single lookup: position code -> role label.
type_by_position = {}
for role_label, role_codes in (
    ('forward', forwards),
    ('midfielder', midfielders),
    ('defender', defenders),
    ('goalkeeper', goalkeepers),
):
    for code in role_codes:
        type_by_position[code] = role_label

# A player's role is decided by the first listed position only.
player_type = []
unmapped_positions = set()
for pos_list in players['positions']:
    role = type_by_position.get(pos_list[0])
    if role is None:
        unmapped_positions.add(pos_list[0])
    player_type.append(role)

# Without this check an unmapped code would leave player_type shorter than
# the frame and surface only as a length-mismatch error on assignment; fail
# with the offending codes instead.
if unmapped_positions:
    raise ValueError(
        'No player_type mapping for primary position(s): %s'
        % sorted(unmapped_positions)
    )

players['player_type'] = player_type

In [11]:
# Distinct work-rate labels in order of first appearance. Series.unique()
# replaces the row-by-row scan and, unlike the `not in` test on a list,
# reports a missing value once instead of once per affected row.
workrate_set = players['work_rate'].unique().tolist()

In [12]:
# Preview the frame again now that the player_type column has been added.
players.head()

Unnamed: 0,player_id,player_name,mob,dob,yob,positions,body_type,work_rate,preferred_foot,age,...,composure,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes,player_type
0,20801,C. Ronaldo dos Santos Aveiro,Feb,5,1985,"['LW', 'ST']",C. Ronaldo,High / Low,Right,31,...,86,22,31,23,7,11,15,14,11,forward
1,158023,Lionel Messi,Jun,24,1987,['RW'],Messi,Medium / Medium,Left,29,...,94,13,28,26,6,11,15,14,8,forward
2,190871,Neymar da Silva Santos Jr.,Feb,5,1992,['LW'],Neymar,High / Medium,Right,24,...,80,21,24,33,9,9,15,15,11,forward
3,176580,Luis Suárez,Jan,24,1987,['ST'],Normal,High / Medium,Right,29,...,83,30,45,38,27,25,31,33,37,forward
4,167495,Manuel Neuer,Mar,27,1986,['GK'],Normal,Medium / Medium,Right,30,...,70,10,10,11,89,90,95,91,89,goalkeeper


In [16]:
# Show every column name as a plain Python list.
players.columns.tolist()

['player_id',
 'player_name',
 'mob',
 'dob',
 'yob',
 'positions',
 'body_type',
 'work_rate',
 'preferred_foot',
 'age',
 'weight',
 'height',
 'overall_rating',
 'potential',
 'value',
 'wage',
 'intl_reputation',
 'weak_foot',
 'skill_moves',
 'crossing',
 'finishing',
 'heading_accuracy',
 'short_passing',
 'volleys',
 'dribbling',
 'curve',
 'free_kick_accuracy',
 'long_passing',
 'ball_control',
 'acceleration',
 'sprint_speed',
 'agility',
 'reactions',
 'balance',
 'shot_power',
 'jumping',
 'stamina',
 'strength',
 'long_shots',
 'aggression',
 'interceptions',
 'positioning',
 'vision',
 'penalties',
 'composure',
 'marking',
 'standing_tackle',
 'sliding_tackle',
 'gk_diving',
 'gk_handling',
 'gk_kicking',
 'gk_positioning',
 'gk_reflexes',
 'player_type']

In [33]:
# Slice the roster into one frame per player_type label.
type_labels = players['player_type']
forwards_frame = players[type_labels == 'forward']
midfielders_frame = players[type_labels == 'midfielder']
defenders_frame = players[type_labels == 'defender']
goalkeepers_frame = players[type_labels == 'goalkeeper']

# Write each slice to its own CSV; the row index is not written.
for role_frame, csv_path in (
    (forwards_frame, '../data/forwards.csv'),
    (midfielders_frame, '../data/midfielders.csv'),
    (defenders_frame, '../data/defenders.csv'),
    (goalkeepers_frame, '../data/goalkeepers.csv'),
):
    role_frame.to_csv(csv_path, index=False)

In [34]:
# Descriptive / label columns that are removed before the frames are saved
# as arrays.
unwanted_columns = ['player_name', 'mob', 'dob', 'yob', 'positions', 'body_type', 'work_rate', 'preferred_foot', 'player_type']

# One drop call per frame instead of one per (frame, column) pair.
# errors='ignore' keeps this cell re-runnable: it reassigns the same names,
# so on a second run the columns are already gone and a strict drop would
# raise KeyError. The printed column list below remains the check that the
# intended columns were removed.
forwards_frame = forwards_frame.drop(unwanted_columns, axis=1, errors='ignore')
midfielders_frame = midfielders_frame.drop(unwanted_columns, axis=1, errors='ignore')
defenders_frame = defenders_frame.drop(unwanted_columns, axis=1, errors='ignore')
goalkeepers_frame = goalkeepers_frame.drop(unwanted_columns, axis=1, errors='ignore')
print(forwards_frame.columns.values)

['player_id' 'age' 'weight' 'height' 'overall_rating' 'potential' 'value'
 'wage' 'intl_reputation' 'weak_foot' 'skill_moves' 'crossing' 'finishing'
 'heading_accuracy' 'short_passing' 'volleys' 'dribbling' 'curve'
 'free_kick_accuracy' 'long_passing' 'ball_control' 'acceleration'
 'sprint_speed' 'agility' 'reactions' 'balance' 'shot_power' 'jumping'
 'stamina' 'strength' 'long_shots' 'aggression' 'interceptions'
 'positioning' 'vision' 'penalties' 'composure' 'marking' 'standing_tackle'
 'sliding_tackle' 'gk_diving' 'gk_handling' 'gk_kicking' 'gk_positioning'
 'gk_reflexes']


In [37]:
# Report (rows, columns) of the forwards frame.
print(forwards_frame.shape)

(3294, 45)


In [20]:
# Store each per-role frame as a NumPy array file; np.save appends the
# ".npy" suffix to these extension-less paths.
for npy_path, role_frame in (
    ('../data/forwards', forwards_frame),
    ('../data/midfielders', midfielders_frame),
    ('../data/defenders', defenders_frame),
    ('../data/goalkeepers', goalkeepers_frame),
):
    np.save(npy_path, role_frame)