In [1]:
#Importing Libraries

import numpy as np #To handle Mathematical calculations
import matplotlib.pyplot as plt #To plot charts 
%matplotlib inline
import pandas as pd #To import and manage datasets
import glob
import os
import warnings
# pd.set_option('display.max_columns', None)  
# pd.set_option('display.max_rows', None)  

#Disable warning messages
#NOTE(review): both settings below apply to every cell that runs after this one
#Ignore every FutureWarning raised from here on
warnings.simplefilter(action='ignore', category=FutureWarning)
#Switch off the pandas chained-assignment check instead of warning about it
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
#Load preprocessed dataset
#The path is relative, so the CSV must sit in the notebook's working directory
combinedPlayerDataframe = pd.read_csv("Preprocessed Player Data.csv") 

In [3]:
#Preview the first rows of the preprocessed player data
combinedPlayerDataframe.head()

Unnamed: 0,Name,Team,Age,Position,Apps,Minutes,Total goals,Total Assists,Shots per game,Key passes per game,...,Passes per game,Pass success percentage,Crosses per game,Long balls per game,Through balls per game,Total Goals,Yellow cards,Red cards,Aerials won per game,Man of the match
0,Arjen Robben,Bayern Munich,26,Midfielder,18,1779,16,6,3.4,1.8,...,34.3,80.2,1.5,2.0,0.2,16,1,0,0.4,8
1,Stefan Kießling,Bayer Leverkusen,26,Forward,33,2924,21,5,3.0,1.6,...,32.5,72.4,0.2,0.4,0.1,21,3,0,2.1,6
2,Zvjezdan Misimovic,Wolfsburg,28,Attacking Midfielder,31,2768,10,13,2.4,3.3,...,51.1,77.5,2.5,5.7,0.5,10,7,0,0.2,8
3,Edin Dzeko,Wolfsburg,24,Forward,33,3003,22,7,4.5,1.1,...,19.5,70.7,0.2,1.1,0.1,22,4,0,0.7,6
4,Claudio Pizarro,Werder Bremen,31,Attacking Midfielder,23,2130,16,2,2.6,1.3,...,24.2,75.5,0.1,0.8,0.0,16,4,0,0.4,2


In [4]:
#Load forecasted rating dataset
#The path is relative, so the CSV must sit in the notebook's working directory
forecastedPlayerDataframe = pd.read_csv("Forecasted Ratings 2019-2022.csv") 

In [5]:
#Preview the first rows of the forecasted rating data
forecastedPlayerDataframe.head()

Unnamed: 0,Name,Season,Rating,Age,Mean,Growth
0,Aaron Hughes,2009,6.95,30,6.88,-0.2
1,Aaron Hunt,2009,7.15,23,7.02375,-0.36
2,Aaron Lennon,2009,7.33,23,6.806667,-0.95
3,Aaron Mokoena,2009,6.56,29,6.56,0.0
4,Abdelhamid El Kaoutari,2009,6.7,20,6.866667,-0.2


### Handling young player data (Age: 23-28, Season 2018)

In [6]:
#Creating a separate DF for players who are between the age of 23-28 in 2018

#Row masks over the forecast table: the 2018 season and the 23-28 age band
isSeason2018 = forecastedPlayerDataframe['Season'] == 2018
isAged23To28 = (forecastedPlayerDataframe['Age'] > 22) & (forecastedPlayerDataframe['Age'] < 29)

youngPlayers2018Df = forecastedPlayerDataframe.loc[isSeason2018 & isAged23To28]

In [7]:
#Storing the names of the young players in a list

youngPlayerNames = youngPlayers2018Df['Name'].tolist()

In [8]:
#Creating a separate dictionary entry for every player
#Creating a new row of data for every player with the mean of all their stats
#Rating has been replaced by the forecasted rating average

young_players_ = {}

for name in youngPlayerNames:
    #Mean of every rating listed for this player in the forecast table
    playerMeanRating = forecastedPlayerDataframe.loc[forecastedPlayerDataframe.Name == name, 'Rating'].mean()

    playerDf = combinedPlayerDataframe.loc[combinedPlayerDataframe.Name == name]
    if playerDf.empty:
        #The player has no rows in the stats table, so there is nothing to average
        continue

    #Descriptive fields are taken from the player's last listed row
    latestRow = playerDf.iloc[-1]

    #numeric_only=True keeps the text columns out of the mean; without it
    #recent pandas versions raise a TypeError on the string columns
    averageRow = playerDf.mean(numeric_only=True).to_dict()
    averageRow.update({
        'Name': name,
        'Team': latestRow['Team'],
        'Position': latestRow['Position'],
        'League': latestRow['League'],
        #Use the forecast average computed above (previously left unused)
        'Rating': playerMeanRating,
    })

    #A fresh single-row frame labelled 'average' with the original column order;
    #nothing is written into a slice of combinedPlayerDataframe
    young_players_[name] = pd.DataFrame([averageRow], index=['average'], columns=playerDf.columns)

In [9]:
#Inspect the single 'average' row stored for one player
young_players_['Leroy Sané']

Unnamed: 0,Name,Team,Age,Position,Apps,Minutes,Total goals,Total Assists,Shots per game,Key passes per game,...,Passes per game,Pass success percentage,Crosses per game,Long balls per game,Through balls per game,Total Goals,Yellow cards,Red cards,Aerials won per game,Man of the match
average,Leroy Sané,Manchester City,21.5,Midfielder,22.75,2098.0,8.25,8.5,1.75,1.3,...,26.7,81.85,0.375,0.325,0.05,8.25,3.25,0.0,0.6,1.75


In [10]:
#Grouping young player data into separate position based dictionary entries

postionYoungPlayers_ = {}

Midfielders, Forwards, Attacking_Midfielders = [], [], []
Defenders, Goalkeepers, Defensive_Midfielders = [], [], []

#Position label -> list that collects the single-row frames for that position
framesByPosition = {
    'Midfielder': Midfielders,
    'Forward': Forwards,
    'Attacking Midfielder': Attacking_Midfielders,
    'Defender': Defenders,
    'Goalkeeper': Goalkeepers,
    'Defensive Midfielder': Defensive_Midfielders,
}

for playerFrame in young_players_.values():
    positionFrames = framesByPosition.get(playerFrame.Position.tolist()[0])
    #Players whose position is not one of the six labels are left out
    if positionFrames is not None:
        positionFrames.append(playerFrame)

#Stack each position's rows into one DataFrame
for positionLabel, positionFrames in framesByPosition.items():
    postionYoungPlayers_[positionLabel] = pd.concat(positionFrames)

In [11]:
#Preview the first rows of the frame stored under the 'Defensive Midfielder' key
postionYoungPlayers_['Defensive Midfielder'].head()

Unnamed: 0,Name,Team,Age,Position,Apps,Minutes,Total goals,Total Assists,Shots per game,Key passes per game,...,Passes per game,Pass success percentage,Crosses per game,Long balls per game,Through balls per game,Total Goals,Yellow cards,Red cards,Aerials won per game,Man of the match
average,Abdoulaye Touré,Nantes,24.5,Defensive Midfielder,30.5,2781.5,2.0,1.0,1.35,0.6,...,38.6,79.9,0.15,1.9,0.05,2.0,3.5,0.5,1.65,2.0
average,Adrien Rabiot,Paris Saint-Germain,21.0,Defensive Midfielder,16.0,1473.571429,2.0,1.857143,0.757143,0.514286,...,54.842857,89.2,0.142857,2.2,0.142857,2.0,2.0,0.142857,1.385714,1.428571
average,Afriyie Acquah,Empoli,23.428571,Defensive Midfielder,14.857143,1297.285714,1.142857,0.714286,0.857143,0.628571,...,29.585714,81.442857,0.242857,1.314286,0.057143,1.142857,5.142857,0.714286,0.7,0.285714
average,Allan,Napoli,25.0,Defensive Midfielder,29.571429,2657.571429,1.428571,4.571429,0.642857,1.014286,...,48.642857,85.357143,0.157143,2.542857,0.1,1.428571,5.714286,0.142857,0.457143,1.571429
average,Andrei Girotto,Nantes,26.5,Defensive Midfielder,18.0,1500.0,1.5,0.5,1.1,0.4,...,32.8,77.85,0.1,1.95,0.0,1.5,6.0,1.5,2.35,0.5


### Predicting replacements for older players

In [13]:
#Choose club
#This value is matched against the Team column when selecting the older players

club_name = 'Real Madrid'

In [14]:
#Creating a separate DF for older players

#Row masks over the stats table: chosen club, 2018 season, older than 32
playsForClub = combinedPlayerDataframe['Team'] == club_name
isSeason2018Row = combinedPlayerDataframe['Season'] == 2018
isOlderThan32 = combinedPlayerDataframe['Age'] > 32

olderPlayersDf = combinedPlayerDataframe.loc[playsForClub & isSeason2018Row & isOlderThan32]

In [15]:
#Creating a separate dictionary to store the average stat data of the best years (24-29) of the older players

older_players_ = {}

#The age window is the same for every player, so the mask is built once
inBestYears = (combinedPlayerDataframe.Age > 23) & (combinedPlayerDataframe.Age < 30)

for olderName in olderPlayersDf['Name']:
    bestYearsDf = combinedPlayerDataframe.loc[(combinedPlayerDataframe.Name == olderName) & inBestYears]
    if bestYearsDf.empty:
        #No rows aged 24-29 for this player, so no average can be built
        print("Skipping " + str(olderName) + ": no seasons found between the ages of 24 and 29")
        continue

    #Descriptive fields are taken from the last listed row inside the age window
    latestRow = bestYearsDf.iloc[-1]

    #numeric_only=True keeps the text columns out of the mean; without it
    #recent pandas versions raise a TypeError on the string columns
    averageRow = bestYearsDf.mean(numeric_only=True).to_dict()
    averageRow.update({
        'Name': olderName,
        'Team': latestRow['Team'],
        'Position': latestRow['Position'],
        'League': latestRow['League'],
    })

    #A fresh single-row frame labelled 'average' with the original column order;
    #nothing is written into a slice of combinedPlayerDataframe
    older_players_[olderName] = pd.DataFrame([averageRow], index=['average'], columns=bestYearsDf.columns)

In [21]:
#Recommending young replacements within the age of (23-28)

#Imports and the warning filter run once per cell execution instead of once per player.
#PCA is not used in this cell; the import is retained in case other cells rely on it.
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
from sklearn.exceptions import DataConversionWarning

import warnings
warnings.filterwarnings(action="ignore",category=DataConversionWarning)

#Columns that describe the player rather than measure performance
NON_STAT_COLUMNS = ['Name', 'Team', 'League', 'Position']
#Number of replacement options printed per older player
N_OPTIONS = 3

for name in older_players_:
    olderDf = older_players_[name]
    position = olderDf.Position.tolist()[0]
    youngDf = postionYoungPlayers_[position]

    #Every per-player frame uses the same index label ('average'), so the rows are
    #renumbered 0..n-1; row 0 is the older player, the rest are the candidates
    combinedDf = pd.concat([olderDf, youngDf], ignore_index=True)

    #Rows with a missing stat are removed; names are looked up through the unique
    #row numbers so they stay matched to the rows that remain
    stats = combinedDf.drop(columns=NON_STAT_COLUMNS).dropna()
    playerNames = combinedDf.loc[stats.index, 'Name'].tolist()

    print("")
    print("Older Player: " + name)

    if 0 not in stats.index:
        #The older player's own row had a missing stat and was dropped
        print("No options: incomplete stats for this player")
        continue

    X = StandardScaler().fit_transform(stats)

    #One extra neighbour is requested because the older player is part of the
    #fitted rows; the count is capped so a small candidate pool does not fail
    neighbourCount = min(N_OPTIONS + 1, len(X))
    recommendations = NearestNeighbors(n_neighbors=neighbourCount, algorithm='ball_tree').fit(X)

    #Only the older player's row (row 0) is queried
    neighbourRows = recommendations.kneighbors(X[:1], return_distance=False)[0]

    #Exclude the older player explicitly rather than assuming it is listed first
    options = [playerNames[i] for i in neighbourRows if i != 0][:N_OPTIONS]
    for count, optionName in enumerate(options, start=1):
        print('Option ' + str(count) + ': ' + str(optionName))


Older Player: Luka Modric
Option 1: Ilkay Gündogan
Option 2: Thiago Alcántara
Option 3: Kevin Kampl

Older Player: Sergio Ramos
Option 1: Íñigo Martínez
Option 2: Aymeric Laporte
Option 3: Salif Sané


### Predicting replacements for active underperforming players