In [None]:
# Imports

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn import linear_model

In [None]:
# Load the raw player table from disk and preview its first rows

fifa = pd.read_csv(
    'fifa21_male2.csv',
    low_memory=False,
)

fifa.head()

In [None]:
# Convert 'Height' from a feet'inches" string to centimetres

conversions = [30.48, 2.54]  # centimetres per foot, centimetres per inch

# Split on the foot mark with expand=True: a single vectorized call yields a
# two-column (feet, inches) frame, instead of building one pd.Series per row
# through apply(pd.Series), which is far slower on a table of this size.
feet_inches = (
    fifa['Height']
    .str.replace('"', '', regex=False)
    .str.split("'", expand=True)
    .astype(int)
)
fifa['Height'] = feet_inches.dot(conversions)

fifa['Height'].head()

In [None]:
# Keep only the rows whose 'Position' is 'GK'

is_goalkeeper = fifa['Position'] == 'GK'
fifa = fifa[is_goalkeeper]

fifa.head()

In [None]:
# Select model columns

model_columns = ['ID', 'Name', 'Age', 'Height', 'OVA', 'POT', 'Goalkeeping', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Total Stats', 'Base Stats', 'IR']

# .copy() makes the selection an independent frame. Later cells assign columns
# back into `fifa`; without the copy those assignments act on a selection of
# the filtered table and pandas emits SettingWithCopyWarning.
fifa = fifa[model_columns].copy()

fifa.head()

In [None]:
# Convert international rating: strip the star symbol, then cast to integer

ir_without_star = fifa['IR'].str.replace('★', '')
fifa['IR'] = ir_without_star.astype('int')

fifa.head()

In [None]:
# Correlation matrix

sns.set(rc = {'figure.figsize': (12, 6)})

# `fifa` still contains the text column 'Name'. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# instead, so restrict the frame to numeric columns explicitly.
numeric_fifa = fifa.select_dtypes(include = 'number')

# Trailing semicolon suppresses the Axes repr so only the figure is shown.
sns.heatmap(numeric_fifa.corr(), annot = True);

In [None]:
# Min-max scale the numeric model columns (note: 'ID', 'Age' and 'POT' are not in this list)

normalizer = MinMaxScaler()

scaled_columns = ['OVA', 'Goalkeeping', 'IR', 'Total Stats', 'Base Stats', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Height']
fifa[scaled_columns] = normalizer.fit_transform(fifa[scaled_columns])

In [None]:
# One-hot encode 'Age'

fifa = pd.get_dummies(fifa, columns = ['Age'], drop_first = True)

# Collect the dummy columns from the encoded frame instead of typing out every
# age by hand: a hand-written list raises KeyError as soon as the data contains
# a different set of ages. get_dummies appends the dummies after the existing
# columns, one per remaining age (the first age is dropped by drop_first).
age_columns = [column for column in fifa.columns if column.startswith('Age_')]

# Final column order: ID, age dummies, then the numeric attributes ('Name' is dropped here).
fifa = fifa[['ID', *age_columns, 'Height', 'OVA', 'POT', 'Goalkeeping', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Total Stats', 'Base Stats', 'IR']]

In [None]:
# Split the frame into the target ('POT') and the feature matrix (everything else)

target_column = 'POT'
y = fifa[target_column]
X = fifa.drop(columns = target_column)

In [None]:
# Fit an ordinary least-squares regression of y on X

model = linear_model.LinearRegression()
model.fit(X, y)
# fit() returns the estimator itself, so `result` and `model` are the same object
result = model

In [None]:
# Check model: score on the same X and y that the model was fitted on

result.score(X = X, y = y)

In [None]:
### M. NEUER

neuer = pd.read_csv('fifa21_male2.csv', usecols = ['Name', 'ID', 'Age', 'Height', 'POT', 'OVA', 'Goalkeeping', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Total Stats', 'Base Stats', 'IR'])

# assign() returns a new frame, so the height is set once on an independent
# copy. The previous `neuer['Height'] = neuer['Height'] = 184` was a duplicated
# chained assignment on a filtered slice (SettingWithCopyWarning).
neuer = neuer[neuer['Name'] == 'M. Neuer'].assign(Height = 184)
display(neuer)

# Hand-prepared feature row for the model check, in the training column order:
# ID, age dummies (only Age_34 set), then the min-max scaled attributes.
neuer_check = pd.DataFrame([{
    # The model is fitted on the raw 'ID' ('ID' is not among the columns that
    # the Normalize cell scales), so pass the raw ID here rather than a 0-1 value.
    'ID': neuer['ID'].iloc[0],
    **{f'Age_{age}': int(age == 34) for age in [*range(17, 44), 47]},
    'Height': 0.341611, 'OVA': 0.956522, 'Goalkeeping': 1.0, 'GK Diving': 0.941176, 'GK Handling': 0.901961, 'GK Kicking': 0.962264, 'GK Positioning': 0.87037, 'GK Reflexes': 0.982759, 'Total Stats': 0.900115, 'Base Stats': 0.996047, 'IR': 1.0,
}])

### NOTES

# Input will only need age, height and reflexes - separated by spaces "Age Height Reflexes".
# For everything else, the average of the respective column is assumed.
# IR = 0, for it is a new player

# For example: "20 180 50"

### SETUP DATA

# Columns of the reference frame, in the same order as the model features
# (the age dummies later take the place of 'Age'; 'IR' is appended afterwards).
check_columns = ['ID', 'Age', 'Height', 'OVA', 'Goalkeeping', 'GK Diving', 'GK Handling', 'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Total Stats', 'Base Stats']

# Read needed columns ('Position' is only needed for the goalkeeper filter)
check_fifa = pd.read_csv('fifa21_male2.csv', usecols = check_columns + ['Position'])

# Select goalkeepers and fix the column order explicitly. read_csv returns the
# columns in file order, so relying on that order plus a positional insert()
# would silently depend on the CSV layout. .copy() keeps the assignment below
# from acting on a slice of the full table.
check_fifa = check_fifa.loc[check_fifa['Position'] == 'GK', check_columns].copy()

# Convert height from a feet'inches" string to centimetres; expand=True splits
# into a (feet, inches) frame in one vectorized call instead of apply(pd.Series).
check_fifa['Height'] = (
    check_fifa['Height']
    .str.replace('"', '', regex=False)
    .str.split("'", expand=True)
    .astype(int)
    .dot([30.48, 2.54])
)

# CREATE CHECK DICT

# Template for a new player: every column of check_fifa mapped to its column mean
check_dict = {
    column: average
    for column, average in zip(check_fifa.columns, check_fifa.mean())
}

# USER INPUT AGE / HEIGHT / REFLEXES

# Ranges: one (label, minimum, maximum) tuple per user-supplied attribute
range_age, range_height, range_reflexes = (
    (label, check_fifa[column].min(), check_fifa[column].max())
    for label, column in (('age', 'Age'), ('height', 'Height'), ('reflexes', 'GK Reflexes'))
)

# Input 'Age Height Reflexes'
def input_user(check_dict):
   """Prompt for "Age Height Reflexes" until the input is usable, then pass it on.

   The answer is split on any whitespace and the first three values are parsed
   as integers. The prompt is repeated (in a loop, not by recursion) when the
   values cannot be parsed or when the age does not occur in check_fifa['Age'].

   Parameters:
      check_dict: dict of column name -> mean value, forwarded to insert_user.

   Returns:
      None. The accepted values are handed to insert_user as integers.
   """
   known_ages = set(check_fifa['Age'])
   while True:
      answer = input('NEW GOALKEEPER (Age, Height, Reflexes => e.g. "20 180 50")')
      try:
         # split() without an argument tolerates repeated spaces; unpacking
         # raises ValueError when fewer than three values were entered.
         age, height, reflexes = (int(part) for part in answer.split()[:3])
      except ValueError:
         print('Please enter three whole numbers separated by spaces, e.g. "20 180 50".')
         continue
      if age in known_ages:
         break
      print(f'Age {age} does not occur in the goalkeeper data, please try again.')
   insert_user(age, height, reflexes, check_dict)

# Set up user test data
def insert_user(user_age, user_height, user_reflexes, check_dict):
   """Build the feature row for a new goalkeeper and pass it to normalize_test.

   A copy of check_dict is built in which the 'Age' entry is replaced by one
   0/1 column per age in check_fifa['Age'] except the lowest one (the encoding
   cell uses drop_first=True, so the lowest age has no dummy column and is
   represented by all zeros). 'IR' is set to 0 and the user's height and
   reflexes overwrite the mean values. The caller's dict is not modified.

   Parameters:
      user_age: age of the new player (int or numeric string).
      user_height: height of the new player (int or numeric string).
      user_reflexes: 'GK Reflexes' value of the new player (int or numeric string).
      check_dict: dict of column name -> mean value; must contain 'Age'.

   Raises:
      KeyError: if check_dict has no 'Age' entry.
      ValueError: if user_age is not an integer or does not occur in the data.
   """
   if 'Age' not in check_dict:
      raise KeyError('Age')

   # sorted() makes the dummy order explicit instead of relying on set iteration order.
   ages = sorted(set(check_fifa['Age']))
   baseline_age, dummy_ages = ages[0], ages[1:]

   user_age = int(user_age)
   if user_age not in ages:
      raise ValueError(f'Age {user_age} does not occur in the goalkeeper data')

   # Rebuild the dict, putting the age dummies where 'Age' used to be.
   player = {}
   for key, value in check_dict.items():
      if key == 'Age':
         for age in dummy_ages:
            player[f'Age_{age}'] = 0
      else:
         player[key] = value
   player['IR'] = 0

   # Set user input values. The baseline age keeps all dummies at 0; setting a
   # dummy for it would add a column the model was never fitted on.
   if user_age != baseline_age:
      player[f'Age_{user_age}'] = 1
   player['Height'] = int(user_height)
   player['GK Reflexes'] = int(user_reflexes)
   normalize_test(player)

# Normalize test data
def normalize_test(check_dict):
   """Min-max scale the entries of check_dict in place and pass it to predict_user.

   Every key that is a column of check_fifa is scaled with that column's
   minimum and maximum, except 'ID': the Normalize cell does not scale 'ID'
   before the model is fitted, so it must stay unscaled here as well.
   Keys that are not columns of check_fifa (age dummies, 'IR') are left as is.

   Parameters:
      check_dict: dict of feature name -> value; modified in place.
   """
   unscaled_columns = {'ID'}
   for key in check_dict:
      if key in check_fifa.columns and key not in unscaled_columns:
         low = check_fifa[key].min()
         high = check_fifa[key].max()
         # Fall back to 1 for a constant column to avoid dividing by zero.
         span = (high - low) or 1
         check_dict[key] = (check_dict[key] - low) / span
   predict_user(check_dict)

# Predict user
def predict_user(check_dict):
   """Print the fitted model's prediction for the single feature row in check_dict."""
   feature_row = pd.DataFrame([check_dict])
   prediction = result.predict(feature_row)
   print('\n' + 'Our prediction for the new goalkeeper: POT', prediction)

# Model check on the prepared Neuer row, then the interactive prediction
neuer_prediction = result.predict(neuer_check)
print('92% model prediction for "Manuel Neuer"', neuer_prediction)

input_user(check_dict)
