In [72]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [73]:
# Load the raw player table from a CSV file located relative to the working directory.
# NOTE(review): presumably the FIFA 20 player export — confirm the data source and version.
df = pd.read_csv('players_20.csv')

In [74]:
# Print a concise summary of the loaded frame: index range, column count, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18483 entries, 0 to 18482
Columns: 106 entries, sofifa_id to rb
dtypes: float64(18), int64(44), object(44)
memory usage: 14.9+ MB


In [75]:
# Columns kept for the model: the regression target first, then the
# candidate predictors, ending with the categorical reputation level.
player_stat_defensive_columns = [
    # target
    'overall',
    # defensive attributes
    'defending',
    'defending_standing_tackle',
    'defending_sliding_tackle',
    'attacking_heading_accuracy',
    'physic',
    'mentality_aggression',
    'mentality_interceptions',
    # on-the-ball and general attributes
    'attacking_short_passing',
    'skill_ball_control',
    'movement_reactions',
    'power_jumping',
    # categorical level, dummy-encoded later
    'international_reputation',
]

In [76]:
# Keep only the rows whose player_positions value is exactly 'CB'.
# This is an equality match, so any row whose position string differs
# from 'CB' in any way is left out.
df_defenders = df.loc[df['player_positions'].eq('CB')]

In [77]:
# Narrow the frame to the modelling columns.
# .copy() detaches the result from the frame it was sliced from, so the
# column assignment performed on df_defenders in a later cell modifies an
# independent frame instead of raising SettingWithCopyWarning.
df_defenders = df_defenders[player_stat_defensive_columns].copy()
df_defenders

Unnamed: 0,overall,defending,defending_standing_tackle,defending_sliding_tackle,attacking_heading_accuracy,physic,mentality_aggression,mentality_interceptions,attacking_short_passing,skill_ball_control,movement_reactions,power_jumping,international_reputation
7,90,90.0,92,85,86,86.0,82,89,78,76,88,90,3
11,89,89.0,90,87,83,87.0,87,88,71,71,86,81,3
16,89,90.0,91,89,83,82.0,91,88,65,61,82,89,4
18,89,87.0,87,90,92,85.0,90,88,80,83,87,93,4
29,88,88.0,88,87,85,80.0,76,89,83,82,85,75,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18426,49,48.0,50,54,49,56.0,47,50,31,26,42,71,1
18458,48,49.0,51,47,43,52.0,43,52,28,29,45,66,1
18474,48,46.0,48,49,46,53.0,41,49,31,35,47,75,1
18478,48,47.0,50,52,40,51.0,46,48,35,35,40,70,1


In [78]:
# List the distinct reputation levels present among the selected players.
df_defenders['international_reputation'].unique()

array([3, 4, 1, 2])

In [79]:
# Cast the integer reputation level to object dtype so that pd.get_dummies,
# which by default only encodes object/string/category columns, treats it as
# a categorical variable rather than leaving it as a numeric feature.
df_defenders['international_reputation'] = df_defenders['international_reputation'].astype('object')

In [80]:
## One-hot encode the categorical column(s).
## drop_first=True omits the first level of each encoded column so the
## remaining indicator columns are not perfectly collinear with the intercept.
df_defenders_dummies = pd.get_dummies(
    df_defenders,
    drop_first=True,
    dtype='int',
)
df_defenders_dummies

Unnamed: 0,overall,defending,defending_standing_tackle,defending_sliding_tackle,attacking_heading_accuracy,physic,mentality_aggression,mentality_interceptions,attacking_short_passing,skill_ball_control,movement_reactions,power_jumping,international_reputation_2,international_reputation_3,international_reputation_4
7,90,90.0,92,85,86,86.0,82,89,78,76,88,90,0,1,0
11,89,89.0,90,87,83,87.0,87,88,71,71,86,81,0,1,0
16,89,90.0,91,89,83,82.0,91,88,65,61,82,89,0,0,1
18,89,87.0,87,90,92,85.0,90,88,80,83,87,93,0,0,1
29,88,88.0,88,87,85,80.0,76,89,83,82,85,75,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18426,49,48.0,50,54,49,56.0,47,50,31,26,42,71,0,0,0
18458,48,49.0,51,47,43,52.0,43,52,28,29,45,66,0,0,0
18474,48,46.0,48,49,46,53.0,41,49,31,35,47,75,0,0,0
18478,48,47.0,50,52,40,51.0,46,48,35,35,40,70,0,0,0


In [81]:
# Summary statistics, transposed so each feature is a row.
df_defenders_dummies.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
overall,2343.0,66.772941,6.918187,48.0,63.0,67.0,71.0,90.0
defending,2343.0,66.02006,7.442236,45.0,61.0,66.0,71.0,90.0
defending_standing_tackle,2343.0,67.714895,7.116079,46.0,63.0,67.0,72.0,92.0
defending_sliding_tackle,2343.0,65.341869,7.292743,43.0,61.0,65.0,70.0,90.0
attacking_heading_accuracy,2343.0,66.202732,8.622917,38.0,61.0,67.0,72.0,92.0
physic,2343.0,71.058472,6.683429,42.0,67.0,72.0,76.0,88.0
mentality_aggression,2343.0,66.889885,10.683267,40.0,59.0,68.0,75.0,92.0
mentality_interceptions,2343.0,64.704652,8.388201,43.0,59.0,64.0,70.0,90.0
attacking_short_passing,2343.0,56.58216,10.613019,25.0,50.0,58.0,64.0,83.0
skill_ball_control,2343.0,52.537345,10.460011,24.0,45.0,54.0,60.0,83.0


In [83]:
# Response: the player's overall rating.
y = df_defenders_dummies['overall']
# Predictors: every remaining column of the encoded frame.
X = df_defenders_dummies.drop(columns='overall')

In [84]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes
# the shuffle, and therefore the split, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [85]:
# Ordinary least-squares linear regressor, constructed with scikit-learn's default settings.
lm = LinearRegression()

In [86]:
# Fit on the training split only; the test split is kept aside for evaluation.
lm.fit(X_train, y_train)

In [87]:
# Pull the learned parameters off the fitted model:
# m_list holds one coefficient per predictor column, c is the intercept.
m_list, c = lm.coef_, lm.intercept_

In [88]:
# Display the fitted coefficients (one per column of X, in X's column order).
m_list

array([0.44863544, 0.05533326, 0.04576075, 0.05847089, 0.13491481,
       0.04483185, 0.03514334, 0.04511292, 0.04236564, 0.05330698,
       0.01886931, 0.1143022 , 0.85083302, 1.29137196])

In [89]:
# Predict the overall rating for the held-out test split
y_pred = lm.predict(X_test)

# Compute the mean squared error once and reuse it for the RMSE below
mse = mean_squared_error(y_test, y_pred)

# Mean squared error
print('MSE:', mse)
# Root mean squared error (same units as the target)
print('RMSE:', np.sqrt(mse))
# Mean absolute error (a plain mean of absolute residuals; no root is taken)
print('MAE:', mean_absolute_error(y_test, y_pred))
# R-squared (coefficient of determination)
print('R_squared:', r2_score(y_test, y_pred))

MSE: 0.21656781581561493
RMSE: 0.4653684731646687
MAE: 0.3739759741205955
R_squared: 0.9949792045966848


In [98]:
# Print each actual overall rating next to its rounded prediction.
# y_test is converted to an array once, up front, instead of being
# rebuilt on every loop iteration.
actual_values = np.array(y_test)
for actual, pred_value in zip(actual_values, y_pred):
    print(f"Overall: {actual} \nPrediction: {round(pred_value, 2)}\n\n")

Overall: 77 
Prediction: 76.46


Overall: 68 
Prediction: 68.01


Overall: 71 
Prediction: 70.97


Overall: 65 
Prediction: 64.73


Overall: 62 
Prediction: 61.49


Overall: 68 
Prediction: 68.42


Overall: 57 
Prediction: 57.22


Overall: 54 
Prediction: 54.05


Overall: 58 
Prediction: 58.26


Overall: 69 
Prediction: 69.22


Overall: 52 
Prediction: 52.62


Overall: 76 
Prediction: 75.52


Overall: 65 
Prediction: 64.93


Overall: 57 
Prediction: 57.66


Overall: 64 
Prediction: 64.52


Overall: 57 
Prediction: 57.16


Overall: 65 
Prediction: 64.8


Overall: 68 
Prediction: 67.63


Overall: 58 
Prediction: 58.61


Overall: 67 
Prediction: 67.24


Overall: 66 
Prediction: 65.69


Overall: 71 
Prediction: 70.34


Overall: 69 
Prediction: 68.6


Overall: 74 
Prediction: 73.84


Overall: 68 
Prediction: 67.12


Overall: 67 
Prediction: 67.44


Overall: 56 
Prediction: 55.59


Overall: 73 
Prediction: 73.28


Overall: 53 
Prediction: 52.92


Overall: 63 
Prediction: 63.38


Overall: 64 