# FIFA 19 Project

## Importing the Libraries


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix , accuracy_score,r2_score

## Importing the Dataset

In [None]:
# Load the player statistics table from the CSV file.
dataset = pd.read_csv("FIFA19_player_stats.csv")

# Features: every column except the last. Target: the last column.
x = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

In [None]:
# Display the raw feature matrix (all dataset columns except the last).
x

array([[3.0, 68.0, 64.0, ..., 11.0, 11.0, 'RCM'],
       [4.0, 77.0, 70.0, ..., 13.0, 7.0, 'LF'],
       [4.0, 80.0, 68.0, ..., 14.0, 8.0, 'CM'],
       ...,
       [2.0, 39.0, 29.0, ..., 14.0, 7.0, 'CB'],
       [3.0, 58.0, 55.0, ..., 10.0, 7.0, 'CAM'],
       [2.0, 22.0, 22.0, ..., 15.0, 10.0, 'CB']], dtype=object)

In [None]:
# Display the target vector (the last dataset column).
y

array([71, 86, 79, ..., 58, 62, 51])

## Encoding the Independent Variable

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the last feature column (string labels such as 'RCM', 'CB')
# and pass every other column through unchanged.
# sparse_threshold=0 forces a dense result: OneHotEncoder emits a sparse
# matrix, and if ColumnTransformer kept the stacked output sparse,
# np.array(...) would wrap it in a 0-d object array instead of a 2-D array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [-1])],
    remainder='passthrough',
    sparse_threshold=0,
)
x = np.array(ct.fit_transform(x))

In [None]:
# Display the feature matrix after one-hot encoding.
x

array([[0.0, 0.0, 0.0, ..., 13.0, 11.0, 11.0],
       [0.0, 0.0, 0.0, ..., 6.0, 13.0, 7.0],
       [0.0, 0.0, 0.0, ..., 7.0, 14.0, 8.0],
       ...,
       [0.0, 1.0, 0.0, ..., 9.0, 14.0, 7.0],
       [1.0, 0.0, 0.0, ..., 11.0, 10.0, 7.0],
       [0.0, 1.0, 0.0, ..., 11.0, 15.0, 10.0]], dtype=object)

## Dealing with Missing Values

In [None]:
# Build a boolean mask of missing entries: True where a value is missing,
# False otherwise. With features="missing-only" (the default) the mask only
# has columns for features that contained a missing value during fit.
from sklearn.impute import MissingIndicator

indicator = MissingIndicator(missing_values=np.nan, features="missing-only")
indicator.fit_transform(x)

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [None]:
from sklearn.impute import SimpleImputer

# Replace missing feature values with the mean of their column.
# NOTE(review): the imputer is fit on the full data before the train/test
# split, so test rows influence the fill values — confirm this is acceptable.
imp_x = SimpleImputer(missing_values=np.nan, strategy='mean')
x = imp_x.fit_transform(x)

# Impute the target with its own imputer. Previously `imp_x` was refit on `y`
# here, which left `imp_y` unused and overwrote the feature statistics that
# `imp_x` had just learned.
# SimpleImputer needs 2-D input, so `y` becomes an (n_samples, 1) column.
imp_y = SimpleImputer(missing_values=np.nan, strategy='mean')
y = imp_y.fit_transform(y.reshape(-1, 1))

In [None]:
# Display the feature matrix after mean imputation.
x

array([[ 0.,  0.,  0., ..., 13., 11., 11.],
       [ 0.,  0.,  0., ...,  6., 13.,  7.],
       [ 0.,  0.,  0., ...,  7., 14.,  8.],
       ...,
       [ 0.,  1.,  0., ...,  9., 14.,  7.],
       [ 1.,  0.,  0., ..., 11., 10.,  7.],
       [ 0.,  1.,  0., ..., 11., 15., 10.]])

In [None]:
# Display the target after imputation (reshaped to a single column).
y

array([[71.],
       [86.],
       [79.],
       ...,
       [58.],
       [62.],
       [51.]])

## Splitting the Data into Training and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Display the training features.
x_train

array([[ 0.,  0.,  0., ...,  9., 12.,  7.],
       [ 0.,  0.,  0., ..., 13., 10., 14.],
       [ 0.,  0.,  0., ...,  7., 14., 16.],
       ...,
       [ 0.,  0.,  0., ..., 58., 60., 72.],
       [ 0.,  0.,  0., ..., 14.,  9., 15.],
       [ 0.,  0.,  0., ...,  8.,  9.,  9.]])

In [None]:
# Display the test features.
x_test

array([[ 0.,  0.,  0., ..., 11.,  8.,  9.],
       [ 0.,  1.,  0., ...,  9.,  8.,  6.],
       [ 1.,  0.,  0., ..., 12., 12., 19.],
       ...,
       [ 0.,  1.,  0., ...,  6., 15., 11.],
       [ 0.,  0.,  0., ...,  7., 15., 10.],
       [ 0.,  0.,  0., ..., 10.,  5.,  6.]])

In [None]:
# Display the training targets.
y_train

array([[51.],
       [62.],
       [72.],
       ...,
       [65.],
       [72.],
       [65.]])

In [None]:
# Display the test targets.
y_test

array([[61.],
       [58.],
       [66.],
       ...,
       [65.],
       [58.],
       [54.]])

## Applying Different Models

### Multiple Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit on all training features.
reg = LinearRegression()
reg.fit(X=x_train, y=y_train)

In [None]:
# Predict the target for the held-out rows with the linear model.
y_pred = reg.predict(x_test)

# Show two decimals when printing arrays.
np.set_printoptions(precision=2)

# Side-by-side comparison: column 0 = prediction, column 1 = actual value.
np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1)


array([[60.11, 61.  ],
       [58.4 , 58.  ],
       [66.57, 66.  ],
       ...,
       [66.36, 65.  ],
       [57.45, 58.  ],
       [55.47, 54.  ]])

In [None]:
# R² of the linear model on the test set.
r2_score(y_true=y_test, y_pred=y_pred)

0.8901915638124778

### Polynomial Regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features with all degree-2 polynomial terms.
# NOTE(review): the R² reported for this model further down is hugely
# negative, so this unregularised fit looks numerically unstable; scaling or
# a regularised regressor may be needed — confirm.
x_reg = PolynomialFeatures(degree=2)
x_poly = x_reg.fit_transform(x_train)

# Fit a plain linear regression on the expanded feature set.
lin_reg = LinearRegression()
lin_reg.fit(X=x_poly, y=y_train)

In [None]:
# Apply the already-fitted polynomial expansion to the test features, then
# predict with the polynomial model.
y_pred_poly = lin_reg.predict(x_reg.transform(x_test))

# Show two decimals when printing arrays.
np.set_printoptions(precision=2)

# Side-by-side comparison: column 0 = prediction, column 1 = actual value.
np.concatenate((y_pred_poly.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1)

array([[60.75, 61.  ],
       [57.97, 58.  ],
       [66.77, 66.  ],
       ...,
       [64.49, 65.  ],
       [58.1 , 58.  ],
       [54.08, 54.  ]])

In [None]:
# R² of the polynomial model on the test set.
r2_score(y_true=y_test, y_pred=y_pred_poly)

-1623301865.3557966

### SVR

In [None]:
from sklearn.svm import SVR

# Support vector regression with an RBF kernel.
sv_reg = SVR(kernel='rbf')

# y_train is an (n_samples, 1) column; SVR expects a 1-D target and otherwise
# emits a DataConversionWarning, so flatten it before fitting.
sv_reg.fit(x_train, y_train.ravel())

  y = column_or_1d(y, warn=True)


In [None]:
# Predict the target for the held-out rows with the SVR model.
y_pred_svr = sv_reg.predict(x_test)

# Show two decimals when printing arrays.
np.set_printoptions(precision=2)

# Side-by-side comparison: column 0 = prediction, column 1 = actual value.
np.concatenate((y_pred_svr.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1)

array([[61.58, 61.  ],
       [57.29, 58.  ],
       [65.43, 66.  ],
       ...,
       [66.07, 65.  ],
       [58.15, 58.  ],
       [57.3 , 54.  ]])

In [None]:
# R² of the SVR model on the test set.
r2_score(y_true=y_test, y_pred=y_pred_svr)

0.9643429033563149

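The SVR above was trained on unscaled features and reaches an r2_score of about 0.96 on the test set. For the FIFA 19 player-stat prediction, SVR reaches an r2_score of about 0.98 after feature scaling (the scaling step is not shown in this notebook).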

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Single regression tree; the fixed seed makes the fit reproducible.
reg_tree = DecisionTreeRegressor(random_state=0)
reg_tree.fit(X=x_train, y=y_train)

In [None]:
# Predict the target for the held-out rows with the decision tree.
y_pred_tree = reg_tree.predict(x_test)

# Show two decimals when printing arrays.
np.set_printoptions(precision=2)

# Side-by-side comparison: column 0 = prediction, column 1 = actual value.
np.concatenate((y_pred_tree.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1)

array([[62., 61.],
       [62., 58.],
       [69., 66.],
       ...,
       [68., 65.],
       [58., 58.],
       [51., 54.]])

In [None]:
# R² of the decision tree on the test set.
r2_score(y_true=y_test, y_pred=y_pred_tree)

0.8919634635429532

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Ensemble of 100 regression trees; the fixed seed makes the fit reproducible.
rand_reg = RandomForestRegressor(n_estimators=100, random_state=0)

# y_train is an (n_samples, 1) column; the forest expects a 1-D target and
# otherwise emits a DataConversionWarning, so flatten it before fitting.
rand_reg.fit(x_train, y_train.ravel())

  rand_reg.fit(x_train , y_train)


In [None]:
# Predict the target for the held-out rows with the random forest.
y_pred_rand = rand_reg.predict(x_test)

# Show two decimals when printing arrays.
np.set_printoptions(precision=2)

# Side-by-side comparison: column 0 = prediction, column 1 = actual value.
np.concatenate((y_pred_rand.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1)

array([[62.05, 61.  ],
       [56.94, 58.  ],
       [65.75, 66.  ],
       ...,
       [65.84, 65.  ],
       [57.89, 58.  ],
       [55.04, 54.  ]])

In [None]:
# R² of the random forest on the test set.
r2_score(y_true=y_test, y_pred=y_pred_rand)

0.9614583990024913