## Importing the libraries

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix , accuracy_score,r2_score

## Importing the dataset

In [5]:
# Read the player statistics. Every column except the last is used as a
# feature; the last column is the regression target.
dataset = pd.read_csv("FIFA19_player_stats.csv")
feature_frame = dataset.iloc[:, :-1]
target_series = dataset.iloc[:, -1]
x = feature_frame.values
y = target_series.values

In [6]:
# Show the feature matrix (every dataset column except the last).
x

array([[3.0, 68.0, 64.0, ..., 11.0, 11.0, 'RCM'],
       [4.0, 77.0, 70.0, ..., 13.0, 7.0, 'LF'],
       [4.0, 80.0, 68.0, ..., 14.0, 8.0, 'CM'],
       ...,
       [2.0, 39.0, 29.0, ..., 14.0, 7.0, 'CB'],
       [3.0, 58.0, 55.0, ..., 10.0, 7.0, 'CAM'],
       [2.0, 22.0, 22.0, ..., 15.0, 10.0, 'CB']], dtype=object)

In [7]:
# Show the target vector (the last dataset column).
y

array([71, 86, 79, ..., 58, 62, 51])

## Encoding the independent variable

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the last feature column (the string-valued one, e.g. 'RCM',
# 'CB') and pass every other column through unchanged. The encoded columns
# come first in the result, followed by the passthrough columns.
#
# sparse_threshold=0 forces a dense result. OneHotEncoder emits a sparse
# matrix, and if ColumnTransformer kept the stacked output sparse,
# np.array(...) would wrap it in a 0-d object array instead of producing the
# 2-D matrix the following cells expect.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [-1])],
    remainder='passthrough',
    sparse_threshold=0,
)
x = np.array(ct.fit_transform(x))

In [9]:
# Show the feature matrix after one-hot encoding its last column.
x

array([[0.0, 0.0, 0.0, ..., 13.0, 11.0, 11.0],
       [0.0, 0.0, 0.0, ..., 6.0, 13.0, 7.0],
       [0.0, 0.0, 0.0, ..., 7.0, 14.0, 8.0],
       ...,
       [0.0, 1.0, 0.0, ..., 9.0, 14.0, 7.0],
       [1.0, 0.0, 0.0, ..., 11.0, 10.0, 7.0],
       [0.0, 1.0, 0.0, ..., 11.0, 15.0, 10.0]], dtype=object)

## Dealing with missing values

In [10]:
from sklearn.impute import MissingIndicator

# Build a boolean mask of the missing entries in x:
# True where a value is missing, False otherwise.
indicator = MissingIndicator()
missing_mask = indicator.fit_transform(x)
missing_mask

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [11]:
from sklearn.impute import SimpleImputer

# Replace NaNs in the features with the per-column mean.
imp_x = SimpleImputer(missing_values=np.nan, strategy='mean')
x = imp_x.fit_transform(x)

# Impute the target with its own imputer. Reusing imp_x here would re-fit it
# on y, discarding the feature statistics it learned above and leaving imp_y
# unused. SimpleImputer requires 2-D input, so y is reshaped to one column and
# remains of shape (n_samples, 1) afterwards.
imp_y = SimpleImputer(missing_values=np.nan, strategy='mean')
y = imp_y.fit_transform(y.reshape(-1, 1))

## Splitting the dataset into the Training set and Test set

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; random_state=0 makes the split
# reproducible across runs.
split_parts = train_test_split(x, y, test_size=0.25, random_state=0)
x_train, x_test, y_train, y_test = split_parts

In [13]:
# Print the training-split features.
print(x_train)

[[ 0.  0.  0. ...  9. 13.  7.]
 [ 0.  0.  1. ... 12. 15. 12.]
 [ 0.  0.  0. ...  6.  7. 11.]
 ...
 [ 0.  0.  0. ... 58. 60. 72.]
 [ 0.  0.  0. ... 14.  9. 15.]
 [ 0.  0.  0. ...  8.  9.  9.]]


In [14]:
# Print the training-split targets.
print(y_train)

[[58.]
 [65.]
 [60.]
 ...
 [65.]
 [72.]
 [65.]]


In [15]:
# Print the test-split features.
print(x_test)

[[ 0.  0.  0. ... 11.  8.  9.]
 [ 0.  1.  0. ...  9.  8.  6.]
 [ 1.  0.  0. ... 12. 12. 19.]
 ...
 [ 0.  0.  1. ... 10.  6.  7.]
 [ 0.  0.  0. ... 55. 55. 60.]
 [ 1.  0.  0. ...  9. 11.  9.]]


In [16]:
# Print the test-split targets.
print(y_test)

[[61.]
 [58.]
 [66.]
 ...
 [66.]
 [58.]
 [74.]]


## Applying different models

### Polynomial Regression

In [17]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Expand the training features into all polynomial terms up to degree 3,
# then fit an ordinary least-squares model on the expanded matrix.
x_reg = PolynomialFeatures(degree=3)
x_poly = x_reg.fit_transform(x_train)

lin_reg = LinearRegression().fit(x_poly, y_train)
lin_reg

In [18]:
# Apply the already-fitted polynomial expansion to the test features before
# predicting, so the test matrix has the same columns as the training one.
y_pred_poly = lin_reg.predict(x_reg.transform(x_test))
np.set_printoptions(precision=2)

# Side-by-side view: predictions in column 0, true targets in column 1.
predicted_col = y_pred_poly.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
np.hstack((predicted_col, actual_col))

array([[60.88, 61.  ],
       [57.44, 58.  ],
       [65.07, 66.  ],
       ...,
       [64.34, 66.  ],
       [58.08, 58.  ],
       [78.61, 74.  ]])

In [19]:
# Coefficient of determination of the polynomial model on the test split.
r2_score(y_true=y_test, y_pred=y_pred_poly)

0.8529771437551354

### Multiple Regression

In [20]:
from sklearn.linear_model import LinearRegression

# Plain multiple linear regression on the (un-expanded) training features.
reg = LinearRegression().fit(x_train, y_train)
reg

In [21]:
y_pred = reg.predict(x_test)
np.set_printoptions(precision=2)

# Side-by-side view: predictions in column 0, true targets in column 1.
predicted_col = y_pred.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
np.hstack((predicted_col, actual_col))

array([[60.17, 61.  ],
       [58.38, 58.  ],
       [66.58, 66.  ],
       ...,
       [67.04, 66.  ],
       [58.94, 58.  ],
       [73.66, 74.  ]])

In [22]:
# Coefficient of determination of the linear model on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.890446738902014

### SVR

In [23]:
from sklearn.svm import SVR

# Support vector regression with an RBF kernel.
# SVR expects a 1-D target; passing the (n_samples, 1) column vector makes
# scikit-learn emit a DataConversionWarning from column_or_1d, so the target
# is flattened explicitly. ravel() is a no-op if y_train is already 1-D.
sv_reg = SVR(kernel='rbf')
sv_reg.fit(x_train, y_train.ravel())

  y = column_or_1d(y, warn=True)


In [24]:
y_pred_svr = sv_reg.predict(x_test)
np.set_printoptions(precision=2)

# Side-by-side view: predictions in column 0, true targets in column 1.
predicted_col = y_pred_svr.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
np.hstack((predicted_col, actual_col))

array([[61.58, 61.  ],
       [57.3 , 58.  ],
       [65.39, 66.  ],
       ...,
       [67.38, 66.  ],
       [58.  , 58.  ],
       [75.14, 74.  ]])

In [25]:
# Coefficient of determination of the SVR model on the test split.
r2_score(y_true=y_test, y_pred=y_pred_svr)

0.9635723054879171

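For the FIFA 19 player-stat prediction, SVR (RBF kernel) on the unscaled features reaches an R² score of about 0.96 on the test set (see the output above). With feature scaling applied, SVR was found to reach an R² score of about 0.98; that scaling step is not shown in the cells above.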

### Decision Tree

In [26]:
from sklearn.tree import DecisionTreeRegressor

# Single regression tree; random_state=0 keeps the fitted tree reproducible.
reg_tree = DecisionTreeRegressor(random_state=0).fit(x_train, y_train)
reg_tree

In [27]:
y_pred_tree = reg_tree.predict(x_test)
np.set_printoptions(precision=2)

# Side-by-side view: predictions in column 0, true targets in column 1.
predicted_col = y_pred_tree.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
np.hstack((predicted_col, actual_col))

array([[64., 61.],
       [56., 58.],
       [69., 66.],
       ...,
       [64., 66.],
       [60., 58.],
       [73., 74.]])

In [28]:
# Coefficient of determination of the decision tree on the test split.
r2_score(y_true=y_test, y_pred=y_pred_tree)

0.8952436675503934

### Random Forest

In [29]:
from sklearn.ensemble import RandomForestRegressor

# Ensemble of 1000 regression trees; random_state=0 keeps the forest
# reproducible.
# The forest expects a 1-D target; passing the (n_samples, 1) column vector
# triggers a DataConversionWarning, so the target is flattened explicitly.
# ravel() is a no-op if y_train is already 1-D.
rand_reg = RandomForestRegressor(n_estimators=1000, random_state=0)
rand_reg.fit(x_train, y_train.ravel())

  rand_reg.fit(x_train , y_train)


In [30]:
y_pred_rand = rand_reg.predict(x_test)
np.set_printoptions(precision=2)

# Side-by-side view: predictions in column 0, true targets in column 1.
predicted_col = y_pred_rand.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
np.hstack((predicted_col, actual_col))

array([[62.01, 61.  ],
       [57.14, 58.  ],
       [66.49, 66.  ],
       ...,
       [66.12, 66.  ],
       [58.28, 58.  ],
       [74.27, 74.  ]])

In [31]:
# Coefficient of determination of the random forest on the test split.
r2_score(y_true=y_test, y_pred=y_pred_rand)

0.9619840604092115