In [24]:
# importing standard libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [25]:
# loading and viewing our dataset
player_data = pd.read_csv("player_data.csv")
player_data.head()

Unnamed: 0,Player,Age,Goals_Scored,Assists,Matches_Played,Position,Pass_Accuracy,Team_Rank,Market_Value_in_Million
0,Lionel Messi,36,12,18,30,Forward,91.2,1,80.5
1,Cristiano Ronaldo,38,15,7,28,Forward,89.5,2,75.3
2,Kevin De Bruyne,32,8,15,25,Midfielder,92.7,3,65.0
3,Neymar Jr,31,10,12,24,Forward,88.1,4,60.2
4,Robert Lewandowski,35,20,5,30,Forward,85.9,5,70.8


In [26]:
# checking if there is any null value 
print(player_data.isnull().sum())

Player                     0
Age                        0
Goals_Scored               0
Assists                    0
Matches_Played             0
Position                   0
Pass_Accuracy              0
Team_Rank                  0
Market_Value_in_Million    0
dtype: int64


In [27]:
# now position is categorical value, we need to conver it into numerical (converted into boolean vale)
player_data = pd.get_dummies(player_data,columns=["Position"],drop_first=True)
player_data

Unnamed: 0,Player,Age,Goals_Scored,Assists,Matches_Played,Pass_Accuracy,Team_Rank,Market_Value_in_Million,Position_Forward,Position_Goalkeeper,Position_Midfielder
0,Lionel Messi,36,12,18,30,91.2,1,80.5,True,False,False
1,Cristiano Ronaldo,38,15,7,28,89.5,2,75.3,True,False,False
2,Kevin De Bruyne,32,8,15,25,92.7,3,65.0,False,False,True
3,Neymar Jr,31,10,12,24,88.1,4,60.2,True,False,False
4,Robert Lewandowski,35,20,5,30,85.9,5,70.8,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...
70,Raphinha,27,11,7,28,87.3,3,68.0,True,False,False
71,Joao Moutinho,37,1,8,24,86.2,10,35.8,False,False,True
72,Lucas Hernandez,28,2,3,26,85.4,9,48.9,False,False,False
73,Zlatan Ibrahimovic,42,9,2,22,80.1,11,20.5,True,False,False


In [8]:
# Convert boolean columns to integers (0 or 1)
player_data[['Position_Forward', 'Position_Goalkeeper', 'Position_Midfielder']] = player_data[['Position_Forward', 'Position_Goalkeeper', 'Position_Midfielder']].astype(int)
player_data.head()

Unnamed: 0,Player,Age,Goals_Scored,Assists,Matches_Played,Pass_Accuracy,Team_Rank,Market_Value_in_Million,Position_Forward,Position_Goalkeeper,Position_Midfielder
0,Lionel Messi,36,12,18,30,91.2,1,80.5,1,0,0
1,Cristiano Ronaldo,38,15,7,28,89.5,2,75.3,1,0,0
2,Kevin De Bruyne,32,8,15,25,92.7,3,65.0,0,0,1
3,Neymar Jr,31,10,12,24,88.1,4,60.2,1,0,0
4,Robert Lewandowski,35,20,5,30,85.9,5,70.8,1,0,0


In [28]:
player_data.head()


Unnamed: 0,Player,Age,Goals_Scored,Assists,Matches_Played,Pass_Accuracy,Team_Rank,Market_Value_in_Million,Position_Forward,Position_Goalkeeper,Position_Midfielder
0,Lionel Messi,36,12,18,30,91.2,1,80.5,True,False,False
1,Cristiano Ronaldo,38,15,7,28,89.5,2,75.3,True,False,False
2,Kevin De Bruyne,32,8,15,25,92.7,3,65.0,False,False,True
3,Neymar Jr,31,10,12,24,88.1,4,60.2,True,False,False
4,Robert Lewandowski,35,20,5,30,85.9,5,70.8,True,False,False


In [29]:
# Drop the 'Player' column from the DataFrame
player_data.drop(columns=['Player'], inplace=True)

# Check the updated DataFrame to confirm the column is dropped
print(player_data.head())

   Age  Goals_Scored  Assists  Matches_Played  Pass_Accuracy  Team_Rank  \
0   36            12       18              30           91.2          1   
1   38            15        7              28           89.5          2   
2   32             8       15              25           92.7          3   
3   31            10       12              24           88.1          4   
4   35            20        5              30           85.9          5   

   Market_Value_in_Million  Position_Forward  Position_Goalkeeper  \
0                     80.5              True                False   
1                     75.3              True                False   
2                     65.0             False                False   
3                     60.2              True                False   
4                     70.8              True                False   

   Position_Midfielder  
0                False  
1                False  
2                 True  
3                False  
4        

In [31]:
from sklearn.preprocessing import StandardScaler

# Select only numerical columns for scaling
numerical_columns = ['Age', 'Goals_Scored', 'Assists', 'Pass_Accuracy', 'Team_Rank','Position_Forward','Position_Goalkeeper','Position_Midfielder','Market_Value_in_Million']

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the numerical features
player_data[numerical_columns] = scaler.fit_transform(player_data[numerical_columns])

# Check the first few rows to ensure scaling is done
(player_data.head())


Unnamed: 0,Age,Goals_Scored,Assists,Matches_Played,Pass_Accuracy,Team_Rank,Market_Value_in_Million,Position_Forward,Position_Goalkeeper,Position_Midfielder
0,1.451258,0.672011,2.923195,30,1.311097,-1.364121,1.388242,1.259447,-0.267261,-0.685994
1,1.886635,1.131593,0.021106,28,0.80515,-1.005141,0.990845,1.259447,-0.267261,-0.685994
2,0.580503,0.059235,2.131716,25,1.757521,-0.646162,0.203692,-0.793999,-0.267261,1.457738
3,0.362815,0.365623,1.340237,24,0.388488,-0.287183,-0.163137,1.259447,-0.267261,-0.685994
4,1.233569,1.897563,-0.506546,30,-0.266267,0.071796,0.646943,1.259447,-0.267261,-0.685994


In [32]:
# Define features (X) and target (y)
X = player_data.drop(columns=['Market_Value_in_Million'])  # All columns except 'Goals_Scored'
y = player_data['Market_Value_in_Million']  # The target variable


In [47]:
from sklearn.model_selection import train_test_split

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_test

Unnamed: 0,Age,Goals_Scored,Assists,Matches_Played,Pass_Accuracy,Team_Rank,Position_Forward,Position_Goalkeeper,Position_Midfielder
4,1.233569,1.897563,-0.506546,30,-0.266267,0.071796,1.259447,-0.267261,-0.685994
63,-0.50794,-0.400347,0.812585,27,0.358726,-1.005141,-0.793999,-0.267261,1.457738
10,0.580503,0.825205,0.021106,26,0.448011,0.789754,1.259447,-0.267261,-0.685994
0,1.451258,0.672011,2.923195,30,1.311097,-1.364121,1.259447,-0.267261,-0.685994
35,1.668947,-0.859929,-1.298025,22,-1.337684,2.584649,-0.793999,-0.267261,-0.685994
61,-0.072563,-0.859929,-1.298025,25,-0.593645,0.789754,-0.793999,-0.267261,-0.685994
28,0.580503,0.212429,1.076411,25,0.180157,-0.646162,1.259447,-0.267261,-0.685994
12,-1.378695,-0.553541,-0.24272,25,0.328965,-0.646162,-0.793999,-0.267261,-0.685994
69,0.798192,-0.400347,0.548759,28,0.626581,-0.287183,-0.793999,-0.267261,1.457738
64,-0.943318,-0.093959,0.284932,27,0.209918,0.430775,-0.793999,-0.267261,1.457738


In [34]:
from sklearn.linear_model import LinearRegression

# Create an instance of the LinearRegression model
model = LinearRegression()

# Fit the model to the training data
model.fit(X_train, y_train)

In [48]:
# Make predictions on the test set
y_pred = model.predict(X_test)
y_pred

array([ 0.90008567,  0.13863071,  0.02965337,  0.1245222 , -2.00885664,
       -0.8405112 , -0.6868535 ,  0.22362886, -0.10632227,  0.40685946,
        0.36737111,  0.41802939,  0.31863118, -0.51880504, -0.48517605])

In [37]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Calculate performance metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}, MSE: {mse}, R²: {r2}')

MAE: 0.43658968826404965, MSE: 0.3615439128263639, R²: 0.4840655348851246


In [38]:
# Get model coefficients
coefficients = model.coef_
feature_names = X.columns
for feature, coef in zip(feature_names, coefficients):
    print(f'{feature}: {coef}')

Age: -0.2899859134873539
Goals_Scored: 0.9195032134706498
Assists: -0.11021920959634987
Matches_Played: 0.061230470991678554
Pass_Accuracy: 0.4162095042414079
Team_Rank: -0.09469316927983551
Position_Forward: -0.4087398880254247
Position_Goalkeeper: 0.3813478277739084
Position_Midfielder: -0.012072343191313256


In [39]:
# After fitting the model
intercept = model.intercept_

print(f'Intercept (b): {intercept}')

Intercept (b): -1.653711466492464
