## **2022 Football Transfer Analysis and Prediction**

## أسس تعلم الآلة -مجموعة ب
#### **Sarmad MOHAMMED**

In [292]:
import pandas as pd

### Importing the dataset 

In [168]:
# Read data into a DataFrame
Football = pd.read_csv('Summer22_FootballTransfers.csv')

In [169]:
# Displaying the top 5 rows of the dataset for a quick visualization of the data
Football.head()

Unnamed: 0,name,position,age,origin_club,league_origin_club,country_origin_club,new_club,league_new_club,country_new_club,cost,date_of_transfer
0,Amine Gouiri,Centre-Forward,22.0,OGC Nice,Ligue 1,France,Stade Rennais FC,Ligue 1,France,€42.00m,9/1/2022
1,Umar Sadiq,Centre-Forward,25.0,UD Almería,LaLiga,Spain,Real Sociedad,LaLiga,Spain,€18.00m,9/1/2022
2,Carlos Soler,Central Midfield,25.0,Valencia CF,LaLiga,Spain,Paris Saint-Germain,Ligue 1,France,€50.00m,9/1/2022
3,Manuel Akanji,Centre-Back,27.0,Borussia Dortmund,Bundesliga,Germany,Manchester City,Premier League,England,€30.00m,9/1/2022
4,Wout Faes,Centre-Back,24.0,Stade Reims,Ligue 1,France,Leicester City,Premier League,England,€10.00m,9/1/2022


### Exploratory Analysis

In [170]:
# The dimension of the dataframe in the matrix format (rows, columns)
dimension = Football.shape
print('Dataframe dimensions:', dimension)

Dataframe dimensions: (33625, 11)


In [171]:
print("Number of columns: ", len(Football.columns.values))
print("\n")
print(Football.columns.values)

Number of columns:  11


['name' 'position' 'age' 'origin_club' 'league_origin_club'
 'country_origin_club' 'new_club' 'league_new_club' 'country_new_club'
 'cost' 'date_of_transfer']


In [172]:
Missing_Values = Football.isnull().sum()
Missing_Values

name                      5
position                 10
age                     148
origin_club               5
league_origin_club      940
country_origin_club     935
new_club                 10
league_new_club        5578
country_new_club       5574
cost                      4
date_of_transfer          0
dtype: int64

In [173]:
# Missing values and Filling factor

def MissingValues_FillingFactor(dfp):
    missing_df = dfp.isnull().sum(axis=0).reset_index()
    missing_df.columns = ['Variable', 'Missing Values']
    missing_df['Filling Factor (%)'] = (dfp.shape[0] - missing_df['Missing Values']) / dfp.shape[0]*100
    df = missing_df.sort_values('Filling Factor (%)').reset_index(drop = True)
    return df

In [174]:
MissingValues_FillingFactor(Football)

Unnamed: 0,Variable,Missing Values,Filling Factor (%)
0,league_new_club,5578,83.411152
1,country_new_club,5574,83.423048
2,league_origin_club,940,97.204461
3,country_origin_club,935,97.219331
4,age,148,99.559851
5,position,10,99.97026
6,new_club,10,99.97026
7,name,5,99.98513
8,origin_club,5,99.98513
9,cost,4,99.988104


In [175]:
# Percentage of missing values in the dataset

MS_V = Missing_Values.sum()       # 13209
FS = Football.size                # 369875  <-- (33625 * 11)

print('Percentage of missing values in the dataset:\n', round( (MS_V / FS) *100, 2), '%')

Percentage of missing values in the dataset:
 3.57 %


In [176]:
# Checking the unique values in the each features
Football.nunique()

name                   32577
position                  16
age                       32
origin_club             5819
league_origin_club       462
country_origin_club      153
new_club                6317
league_new_club          463
country_new_club         139
cost                     118
date_of_transfer          63
dtype: int64

In [177]:
# Checking the data types for each features
Football.dtypes

name                    object
position                object
age                    float64
origin_club             object
league_origin_club      object
country_origin_club     object
new_club                object
league_new_club         object
country_new_club        object
cost                    object
date_of_transfer        object
dtype: object

In [178]:
# Getting the descriptive statistics on the data
Football_club.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
age,33477.0,25.020432,4.784488,15.0,21.0,24.0,28.0,46.0


### Data Cleaning

In [179]:
# Remove missing values (NAs) from (cost, origin_club, name, new_club, position, age) columns
Football.dropna(subset=['cost', 'origin_club', 'name', 'new_club', 'position', 'age'], inplace=True)

In [180]:
# Missing values and Filling factor
MissingValues_FillingFactor(Football)

Unnamed: 0,Variable,Missing Values,Filling Factor (%)
0,league_new_club,5525,83.487253
1,country_new_club,5525,83.487253
2,league_origin_club,927,97.229445
3,country_origin_club,927,97.229445
4,name,0,100.0
5,position,0,100.0
6,age,0,100.0
7,origin_club,0,100.0
8,new_club,0,100.0
9,cost,0,100.0


In [181]:
print('Dataframe dimensions before drop:', dimension)

print('Dataframe dimensions after drop :', Football.shape)

Dataframe dimensions before drop: (33625, 11)
Dataframe dimensions after drop : (33459, 11)


In [182]:
# Changing type of column (age) from float to int (ex: 15.0  --->  15)
Football['age'] = Football['age'].astype(int)

In [183]:
pos = Football['position'].unique()

print('Number of unique values :', len(pos))
print()
print(pos)

Number of unique values : 16

['Centre-Forward' 'Central Midfield' 'Centre-Back' 'Defensive Midfield'
 'Left Winger' 'Attacking Midfield' 'Left-Back' 'Second Striker'
 'Right Winger' 'Goalkeeper' 'defence' 'Right-Back' 'midfield'
 'Right Midfield' 'Left Midfield' 'attack']


In [184]:
# Checking the position of the players 
Football['position'].value_counts()

Centre-Back           5323
Centre-Forward        4982
Central Midfield      3613
Goalkeeper            3343
Defensive Midfield    2473
Right-Back            2405
Left-Back             2354
Attacking Midfield    2295
Left Winger           2244
Right Winger          2159
midfield               513
Right Midfield         462
Left Midfield          414
Second Striker         314
attack                 312
defence                253
Name: position, dtype: int64

In [185]:
position_map = {'Centre-Back':'D',
                'Centre-Forward':'F',
                'Central Midfield':'M',
                'Goalkeeper': 'G',
                'Defensive Midfield': 'M',
                'Right-Back':'D',
                'Left-Back':'D',
                'Attacking Midfield': 'M',
                'Right Winger': 'F',
                'Left Winger':'F',
                'midfield':'M',
                'Right Midfield':'M',
                'Left Midfield': 'M',
                'Second Striker': 'F',  
                'attack':'F',
                'defence':'D',}

Football['new_position'] = pd.Series(Football.position.map(position_map), index = Football.index)
Football.head()

Unnamed: 0,name,position,age,origin_club,league_origin_club,country_origin_club,new_club,league_new_club,country_new_club,cost,date_of_transfer,new_position
0,Amine Gouiri,Centre-Forward,22,OGC Nice,Ligue 1,France,Stade Rennais FC,Ligue 1,France,€42.00m,9/1/2022,F
1,Umar Sadiq,Centre-Forward,25,UD Almería,LaLiga,Spain,Real Sociedad,LaLiga,Spain,€18.00m,9/1/2022,F
2,Carlos Soler,Central Midfield,25,Valencia CF,LaLiga,Spain,Paris Saint-Germain,Ligue 1,France,€50.00m,9/1/2022,M
3,Manuel Akanji,Centre-Back,27,Borussia Dortmund,Bundesliga,Germany,Manchester City,Premier League,England,€30.00m,9/1/2022,D
4,Wout Faes,Centre-Back,24,Stade Reims,Ligue 1,France,Leicester City,Premier League,England,€10.00m,9/1/2022,D


In [186]:
# Encoding categorical data (new_position) into numeric variable
Football = pd.get_dummies(Football, columns = ['new_position'])
Football.rename(columns = {'new_position_D':'D', 'new_position_F':'F', 'new_position_G':'G', 'new_position_M':'M'}, inplace = True)

In [188]:
Football.head()

Unnamed: 0,name,position,age,origin_club,league_origin_club,country_origin_club,new_club,league_new_club,country_new_club,cost,date_of_transfer,D,F,G,M
0,Amine Gouiri,Centre-Forward,22,OGC Nice,Ligue 1,France,Stade Rennais FC,Ligue 1,France,€42.00m,9/1/2022,0,1,0,0
1,Umar Sadiq,Centre-Forward,25,UD Almería,LaLiga,Spain,Real Sociedad,LaLiga,Spain,€18.00m,9/1/2022,0,1,0,0
2,Carlos Soler,Central Midfield,25,Valencia CF,LaLiga,Spain,Paris Saint-Germain,Ligue 1,France,€50.00m,9/1/2022,0,0,0,1
3,Manuel Akanji,Centre-Back,27,Borussia Dortmund,Bundesliga,Germany,Manchester City,Premier League,England,€30.00m,9/1/2022,1,0,0,0
4,Wout Faes,Centre-Back,24,Stade Reims,Ligue 1,France,Leicester City,Premier League,England,€10.00m,9/1/2022,1,0,0,0


In [141]:
print(len(Football_club['country_origin_club'].unique()))
print(len(Football_club['country_new_club'].unique()))
print(len(Football_club['league_origin_club'].unique()))
print(len(Football_club['league_new_club'].unique()))
print(len(Football_club['origin_club'].unique()))
print(len(Football_club['new_club'].unique()))

154
140
463
463
5801
6296


**Encode column values with value between 0 and n_values-1 --> LabelEncoder**

In [252]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

CNC_encoded = le.fit_transform(Football['country_new_club'])
COC_encoded = le.fit_transform(Football['country_origin_club'])
OC_encoded = le.fit_transform(Football['origin_club'])
NC_encoded = le.fit_transform(Football['new_club'])
LOC_encoded = le.fit_transform(Football['league_origin_club'])
LNC_encoded = le.fit_transform(Football['league_new_club'])

print(CNC_encoded)
print(COC_encoded)
print(NC_encoded)
print(OC_encoded)
print(LOC_encoded)
print(LNC_encoded)

[ 45 117  45 ... 103  19 126]
[ 47 129 129 ... 115  16 140]
[5442 4687 4357 ... 4734 1448 1918]
[4011 5304 5522 ... 4487 1400 1899]
[211 182 182 ... 345  66 414]
[222 192 222 ... 208  75 224]


In [254]:
Football['CNC_encoded'] = CNC_encoded
Football['COC_encoded'] = COC_encoded
Football['NC_encoded'] = COC_encoded
Football['OC_encoded'] = COC_encoded
Football['LOC_encoded'] = COC_encoded
Football['LNC_encoded'] = COC_encoded

Football.head()

Unnamed: 0,name,position,age,origin_club,league_origin_club,country_origin_club,new_club,league_new_club,country_new_club,cost,...,D,F,G,M,CNC_encoded,COC_encoded,NC_encoded,OC_encoded,LOC_encoded,LNC_encoded
0,Amine Gouiri,Centre-Forward,22,OGC Nice,Ligue 1,France,Stade Rennais FC,Ligue 1,France,42000000,...,0,1,0,0,45,47,47,47,47,47
1,Umar Sadiq,Centre-Forward,25,UD Almería,LaLiga,Spain,Real Sociedad,LaLiga,Spain,18000000,...,0,1,0,0,117,129,129,129,129,129
2,Carlos Soler,Central Midfield,25,Valencia CF,LaLiga,Spain,Paris Saint-Germain,Ligue 1,France,50000000,...,0,0,0,1,45,129,129,129,129,129
3,Manuel Akanji,Centre-Back,27,Borussia Dortmund,Bundesliga,Germany,Manchester City,Premier League,England,30000000,...,1,0,0,0,39,50,50,50,50,50
4,Wout Faes,Centre-Back,24,Stade Reims,Ligue 1,France,Leicester City,Premier League,England,10000000,...,1,0,0,0,39,47,47,47,47,47


In [189]:
cost = Football['cost'].unique()

print('Number of unique values :', len(cost))
print()
print(cost)

Number of unique values : 118

['€42.00m' '€18.00m' '€50.00m' '€30.00m' '€10.00m' '€28.00m' '€15.00m'
 '€6.00m' '€12.00m' '€7.00m' '€16.00m' '€6.50m' '€3.00m' '€5.00m' '€8.00m'
 '€27.00m' '€3.50m' '€2.00m' '€1.50m' '€4.00m' '€1.60m' '€500Th.'
 '€900Th.' '€150Th.' '€200Th.' '€250Th.' '€450Th.' '€1.00m' '€400Th.'
 '€1.80m' '€1.20m' '€50Th.' '€225Th.' '€25Th.' '€600Th.' '€125Th.'
 '€750Th.' '€300Th.' '€100Th.' '€275Th.' '€1.40m' '€700Th.' '€75Th.'
 '€20.00m' '€2.50m' '€800Th.' '€175Th.' '€350Th.' '€10Th.' '€2.80m'
 '€375Th.' '€325Th.' '€22.00m' '€4.50m' '€1.10m' '€17.00m' '€550Th.'
 '€13.00m' '€1.30m' '€650Th.' '€9.00m' '€475Th.' '€2.20m' '€40.00m'
 '€25.00m' '€11.00m' '€850Th.' '€35.00m' '€48.00m' '€55.00m' '€675Th.'
 '€425Th.' '€7.50m' '€825Th.' '€5.50m' '€775Th.' '€14.00m' '€2.40m'
 '€4.80m' '€2.30m' '€8.50m' '€15Th.' '€3.20m' '€24.00m' '€625Th.' '€1.70m'
 '€4.70m' '€525Th.' '€1.90m' '€725Th.' '€60.00m' '€2.90m' '€32.00m'
 '€925Th.' '€4.30m' '€2.70m' '€2.60m' '€1.25m' '€70.00m' '€45.00

In [190]:
Football['cost'] = Football.cost.str.replace('m','0.000')
Football['cost'] = Football.cost.str.replace('€','')
Football['cost'] = Football.cost.str.replace('Th.','000')
Football['cost'] = Football.cost.str.replace('.','')

Football['cost'] = (Football['cost'].astype(float)).astype(int)

In [191]:
cost = Football['cost'].unique()

print('Number of unique values :', len(cost))
print()
print(cost)

Number of unique values : 118

[ 42000000  18000000  50000000  30000000  10000000  28000000  15000000
   6000000  12000000   7000000  16000000   6500000   3000000   5000000
   8000000  27000000   3500000   2000000   1500000   4000000   1600000
    500000    900000    150000    200000    250000    450000   1000000
    400000   1800000   1200000     50000    225000     25000    600000
    125000    750000    300000    100000    275000   1400000    700000
     75000  20000000   2500000    800000    175000    350000     10000
   2800000    375000    325000  22000000   4500000   1100000  17000000
    550000  13000000   1300000    650000   9000000    475000   2200000
  40000000  25000000  11000000    850000  35000000  48000000  55000000
    675000    425000   7500000    825000   5500000    775000  14000000
   2400000   4800000   2300000   8500000     15000   3200000  24000000
    625000   1700000   4700000    525000   1900000    725000  60000000
   2900000  32000000    925000   4300000   270

In [192]:
# Check if exact string 'Unknown' exists in [origin_club] column
print((Football['origin_club'].eq('Unknown')).any())

# Check if exact string 'Without Club' exists in [origin_club] column
print((Football['origin_club'].eq('Without Club')).any())

True
True


In [193]:
Football_club.loc[Football_club['origin_club']=='Unknown','origin_club'] = 'No club'
Football_club.loc[Football_club['origin_club']=='Without Club','origin_club'] = 'No club'

In [194]:
# Check if exact string 'Unknown' exists in [new_club] column
print((Football['new_club'].eq('Unknown')).any())

# Check if exact string 'Without Club' exists in [new_club] column
print((Football['new_club'].eq('Without Club')).any())

# Check if exact string 'Without Clunewb' exists in [new_club] column
print((Football['new_club'].eq('Career break')).any())

True
True
True


In [195]:
Football.loc[Football['new_club']=='Without Club','new_club'] = 'No club'
Football.loc[Football['new_club']=='Unknown','new_club'] = 'No club'
Football.loc[Football['new_club']=='Career break','new_club'] = 'No club'

In [196]:
Football.head()

Unnamed: 0,name,position,age,origin_club,league_origin_club,country_origin_club,new_club,league_new_club,country_new_club,cost,date_of_transfer,D,F,G,M
0,Amine Gouiri,Centre-Forward,22,OGC Nice,Ligue 1,France,Stade Rennais FC,Ligue 1,France,42000000,9/1/2022,0,1,0,0
1,Umar Sadiq,Centre-Forward,25,UD Almería,LaLiga,Spain,Real Sociedad,LaLiga,Spain,18000000,9/1/2022,0,1,0,0
2,Carlos Soler,Central Midfield,25,Valencia CF,LaLiga,Spain,Paris Saint-Germain,Ligue 1,France,50000000,9/1/2022,0,0,0,1
3,Manuel Akanji,Centre-Back,27,Borussia Dortmund,Bundesliga,Germany,Manchester City,Premier League,England,30000000,9/1/2022,1,0,0,0
4,Wout Faes,Centre-Back,24,Stade Reims,Ligue 1,France,Leicester City,Premier League,England,10000000,9/1/2022,1,0,0,0


### Analysis of dataset

In [197]:
# Who is the most expensive player and who is the cheapest player

print("Minimum Transfer Fee: ", Football['cost'].min())
print("Maximum Transfer Fee: ", Football['cost'].max())

Minimum Transfer Fee:  8000
Maximum Transfer Fee:  150000000


**Number of transfers per countries**

In [198]:
number_of_countries_displayed = 15

tranfers_per_country = Football.groupby('country_new_club')['name'].count().sort_values(ascending=False).nlargest(number_of_countries_displayed)
mean_transfers_country = Football.groupby('country_new_club')['name'].count().mean()

print(tranfers_per_country)

country_new_club
Italy                 4001
Spain                 2111
Germany               1463
Turkey                1090
Russia                 988
England                875
France                 824
Romania                755
Serbia                 661
Portugal               552
Greece                 506
Netherlands            500
Croatia                477
Iran                   439
Bosnia-Herzegovina     412
Name: name, dtype: int64


**Money spent per club**

In [202]:
pd.set_option('display.float_format', lambda x: '%.2f' % x)
#pd.reset_option('display.float_format')

In [207]:
# The club that spends the most money according to the transfers
cost_per_club = Football.groupby('new_club')['cost'].agg(['min','max','mean','sum','count'])
cost_per_club.sort_values(by='sum', ascending=False).head(20)

Unnamed: 0_level_0,min,max,mean,sum,count
new_club,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
No club,8000,16000000,160534.39,828518000,5161
Chelsea FC,3000000,70000000,27100000.0,271000000,10
FC Barcelona,4000000,60000000,32750000.0,262000000,8
Paris Saint-Germain,20000000,55000000,35857142.86,251000000,7
Manchester City,6000000,150000000,49200000.0,246000000,5
Juventus FC,2500000,65000000,22950000.0,229500000,10
Nottingham Forest,125000,30000000,10505952.38,220625000,21
Bayern Munich,125000,70000000,28589285.71,200125000,7
Tottenham Hotspur,2000000,48000000,22500000.0,180000000,8
Olympique Marseille,3000000,25000000,11966666.67,179500000,15


**Money spent per country**

In [205]:
# The country that spends the most money according to the transfers
cost_per_country = Football.groupby('country_new_club')['cost'].agg(['min','max','mean','sum','count'])
cost_per_country.sort_values(by='sum', ascending=False)

Unnamed: 0_level_0,min,max,mean,sum,count
country_new_club,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
England,10000,150000000,3262491.43,2854680000.00,875
Italy,10000,70000000,569810.05,2279810000.00,4001
Spain,10000,60000000,756717.20,1597430000.00,2111
Germany,10000,70000000,888660.29,1300110000.00,1463
France,10000,55000000,1467924.76,1209570000.00,824
...,...,...,...,...,...
Angola,75000,75000,75000.00,75000.00,1
Maldives,75000,75000,75000.00,75000.00,1
Chinese Taipei,25000,25000,25000.00,50000.00,2
Nigeria,50000,50000,50000.00,50000.00,1


### **Machine Learning**

In [234]:
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.metrics import r2_score

from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestRegressor 
from sklearn.model_selection import GridSearchCV

In [214]:
features_1 = ['age','F','M','D','G']

X = Football[features_1]
y = Football.cost

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state= 0)

scX = StandardScaler()

X_train = scX.fit_transform(X_train)
X_test = scX.fit_transform(X_test)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(26767, 5)
(6692, 5)
(26767,)
(6692,)


In [232]:
#features_1 = ['age','F','M','D','G']

LR_model = LinearRegression()

LR_model.fit(X_train, y_train)

y_pred = LR_model.predict(X_test)

r2score = r2_score(y_test, y_pred) 

r2score = f"{r2score:1f}"
print(r2score)


-8391511107170014855168.000000


In [287]:
def Split_Scaling_Model(features):
    
    X = Football[features]
    y = Football.cost

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state= 0)

    scX = StandardScaler()

    X_train = scX.fit_transform(X_train)
    X_test = scX.fit_transform(X_test)

    print(features)
    print('------------------')
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
    print('------------------------------------------------------------------')
    
    Model = RandomForestRegressor(random_state=100)

    params = {
          'n_estimators':[200],
          'min_samples_leaf':[2,3,4,5],
          'max_depth':[2,3,4,5,6,7,8]
            }

    grid_search = GridSearchCV(estimator=Model, param_grid=params, verbose=2, n_jobs=-1, scoring='r2')
    grid_search.fit(X_train, y_train)
    
    
    Model_best = grid_search.best_estimator_

    y_train_pred = Model_best.predict(X_train)
    y_test_pred = Model_best.predict(X_test)

    r2_score_train = r2_score(y_train, y_train_pred)
    r2_score_test  = r2_score(y_test, y_test_pred)

    print('------------------------------------------------------------------')
    print("Train score: {:.5f} %".format(r2_score_train))
    print("Test score : {:.5f} %".format(r2_score_test))

In [288]:
features_1 = ['age','F','M','D','G']
Split_Scaling_Model(features_1)

['age', 'F', 'M', 'D', 'G']
------------------
(26767, 5)
(6692, 5)
(26767,)
(6692,)
------------------------------------------------------------------
Fitting 5 folds for each of 28 candidates, totalling 140 fits
------------------------------------------------------------------
Train score: 0.00763 %
Test score : 0.00354 %


In [289]:
features_2 = ['age','F','M','D','G','CNC_encoded', 'COC_encoded']
Split_Scaling_Model(features_2)

['age', 'F', 'M', 'D', 'G', 'CNC_encoded', 'COC_encoded']
------------------
(26767, 7)
(6692, 7)
(26767,)
(6692,)
------------------------------------------------------------------
Fitting 5 folds for each of 28 candidates, totalling 140 fits
------------------------------------------------------------------
Train score: 0.27393 %
Test score : 0.14285 %


In [290]:
features_3 = ['age','F','M','D','G','CNC_encoded', 'COC_encoded', 'LOC_encoded', 'LNC_encoded']
Split_Scaling_Model(features_3)

['age', 'F', 'M', 'D', 'G', 'CNC_encoded', 'COC_encoded', 'LOC_encoded', 'LNC_encoded']
------------------
(26767, 9)
(6692, 9)
(26767,)
(6692,)
------------------------------------------------------------------
Fitting 5 folds for each of 28 candidates, totalling 140 fits
------------------------------------------------------------------
Train score: 0.27394 %
Test score : 0.14286 %


$$ \Large
R^2 = 1 - \frac{\sum \left(y_i-\hat{y}_i\right)^2 }{\sum \left(y_i-\bar{y}\right)^2}
$$

<br><br>

$\Large y_i: \text{Actual Value}$ <br><br>
$\Large \hat{y}_i: \text{Predicted Value}$ <br><br>
$\Large \bar{y}: \text{Mean of the actual values}$
