The train/test split has already been performed on the data; the resulting splits are loaded from a pickle file below.

In [1]:
import pickle

In [2]:
# Load the pre-computed train/test split. The pickle must unpack into exactly
# four objects, in this order: x_train, x_test, y_train, y_test.
# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# only open a 'train_test_data.pkl' that comes from a trusted source.
with open('train_test_data.pkl', 'rb') as file:
    splits = pickle.load(file)
x_train, x_test, y_train, y_test = splits

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score,mean_absolute_error

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [5]:
# Shared preprocessing step: one-hot encode the three categorical columns
# (dense output, first category of each column dropped) and pass every other
# column through untouched.
# Note: this single instance is placed in every pipeline below, and Pipeline
# does not clone its steps, so each pipeline's fit() refits this same object.
trf = ColumnTransformer(
    transformers=[
        (
            'trf',
            OneHotEncoder(sparse_output=False, drop='first'),
            ['batting_team', 'bowling_team', 'city'],
        ),
    ],
    remainder='passthrough',
)

## LinearRegression

In [6]:
# Baseline: one-hot encode -> standardise -> ordinary least squares.
lr_model = LinearRegression()
lr_pipe = Pipeline([
    ('step1', trf),
    ('step2', StandardScaler()),
    ('step3', lr_model),
])

# Fit on the training split, then predict the held-out split.
lr_pred = lr_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (lr_mar stores the mean absolute error).
lr_mse, lr_mar, lr_r2 = (
    metric(y_test, lr_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

In [7]:
# Report the linear-regression test metrics, one per line.
print(
    f"Mean Squared Error: {lr_mse}",
    f"Mean Absolute Error: {lr_mar}",
    f"R-squared: {lr_r2}",
    sep="\n",
)

Mean Squared Error: 186.98080588849706
Mean Absolute Error: 9.422448189419034
R-squared: 0.5926450298081365


## PolynomialFeatures

In [10]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

# PR_model is only the feature-expansion step: degree-2 interaction terms
# (interaction_only=True, so no squared terms) plus a bias column. The actual
# estimator is the lightly regularised Ridge at the end of the pipeline.
PR_model = PolynomialFeatures(
    degree=2,
    interaction_only=True,
    include_bias=True,
)

PR_pipe = Pipeline(steps=[
    ('step1', trf),
    ('scaler', StandardScaler()),
    ('model', PR_model),
    ('regressor', Ridge(alpha=0.01)),
])

# Fit on the training split, then predict the held-out split.
PR_pred = PR_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (PR_mar stores the mean absolute error).
PR_mse = mean_squared_error(y_true=y_test, y_pred=PR_pred)
PR_mar = mean_absolute_error(y_true=y_test, y_pred=PR_pred)
PR_r2 = r2_score(y_true=y_test, y_pred=PR_pred)

In [11]:
# Report the polynomial-features + Ridge test metrics, one per line.
print(
    f"Mean Squared Error: {PR_mse}",
    f"Mean Absolute Error: {PR_mar}",
    f"R-squared: {PR_r2}",
    sep="\n",
)

Mean Squared Error: 38.59055488320435
Mean Absolute Error: 3.8080247017740545
R-squared: 0.9225876173874757


## Ridge, Lasso

In [12]:
from sklearn.linear_model import Ridge, Lasso

# L2-regularised linear regression on the encoded, standardised features.
ridge_model = Ridge(alpha=1.0)
R_pipe = Pipeline(steps=[
    ('step1', trf),
    ('scaler', StandardScaler()),
    ('model', ridge_model),
])

# Fit on the training split, then predict the held-out split.
ridge_predictions = R_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (ridge_mar stores the mean absolute error).
ridge_mse, ridge_mar, ridge_r2 = (
    metric(y_test, ridge_predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

In [13]:
# Report the Ridge test metrics, one per line.
print(
    f"Mean Squared Error: {ridge_mse}",
    f"Mean Absolute Error: {ridge_mar}",
    f"R-squared: {ridge_r2}",
    sep="\n",
)

Mean Squared Error: 186.9816946481209
Mean Absolute Error: 9.42234937882819
R-squared: 0.5926412622299985


In [14]:
# L1-regularised linear regression on the encoded, standardised features.
lasso_model = Lasso(alpha=0.01)
l_pipe = Pipeline(steps=[
    ('step1', trf),
    ('scaler', StandardScaler()),
    ('model', lasso_model),
])

# Fit on the training split, then predict the held-out split.
lasso_predictions = l_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (lasso_mar stores the mean absolute error).
lasso_mse = mean_squared_error(y_true=y_test, y_pred=lasso_predictions)
lasso_mar = mean_absolute_error(y_true=y_test, y_pred=lasso_predictions)
lasso_r2 = r2_score(y_true=y_test, y_pred=lasso_predictions)

In [15]:
# Report the Lasso test metrics, one per line.
print(
    f"Mean Squared Error: {lasso_mse}",
    f"Mean Absolute Error: {lasso_mar}",
    f"R-squared: {lasso_r2}",
    sep="\n",
)

Mean Squared Error: 187.33921752419525
Mean Absolute Error: 9.433322585447858
R-squared: 0.5908033562893759


## DecisionTreeRegressor

In [17]:
from sklearn.tree import DecisionTreeRegressor

# Fully grown regression tree (no depth limit, leaves may hold one sample).
# random_state is pinned because scikit-learn permutes the candidate features
# at every split even with the default 'best' splitter, so ties between
# equally good splits would otherwise be broken differently on each run and
# the reported metrics would not be reproducible. The seed value matches the
# one already used for the XGBoost model in this notebook.
Decision_model = DecisionTreeRegressor(
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=1,
)
DT_pipe = Pipeline([
    ('step1', trf),
    ('scaler', StandardScaler()),
    ('model', Decision_model),
])

# Fit on the training split, then predict the held-out split.
DT_pipe.fit(x_train, y_train)
DT_predictions = DT_pipe.predict(x_test)

# Test-split metrics (DT_mar stores the mean absolute error).
DT_mse = mean_squared_error(y_test, DT_predictions)
DT_r2 = r2_score(y_test, DT_predictions)
DT_mar = mean_absolute_error(y_test, DT_predictions)

In [18]:
# Report the decision-tree test metrics, one per line.
print(
    f"Mean Squared Error: {DT_mse}",
    f"Mean Absolute Error: {DT_mar}",
    f"R-squared: {DT_r2}",
    sep="\n",
)

Mean Squared Error: 31.454148581150484
Mean Absolute Error: 0.9343162557781206
R-squared: 0.938685130151984


## RandomForestRegressor

In [20]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 200 fully grown trees, each split considering log2(n_features)
# candidate features.
# random_state is pinned because both the bootstrap sampling and the per-split
# feature subsampling are random; without a seed every run trains a different
# forest and the reported metrics cannot be reproduced. The seed value matches
# the one already used for the XGBoost model in this notebook.
RF_model = RandomForestRegressor(
    n_estimators=200,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='log2',
    random_state=1,
)
RF_pipe = Pipeline([
    ('step1', trf),
    ('scaler', StandardScaler()),
    ('model', RF_model),
])

# Fit on the training split, then predict the held-out split.
RF_pipe.fit(x_train, y_train)
RF_predictions = RF_pipe.predict(x_test)

# Test-split metrics (RF_mar stores the mean absolute error).
RF_mse = mean_squared_error(y_test, RF_predictions)
RF_r2 = r2_score(y_test, RF_predictions)
RF_mar = mean_absolute_error(y_test, RF_predictions)

In [21]:
# Report the random-forest test metrics, one per line.
print(
    f"Mean Squared Error: {RF_mse}",
    f"Mean Absolute Error: {RF_mar}",
    f"R-squared: {RF_r2}",
    sep="\n",
)

Mean Squared Error: 9.972109201475563
Mean Absolute Error: 1.51429122423715
R-squared: 0.9810698843261398


In [24]:
# Display the dimensions of the training features as (rows, columns).
x_train.shape

(31152, 8)

In [25]:
# Display the dimensions of the training targets; a second dimension larger
# than one means the regressors are being fitted on multiple target columns.
y_train.shape

(31152, 2)

## GradientBoostingRegressor

In [27]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

# GradientBoostingRegressor predicts a single target, so MultiOutputRegressor
# fits one independent copy of base_model per target column.
# random_state is pinned because scikit-learn seeds each boosting-stage tree
# and the per-split feature permutation from it; without a seed, repeated runs
# need not reproduce the reported metrics. The seed value matches the one
# already used for the XGBoost model in this notebook.
# NOTE(review): learning_rate=1.0 applies no shrinkage at all - confirm this
# is intentional rather than a leftover from experimentation.
base_model = GradientBoostingRegressor(
    n_estimators=300,
    learning_rate=1.0,
    max_depth=5,
    random_state=1,
)
multioutput_model = MultiOutputRegressor(base_model)

gradient_boost_pipe = Pipeline([
    ('preprocessor', trf),
    ('scaler', StandardScaler()),
    ('model', multioutput_model),
])

# Fit on the training split, then predict the held-out split.
gradient_boost_pipe.fit(x_train, y_train)
GB_predictions = gradient_boost_pipe.predict(x_test)

# Test-split metrics (GB_mar stores the mean absolute error).
GB_mse = mean_squared_error(y_test, GB_predictions)
GB_r2 = r2_score(y_test, GB_predictions)
GB_mar = mean_absolute_error(y_test, GB_predictions)

In [28]:
# Report the gradient-boosting test metrics, one per line.
print(
    f"Mean Squared Error: {GB_mse}",
    f"Mean Absolute Error: {GB_mar}",
    f"R-squared: {GB_r2}",
    sep="\n",
)

Mean Squared Error: 19.816369957519523
Mean Absolute Error: 2.402600527241031
R-squared: 0.9649712487309912


## SVR

In [32]:
from sklearn.svm import SVR

# RBF-kernel support vector regression. SVR predicts a single target, so
# MultiOutputRegressor fits one independent SVR per target column.
svr_base_model = SVR(kernel='rbf', C=10, epsilon=1)
multioutput_modell = MultiOutputRegressor(svr_base_model)

SVR_pipe = Pipeline(steps=[
    ('preprocessor', trf),
    ('scaler', StandardScaler()),
    ('model', multioutput_modell),
])

# Fit on the training split, then predict the held-out split.
SVR_predictions = SVR_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (SVR_mar stores the mean absolute error).
SVR_mse, SVR_mar, SVR_r2 = (
    metric(y_test, SVR_predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

In [33]:
# Report the SVR test metrics, one per line.
print(
    f"Mean Squared Error: {SVR_mse}",
    f"Mean Absolute Error: {SVR_mar}",
    f"R-squared: {SVR_r2}",
    sep="\n",
)

Mean Squared Error: 38.95287543823907
Mean Absolute Error: 2.672690800866083
R-squared: 0.9362298013698962


## KNeighborsRegressor

In [35]:
from sklearn.neighbors import KNeighborsRegressor

# 3-nearest-neighbour regression with Manhattan distance (p=1) and
# inverse-distance weighting of the neighbours.
# NOTE(review): unlike the other pipelines there is no StandardScaler step
# here (the steps jump from 'step1' to 'step3'), so the passthrough columns
# enter the distance computation unscaled - confirm this is intentional.
knn_model = KNeighborsRegressor(
    n_neighbors=3,
    weights='distance',
    p=1,
)

knn_pipe = Pipeline([
    ('step1', trf),
    ('step3', knn_model),
])

# Fit on the training split, then predict the held-out split.
knn_pred = knn_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (knn_mar stores the mean absolute error).
knn_mse = mean_squared_error(y_true=y_test, y_pred=knn_pred)
knn_mar = mean_absolute_error(y_true=y_test, y_pred=knn_pred)
knn_r2 = r2_score(y_true=y_test, y_pred=knn_pred)

In [36]:
# Report the KNN test metrics, one per line.
print(
    f"Mean Squared Error: {knn_mse}",
    f"Mean Absolute Error: {knn_mar}",
    f"R-squared: {knn_r2}",
    sep="\n",
)

Mean Squared Error: 43.69805824309412
Mean Absolute Error: 2.6534537829599816
R-squared: 0.9109007020078486


## BayesianRidge

In [41]:
from sklearn.linear_model import BayesianRidge

# Bayesian ridge regression. BayesianRidge predicts a single target, so
# MultiOutputRegressor fits one independent copy per target column.
bayesian_model = BayesianRidge(
    max_iter=200,
    alpha_1=1e-6,
    lambda_1=1e-6,
)
multioutput_modl = MultiOutputRegressor(bayesian_model)

bayesian_pipe = Pipeline([
    ('step1', trf),
    ('step2', StandardScaler()),
    ('step3', multioutput_modl),
])

# Fit on the training split, then predict the held-out split.
bayesian_pred = bayesian_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (bayesian_mar stores the mean absolute error).
bayesian_mse, bayesian_mar, bayesian_r2 = (
    metric(y_test, bayesian_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)


In [42]:
# Report the Bayesian ridge test metrics, one per line.
print(
    f"Mean Squared Error: {bayesian_mse}",
    f"Mean Absolute Error: {bayesian_mar}",
    f"R-squared: {bayesian_r2}",
    sep="\n",
)

Mean Squared Error: 186.9986622131126
Mean Absolute Error: 9.421519606279947
R-squared: 0.5925634374037967


## HuberRegressor

In [43]:
from sklearn.linear_model import HuberRegressor

# Huber regression with default settings. HuberRegressor predicts a single
# target, so MultiOutputRegressor fits one independent copy per target column.
huber_model = HuberRegressor()
multioutput_modll = MultiOutputRegressor(huber_model)

huber_pipe = Pipeline([
    ('step1', trf),
    ('step2', StandardScaler()),
    ('step3', multioutput_modll),
])

# Fit on the training split, then predict the held-out split.
huber_pred = huber_pipe.fit(x_train, y_train).predict(x_test)

# Test-split metrics (huber_mar stores the mean absolute error).
huber_mse = mean_squared_error(y_true=y_test, y_pred=huber_pred)
huber_mar = mean_absolute_error(y_true=y_test, y_pred=huber_pred)
huber_r2 = r2_score(y_true=y_test, y_pred=huber_pred)


In [44]:
# Report the Huber regression test metrics, one per line.
print(
    f"Mean Squared Error: {huber_mse}",
    f"Mean Absolute Error: {huber_mar}",
    f"R-squared: {huber_r2}",
    sep="\n",
)

Mean Squared Error: 193.0691150214018
Mean Absolute Error: 9.295066825874493
R-squared: 0.5803074685082323


## XGBRegressor

In [45]:
from xgboost  import XGBRegressor
# XGBoost regressor behind the same encode -> standardise preprocessing.
pipe = Pipeline([
    ('step1', trf),
    ('step2', StandardScaler()),
    (
        'step3',
        XGBRegressor(
            n_estimators=1000,
            learning_rate=0.2,
            max_depth=12,
            random_state=1,
        ),
    ),
])
# Kept as the cell's last expression so the fitted pipeline is displayed.
pipe.fit(x_train, y_train)

  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [46]:
# Score the fitted XGBoost pipeline (`pipe`) on the held-out split.
XGB_pred = pipe.predict(x_test)

# Test-split metrics (XGB_mar stores the mean absolute error).
XGB_mse, XGB_mar, XGB_r2 = (
    metric(y_test, XGB_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

In [47]:
# Report the XGBoost test metrics, one per line.
print(
    f"Mean Squared Error: {XGB_mse}",
    f"Mean Absolute Error: {XGB_mar}",
    f"R-squared: {XGB_r2}",
    sep="\n",
)

Mean Squared Error: 6.853709771315634
Mean Absolute Error: 1.056283712906291
R-squared: 0.9887635603912082


In [49]:
from tabulate import tabulate

# One row per fitted model: (display name, MSE, R2, MAE) on the test split.
model_scores = [
    ('Linear Regression', lr_mse, lr_r2, lr_mar),
    ('Polynomial', PR_mse, PR_r2, PR_mar),
    ('Ridge', ridge_mse, ridge_r2, ridge_mar),
    ('Lasso', lasso_mse, lasso_r2, lasso_mar),
    ('Decision Tree', DT_mse, DT_r2, DT_mar),
    ('Random Forest', RF_mse, RF_r2, RF_mar),
    ('GradientBoosting', GB_mse, GB_r2, GB_mar),
    ('SVR', SVR_mse, SVR_r2, SVR_mar),
    ('KNN', knn_mse, knn_r2, knn_mar),
    ('BayesianRidge', bayesian_mse, bayesian_r2, bayesian_mar),
    ('Huber Regression', huber_mse, huber_r2, huber_mar),
    ('XGBRegressor', XGB_mse, XGB_r2, XGB_mar),
]

# Transpose the rows into the column-oriented mapping that tabulate renders
# when headers='keys' (dict keys become the column headers, in this order).
comparison_data = {
    header: list(column)
    for header, column in zip(('Model', 'MSE', 'R2', 'MAE'), zip(*model_scores))
}

# Render every model side by side and show the table.
comparison_table = tabulate(comparison_data, headers='keys', tablefmt='heavy_outline')
print(comparison_table)


┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┓
┃ Model             ┃       MSE ┃       R2 ┃      MAE ┃
┣━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━┫
┃ Linear Regression ┃ 186.981   ┃ 0.592645 ┃ 9.42245  ┃
┃ Polynomial        ┃  38.5906  ┃ 0.922588 ┃ 3.80802  ┃
┃ Ridge             ┃ 186.982   ┃ 0.592641 ┃ 9.42235  ┃
┃ Lasso             ┃ 187.339   ┃ 0.590803 ┃ 9.43332  ┃
┃ Decision Tree     ┃  31.4541  ┃ 0.938685 ┃ 0.934316 ┃
┃ Random Forest     ┃   9.97211 ┃ 0.98107  ┃ 1.51429  ┃
┃ GradientBoosting  ┃  19.8164  ┃ 0.964971 ┃ 2.4026   ┃
┃ SVR               ┃  38.9529  ┃ 0.93623  ┃ 2.67269  ┃
┃ KNN               ┃  43.6981  ┃ 0.910901 ┃ 2.65345  ┃
┃ BayesianRidge     ┃ 186.999   ┃ 0.592563 ┃ 9.42152  ┃
┃ Huber Regression  ┃ 193.069   ┃ 0.580307 ┃ 9.29507  ┃
┃ XGBRegressor      ┃   6.85371 ┃ 0.988764 ┃ 1.05628  ┃
┗━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━┛


In [50]:
from tabulate import tabulate
# Shortlist of models (Ridge, Lasso, BayesianRidge and Huber are left out),
# one row each: (display name, MSE, R2, MAE) on the test split.
shortlist_scores = [
    ('Linear Regression', lr_mse, lr_r2, lr_mar),
    ('Polynomial', PR_mse, PR_r2, PR_mar),
    ('Decision Tree', DT_mse, DT_r2, DT_mar),
    ('Random Forest', RF_mse, RF_r2, RF_mar),
    ('GradientBoosting', GB_mse, GB_r2, GB_mar),
    ('SVR', SVR_mse, SVR_r2, SVR_mar),
    ('KNN', knn_mse, knn_r2, knn_mar),
    ('XGBRegressor', XGB_mse, XGB_r2, XGB_mar),
]

# Transpose the rows into the column-oriented mapping that tabulate renders
# when headers='keys' (dict keys become the column headers, in this order).
comparison_data = {
    header: list(column)
    for header, column in zip(('Model', 'MSE', 'R2', 'MAE'), zip(*shortlist_scores))
}
comparison_table = tabulate(comparison_data, headers='keys', tablefmt='pretty')
print(comparison_table)

+-------------------+--------------------+--------------------+--------------------+
|       Model       |        MSE         |         R2         |        MAE         |
+-------------------+--------------------+--------------------+--------------------+
| Linear Regression | 186.98080588849706 | 0.5926450298081365 | 9.422448189419034  |
|    Polynomial     | 38.59055488320435  | 0.9225876173874757 | 3.8080247017740545 |
|   Decision Tree   | 31.454148581150484 | 0.938685130151984  | 0.9343162557781206 |
|   Random Forest   | 9.972109201475563  | 0.9810698843261398 |  1.51429122423715  |
| GradientBoosting  | 19.816369957519523 | 0.9649712487309912 | 2.402600527241031  |
|        SVR        | 38.95287543823907  | 0.9362298013698962 | 2.672690800866083  |
|        KNN        | 43.69805824309412  | 0.9109007020078486 | 2.6534537829599816 |
|   XGBRegressor    | 6.853709771315634  | 0.9887635603912082 | 1.056283712906291  |
+-------------------+--------------------+--------------------+--------------------+

In [53]:
from tabulate import tabulate
# Same shortlist of models, with the columns ordered Model, MSE, MAE, R2.
# One row each: (display name, MSE, MAE, R2) on the test split.
shortlist_scores = [
    ('Linear Regression', lr_mse, lr_mar, lr_r2),
    ('Polynomial', PR_mse, PR_mar, PR_r2),
    ('Decision Tree', DT_mse, DT_mar, DT_r2),
    ('Random Forest', RF_mse, RF_mar, RF_r2),
    ('GradientBoosting', GB_mse, GB_mar, GB_r2),
    ('SVR', SVR_mse, SVR_mar, SVR_r2),
    ('KNN', knn_mse, knn_mar, knn_r2),
    ('XGBRegressor', XGB_mse, XGB_mar, XGB_r2),
]

# Transpose the rows into the column-oriented mapping that tabulate renders
# when headers='keys' (dict keys become the column headers, in this order).
comparison_data = {
    header: list(column)
    for header, column in zip(('Model', 'MSE', 'MAE', 'R2'), zip(*shortlist_scores))
}
comparison_table = tabulate(comparison_data, headers='keys', tablefmt='heavy_outline')
print(comparison_table)

┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┓
┃ Model             ┃       MSE ┃      MAE ┃       R2 ┃
┣━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━┫
┃ Linear Regression ┃ 186.981   ┃ 9.42245  ┃ 0.592645 ┃
┃ Polynomial        ┃  38.5906  ┃ 3.80802  ┃ 0.922588 ┃
┃ Decision Tree     ┃  31.4541  ┃ 0.934316 ┃ 0.938685 ┃
┃ Random Forest     ┃   9.97211 ┃ 1.51429  ┃ 0.98107  ┃
┃ GradientBoosting  ┃  19.8164  ┃ 2.4026   ┃ 0.964971 ┃
┃ SVR               ┃  38.9529  ┃ 2.67269  ┃ 0.93623  ┃
┃ KNN               ┃  43.6981  ┃ 2.65345  ┃ 0.910901 ┃
┃ XGBRegressor      ┃   6.85371 ┃ 1.05628  ┃ 0.988764 ┃
┗━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━┛
