# Train The Model

## Import Libraries

In [1]:
import pickle

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor


## Load the Dataset

In [2]:
# Read the prepared match data; the relative path is resolved against the
# notebook's working directory.
DATASET_PATH = 'final_cricket.csv'
cricket = pd.read_csv(DATASET_PATH)

In [3]:
# List the column labels of the loaded DataFrame
cricket.columns

Index(['Unnamed: 0', 'batting_team', 'bowling_team', 'city', 'current_score',
       'balls_left', 'wicket_left', 'current_run_rate', 'last_five', 'runs_x'],
      dtype='object')

## Model Training

In [4]:
# Separate the target from the features.
# y: the 'runs_x' column, which the model learns to predict.
# X: every other column except 'Unnamed: 0' (presumably a saved row index
#    from an earlier CSV export -- TODO confirm).
y = cricket['runs_x']
X = cricket.drop(columns=['runs_x', 'Unnamed: 0'])

In [5]:
# Split features and target into training and evaluation sets.
# test_size=0.4 holds out 40% of the rows for evaluation; the fixed
# random_state makes the shuffle (and so the split) reproducible across runs.
# train_test_split is imported in the imports cell at the top of the notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [6]:
# Display the training features produced by the split above
X_train

Unnamed: 0,batting_team,bowling_team,city,current_score,balls_left,wicket_left,current_run_rate,last_five
27755,India,New Zealand,Wellington,100,28,5,6.521739,34.0
20525,New Zealand,South Africa,Durban,66,32,2,4.500000,20.0
12493,India,Pakistan,Johannesburg,65,61,8,6.610169,26.0
10721,Pakistan,New Zealand,Auckland,62,69,5,7.294118,26.0
25673,Pakistan,England,Dubai,71,63,7,7.473684,28.0
...,...,...,...,...,...,...,...,...
11284,Australia,India,Melbourne,22,98,9,6.000000,22.0
44732,England,Pakistan,Abu Dhabi,36,88,9,6.750000,34.0
38158,Sri Lanka,West Indies,London,48,75,10,6.400000,37.0
860,Sri Lanka,Bangladesh,Colombo,35,83,6,5.675676,28.0


In [7]:
# One-hot encode the categorical columns and pass every remaining column
# through unchanged. drop='first' omits the first category of each encoded
# feature; sparse_output=False makes the encoder return a dense array.
categorical_columns = ['batting_team', 'bowling_team', 'city']
one_hot_encoder = OneHotEncoder(sparse_output=False, drop='first')
transformer = ColumnTransformer(
    transformers=[('transformer', one_hot_encoder, categorical_columns)],
    remainder='passthrough',
)


In [8]:
# Build the end-to-end estimator: column encoding -> feature scaling ->
# XGBoost regressor. Keeping all three stages in one Pipeline means the same
# preprocessing is applied at fit time and at predict time.
# NOTE(review): tree ensembles are generally insensitive to feature scaling,
# so 'step2' may be unnecessary -- confirm before removing.
xgb_regressor = XGBRegressor(
    n_estimators=1000,
    learning_rate=0.2,
    max_depth=12,
    random_state=1,
)
pipeline_steps = [
    ('step1', transformer),
    ('step2', StandardScaler()),
    ('step3', xgb_regressor),
]
model = Pipeline(steps=pipeline_steps)

In [9]:
# Train the full pipeline (preprocessing + regressor) on the training split
model.fit(X=X_train, y=y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [10]:
# Predict the target for the held-out test features with the fitted pipeline
y_predictions = model.predict(X_test)

In [11]:
# Coefficient of determination (R^2) of the predictions on the test split
r2_score(y_true=y_test, y_pred=y_predictions)

0.9567325711250305

In [12]:
# Mean absolute error of the predictions on the test split, in the same
# units as the target column
mean_absolute_error(y_true=y_test, y_pred=y_predictions)

2.8124591311300917

In [13]:
# Persist the fitted pipeline to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling raises, instead of
# relying on garbage collection to close an anonymous open() handle.
# NOTE(review): the filename spelling ('predition') is kept as-is because
# other code may load the model by this exact name -- confirm before renaming.
with open('cricket_predition.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)