# Train The Model

## Import Libraries

In [8]:
import pickle

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor


## Load the Dataset

In [9]:
# Read the match data from the CSV file (path relative to the notebook's
# working directory) into a DataFrame.
DATA_FILE = 'final_cricket.csv'
cricket = pd.read_csv(DATA_FILE)

In [None]:
# Show the column labels of the loaded DataFrame.
cricket.columns

In [12]:
# Separate the regression target (y) from the model inputs (X).
# The features are every column except the target 'runs_x' and 'Unnamed: 0'
# (presumably an index column written by an earlier to_csv -- confirm).
y = cricket['runs_x']
X = cricket.drop(columns=['runs_x', 'Unnamed: 0'])

In [14]:
# Hold out part of the data for evaluation. train_test_split comes from the
# imports cell at the top of the notebook, so all dependencies live in one place.
# test_size=0.4 sends 40% of the rows to the test set; the fixed random_state
# makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [None]:
# Preview the first rows of the training features instead of rendering the
# whole frame, which keeps the saved notebook output small.
X_train.head()

In [16]:
# One-hot encode the categorical columns and pass every other column through
# unchanged (remainder='passthrough').
#   * drop='first' removes one indicator column per categorical feature.
#   * handle_unknown='ignore' makes transform() encode a team/city that was not
#     present in the training data as all zeros instead of raising an error, so
#     predict() keeps working on the held-out split and on new inputs. It does
#     not change how the categories seen during fit are encoded.
transformer = ColumnTransformer([
    (
        'transformer',
        OneHotEncoder(sparse_output=False, drop='first', handle_unknown='ignore'),
        ['batting_team', 'bowling_team', 'city'],
    )
], remainder='passthrough')


In [18]:
# Build the end-to-end estimator: column encoding -> feature scaling -> XGBoost
# regressor. The step names ('step1'..'step3') are part of the pipeline's
# interface and are kept as they are.
# NOTE(review): gradient-boosted trees are usually insensitive to feature
# scaling, so the StandardScaler step is likely redundant -- confirm before
# removing it.
regressor = XGBRegressor(
    n_estimators=1000,
    learning_rate=0.2,
    max_depth=12,
    random_state=1,
)
model = Pipeline(steps=[
    ('step1', transformer),
    ('step2', StandardScaler()),
    ('step3', regressor),
])

In [None]:
# Train the whole pipeline (preprocessing and regressor) on the training split.
model.fit(X_train, y_train)

In [20]:
# Predict the target for the held-out test features.
y_predictions = model.predict(X=X_test)

In [None]:
# Coefficient of determination (R^2) on the test split.
# Keyword arguments make the (true, predicted) order explicit.
r2_score(y_true=y_test, y_pred=y_predictions)

In [None]:
# Mean absolute error on the test split, in the same units as the target.
# Keyword arguments make the (true, predicted) order explicit.
mean_absolute_error(y_true=y_test, y_pred=y_predictions)

In [None]:
# Persist the fitted pipeline to disk. The context manager closes the file
# handle (flushing the data) even if pickling raises; the bare open() used
# before never closed it.
# NOTE(review): 'predition' in the file name looks like a typo, but the name is
# kept unchanged because other code may load this exact file -- confirm before
# renaming.
with open('cricket_predition.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)