<a href="https://colab.research.google.com/github/Manikandan089/project/blob/main/ml_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as mae, mean_squared_error as mse
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Columns that are dropped before modelling, and the teams whose rows are kept.
irrelevant = ['mid', 'date', 'venue','batsman', 'bowler', 'striker', 'non-striker']
const_teams = ['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',
              'Mumbai Indians', 'Kings XI Punjab', 'Royal Challengers Bangalore',
              'Delhi Daredevils', 'Sunrisers Hyderabad']

# Load the raw ball-by-ball data
ipl_df = pd.read_csv('ipl_data.csv')
print(f"Dataset successfully Imported of Shape : {ipl_df.shape}")

# Remove the columns listed in `irrelevant`
print(f'Before Removing Irrelevant Columns : {ipl_df.shape}')
ipl_df = ipl_df.drop(columns=irrelevant)
print(f'After Removing Irrelevant Columns : {ipl_df.shape}')
print(ipl_df.head())

# Keep only rows where both sides are in `const_teams`
print(f'Before Removing Inconsistent Teams : {ipl_df.shape}')
batting_side_kept = ipl_df['bat_team'].isin(const_teams)
bowling_side_kept = ipl_df['bowl_team'].isin(const_teams)
ipl_df = ipl_df[batting_side_kept & bowling_side_kept]
print(f'After Removing Inconsistent Teams : {ipl_df.shape}')

# Keep only rows recorded at 5.0 overs or later
print(f'Before Removing Overs : {ipl_df.shape}')
enough_overs = ipl_df['overs'].ge(5.0)
ipl_df = ipl_df[enough_overs]
print(f'After Removing Overs : {ipl_df.shape}')

# Perform Label Encoding for 'bat_team' and 'bowl_team'
# The encoder is fitted ONCE on the full list of kept teams so that both columns
# (and later lookups through `le`) share a single name -> code mapping.
# Refitting the same encoder per column discards the first fit and is only
# consistent when both columns happen to contain exactly the same teams.
le = LabelEncoder()
le.fit(const_teams)
# `.assign` builds a new frame, which avoids SettingWithCopyWarning when
# `ipl_df` is a boolean-filtered slice of the original frame.
ipl_df = ipl_df.assign(
    bat_team=le.transform(ipl_df['bat_team']),
    bowl_team=le.transform(ipl_df['bowl_team']),
)

# Perform One Hot Encoding and transform columns
# - columns 0 and 1 are the two team-code columns; everything else passes through
# - explicit `categories` pins each one-hot block to one column per known team,
#   even if some team is absent from a column
# - `sparse_threshold=0` forces a dense result so `np.array(...)` yields a 2-D array
team_codes = np.arange(len(le.classes_))
columnTransformer = ColumnTransformer(
    [('encoder', OneHotEncoder(categories=[team_codes, team_codes]), [0, 1])],
    remainder='passthrough',
    sparse_threshold=0,
)
ipl_df = np.array(columnTransformer.fit_transform(ipl_df))

# Model Building
# Every column except the last is an input feature; the last column is the target.
features = ipl_df[:, :-1]
labels = ipl_df[:, -1]

# Splitting data into training and testing sets
# A fixed `random_state` makes the 80/20 split (and therefore every reported
# metric) reproducible across kernel restarts.
# NOTE(review): the split is row-level, so rows from the same match can land in
# both train and test; the test score is probably optimistic. Consider a
# group-wise split on a match identifier — TODO confirm.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.20, shuffle=True, random_state=42
)

# Initialize the Decision Tree Regressor
# Seeded so that repeated fits on the same training data give the same tree.
dtr = DecisionTreeRegressor(random_state=42)

# Train the model
dtr.fit(train_features, train_labels)

# Evaluate the model (`score` is reported as a percentage)
train_score_dtr = dtr.score(train_features, train_labels) * 100
test_score_dtr = dtr.score(test_features, test_labels) * 100
print(f'Train Score : {train_score_dtr:.2f}%\nTest Score : {test_score_dtr:.2f}%')

# Model Evaluation
# Predict on the test set once and reuse the result for every metric instead of
# re-running `predict` (and recomputing MSE) for each printed line.
test_predictions = dtr.predict(test_features)
test_mse = mse(test_labels, test_predictions)
print("---- Decision Tree Regression - Model Evaluation ----")
print("Mean Absolute Error (MAE): {}".format(mae(test_labels, test_predictions)))
print("Mean Squared Error (MSE): {}".format(test_mse))
print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(test_mse)))

# Prediction function
def score_predict(batting_team, bowling_team, runs, wickets, overs, runs_last_5, wickets_last_5, model=dtr, encoder=le):
    """Predict a score for one match situation.

    The input row is laid out as: one-hot batting team, one-hot bowling team,
    then ``runs, wickets, overs, runs_last_5, wickets_last_5``.

    Parameters
    ----------
    batting_team, bowling_team : str
        Team names; each must be a label known to ``encoder``.
    runs, wickets, overs, runs_last_5, wickets_last_5 : number
        Numeric match-state values, placed in the last five positions of the row.
    model : estimator with ``predict``
        Called with a single-row 2-D array. Defaults to ``dtr``.
    encoder : fitted label encoder
        Maps a team name to its one-hot position via ``transform``; the number
        of entries in ``classes_`` sets the width of each one-hot block.
        Defaults to ``le``.

    Returns
    -------
    The value returned by ``model.predict`` for the single row (for the default
    model, an array holding one prediction).

    Raises
    ------
    ValueError
        Raised by the default encoder when a team name is not a known label.
    """
    # Transform team names to indices
    batting_team_idx, bowling_team_idx = encoder.transform([batting_team, bowling_team])

    # Size the one-hot blocks from the encoder itself rather than a hard-coded 8
    # and a notebook-level global, so the layout follows the `encoder` argument.
    n_teams = len(encoder.classes_)
    match_state = [runs, wickets, overs, runs_last_5, wickets_last_5]

    # Combine all features into a single row
    prediction_array = np.zeros((1, 2 * n_teams + len(match_state)))
    prediction_array[0, batting_team_idx] = 1
    prediction_array[0, n_teams + bowling_team_idx] = 1
    prediction_array[0, -len(match_state):] = match_state

    predicted_score = model.predict(prediction_array)
    return predicted_score

# Collect the match situation interactively.
# Team names are stripped of surrounding whitespace: they are looked up by exact
# label, so a stray leading/trailing space would otherwise be rejected as an
# unknown team. (`float()` / `int()` already ignore surrounding whitespace.)
batting_team = input("Enter batting team: ").strip()
bowling_team = input("Enter bowling team: ").strip()
runs = float(input("Enter runs scored: "))
wickets = int(input("Enter wickets lost: "))
overs = float(input("Enter overs bowled: "))
runs_last_5 = int(input("Enter runs scored in last 5 overs: "))
wickets_last_5 = int(input("Enter wickets lost in last 5 overs: "))

# Test the prediction function
predicted_score = score_predict(batting_team, bowling_team, runs, wickets, overs, runs_last_5, wickets_last_5)
print(f'Predicted Score: {predicted_score[0]}')


Dataset successfully Imported of Shape : (76014, 15)
Before Removing Irrelevant Columns : (76014, 15)
After Removing Irrelevant Columns : (76014, 8)
                bat_team                    bowl_team  runs  wickets  overs  \
0  Kolkata Knight Riders  Royal Challengers Bangalore     1        0    0.1   
1  Kolkata Knight Riders  Royal Challengers Bangalore     1        0    0.2   
2  Kolkata Knight Riders  Royal Challengers Bangalore     2        0    0.2   
3  Kolkata Knight Riders  Royal Challengers Bangalore     2        0    0.3   
4  Kolkata Knight Riders  Royal Challengers Bangalore     2        0    0.4   

   runs_last_5  wickets_last_5  total  
0            1               0    222  
1            1               0    222  
2            2               0    222  
3            2               0    222  
4            2               0    222  
Before Removing Inconsistent Teams : (76014, 8)
After Removing Inconsistent Teams : (53811, 8)
Before Removing Overs : (53811, 8)
After 