In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import pickle

# Read the per-match table and the ball-by-ball table from the working directory.
matches = pd.read_csv("matches.csv")
deliveries = pd.read_csv("deliveries.csv")

# Boolean flags for boundary deliveries; summing them later yields counts.
deliveries['is_four'] = deliveries['batsman_runs'].eq(4)
deliveries['is_six'] = deliveries['batsman_runs'].eq(6)

# Give the match key the same name in both tables so they can be joined on it.
matches = matches.rename(columns={'id': 'match_id'})

# Collapse the inning-1 deliveries to one row per match:
# number of fours, number of sixes, and total runs.
first_innings = deliveries.loc[deliveries['inning'] == 1]
agg = (
    first_innings
    .groupby('match_id')
    .agg(
        total_fours=('is_four', 'sum'),
        total_sixes=('is_six', 'sum'),
        total_runs=('total_runs', 'sum'),
    )
    .reset_index()
)

# Attach the inning-1 totals to every match, then discard matches that have
# no inning-1 deliveries (their total_runs is NaN after the left join).
df = pd.merge(matches, agg, on='match_id', how='left').dropna(subset=['total_runs'])

# Derive the model's feature columns.
# `assign` builds a new frame, so nothing is written into the result of the
# earlier dropna (avoids pandas chained-assignment warnings).
# NOTE(review): the target is the inning-1 total, but `team1` and
# `toss_decision` are taken as-is from the matches table; confirm that
# team1 / the toss decision really describe the side that batted first.
df = df.assign(
    our_team=df['team1'],
    opponent_team=df['team2'],
    # Anything other than 'bat' (including a missing value) is treated as 'bowl'.
    role=df['toss_decision'].eq('bat').map({True: 'bat', False: 'bowl'}),
)

# Single source of truth for the categorical inputs. The encoder below must
# only reference columns that are actually present in X: listing a column
# that X lacks (previously 'city') makes ColumnTransformer raise ValueError
# during fit.
feature_columns = ['venue', 'our_team', 'opponent_team', 'role']

X = df[feature_columns]
y = df['total_runs']

# Preprocessing: one-hot encode every feature; categories unseen during
# training are encoded as all zeros at predict time instead of raising.
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), feature_columns)
])

model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Hold out 20% of the matches with a fixed seed for reproducibility.
# The held-out split (X_test, y_test) is not evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit on the training split and persist the whole pipeline (encoder +
# regressor) so it can be reloaded and given raw feature rows.
model_pipeline.fit(X_train, y_train)

with open("score_predictor_model.pkl", "wb") as f:
    pickle.dump(model_pipeline, f)

print("✅ Model trained and saved as score_predictor_model.pkl")


✅ Model trained and saved as score_predictor_model.pkl
