In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
import pickle
# 1. Load the Data
# Read the dataset from the CSV file in the working directory and report its
# size before any rows are discarded.
df = pd.read_csv('final_dataset.csv')
print(f"Original Shape: {df.shape}")

# --- SEARCH AND DESTROY ---
# Count infinite entries first (restricted to the numeric columns) so the log
# shows how much bad data was present before cleaning.
numeric_part = df.select_dtypes(include=np.number)
count_inf = np.isinf(numeric_part).sum().sum()
print(f"Infinite values found: {count_inf}")

# Flag every row in which at least one column holds +Infinity or -Infinity,
# then keep the unflagged rows and drop any that still contain NaN.
inf_row_mask = df.isin([np.inf, -np.inf]).any(axis=1)
df = df.loc[~inf_row_mask].dropna()

print(f"Shape after cleaning: {df.shape}")
# --------------------------

# 2. Separate Inputs (X) and Output (y)
# 'result' is the label column; every other column is a model input.
y = df['result']
X = df.drop(columns=['result'])

# 3. Split Data
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# 4. Create Pipeline
# One-hot encode the three categorical columns as a dense array, dropping the
# first level of each to avoid a redundant indicator column. All remaining
# columns are passed through to the classifier unchanged.
categorical_cols = ['batting_team', 'bowling_team', 'city']
trf = ColumnTransformer(
    transformers=[
        ('trf', OneHotEncoder(sparse_output=False, drop='first'), categorical_cols),
    ],
    remainder='passthrough',
)

# Encoding and classification live in one estimator so the same preprocessing
# is applied at fit time and at prediction time.
classifier = LogisticRegression(solver='liblinear')
pipe = Pipeline(steps=[('step1', trf), ('step2', classifier)])

# 5. Train
print("Training model...")
pipe.fit(X_train, y_train)

# 6. Check Accuracy
# Score the fitted pipeline on the held-out rows only.
y_pred = pipe.predict(X_test)
print(f"--- Accuracy Score: {accuracy_score(y_test, y_pred)*100:.2f}% ---")

# 7. Predict our Scenario
# A single hand-written match situation, expressed as one record whose keys
# match the feature columns the pipeline was trained on.
scenario = {
    'batting_team': 'Chennai Super Kings',
    'bowling_team': 'Mumbai Indians',
    'city': 'Mumbai',
    'runs_left': 74,
    'balls_left': 30,
    'wickets': 5,
    'target': 180,
    'crr': 7.1,
    'rrr': 14.8,
}
input_df = pd.DataFrame([scenario])

# predict_proba returns one row per input row and one column per class.
# NOTE(review): column 1 is read as "batting team wins"; this presumes the
# 'result' labels are 0/1 with 1 meaning a batting-side win -- confirm.
prob = pipe.predict_proba(input_df)
print(f"\nCSK Win Probability: {prob[0][1]*100:.2f}%")

# Save the trained pipeline to a file
# The context manager guarantees the handle is flushed and closed as soon as
# the dump finishes (or fails), so 'pipe.pkl' is complete on disk before the
# success message is printed.
# NOTE: pickle files execute code when loaded; only load 'pipe.pkl' from a
# trusted location.
with open('pipe.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)

print("--- SUCCESS: Model saved as 'pipe.pkl' ---")

Original Shape: (90353, 10)
Infinite values found: 767
Shape after cleaning: (89586, 10)
Training model...
--- Accuracy Score: 81.40% ---

CSK Win Probability: 2.78%
