<a href="https://colab.research.google.com/github/PriyanshuJoshi1982/IPL-Prediction/blob/main/Location_Team_Performance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Load the dataset
df = pd.read_csv("/content/M3_Location_Performance.csv", names=['venue', 'team', 'result'])  # Add header names

# Step 2: Clean data
df.dropna(inplace=True)
df['venue'] = df['venue'].str.strip()
df['team'] = df['team'].str.strip()
df['result'] = df['result'].str.strip().str.lower()

# Step 3: Encode categorical variables
le_venue = LabelEncoder()
le_team = LabelEncoder()
le_result = LabelEncoder()

df['venue_encoded'] = le_venue.fit_transform(df['venue'])
df['team_encoded'] = le_team.fit_transform(df['team'])
df['result_encoded'] = le_result.fit_transform(df['result'])  # Won=1, Loss=0

# Step 4: Split dataset
X = df[['venue_encoded', 'team_encoded']]
y = df['result_encoded']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 6: Evaluate
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Step 7: Predict probability of win for a team at a venue
def predict_win_probability(team_name, venue_name):
    team_encoded = le_team.transform([team_name])[0]
    venue_encoded = le_venue.transform([venue_name])[0]
    prob = model.predict_proba([[venue_encoded, team_encoded]])[0][1]
    return round(prob, 3)

# Example Usage
team_input = "India"
venue_input = "MCG"
print(f"\nEstimated probability of {team_input} winning at {venue_input}: {predict_win_probability(team_input, venue_input)}")

# Optional: Heatmap for visual team performance at venues
performance = df.groupby(['venue', 'team'])['result_encoded'].mean().unstack()
plt.figure(figsize=(14, 8))
sns.heatmap(performance, annot=True, cmap='YlGnBu', fmt=".2f")
plt.title("Win Probability (Team vs Venue)")
plt.ylabel("Venue")
plt.xlabel("Team")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()



Classification Report:
              precision    recall  f1-score   support

           0       0.45      0.83      0.58       197
           2       0.56      0.17      0.26       242

    accuracy                           0.47       439
   macro avg       0.51      0.50      0.42       439
weighted avg       0.51      0.47      0.41       439

Accuracy Score: 0.46924829157175396


ValueError: y contains previously unseen labels: 'India'

# New Section