<a href="https://colab.research.google.com/github/PriyanshuJoshi1982/IPL-Prediction/blob/main/M4_Random_Forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Step 1: Load the data
df = pd.read_csv("/content/M4_Random_Forest.csv")

# Step 2: Label Encoding
le_city = LabelEncoder()
le_team = LabelEncoder()

df['city_enc'] = le_city.fit_transform(df['City'])
df['team1_enc'] = le_team.fit_transform(df['Team1'])
df['team2_enc'] = le_team.transform(df['Team2'])  # Use same encoder for teams
df['toss_enc'] = le_team.transform(df['TossWinner'])  # Toss winner must be either team1 or team2
df['winner_enc'] = le_team.transform(df['Winner'])  # Final winner

# Step 3: Features and target
X = df[['city_enc', 'team1_enc', 'team2_enc', 'toss_enc']]
y = df['winner_enc']

# Step 4: Train/Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Step 6: Evaluation
y_pred = rf_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le_team.classes_))

# Step 7: Confusion matrix
conf_mat = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_mat, annot=True, fmt='d', xticklabels=le_team.classes_, yticklabels=le_team.classes_)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Winner")
plt.ylabel("Actual Winner")
plt.show()

# Step 8: Prediction Function (uses city instead of venue)
def predict_match_winner(city, team1, team2, toss_winner):
    if any(val not in le_team.classes_ for val in [team1, team2, toss_winner]) or city not in le_city.classes_:
        return "Invalid input: unknown team or city."

    input_data = [[
        le_city.transform([city])[0],
        le_team.transform([team1])[0],
        le_team.transform([team2])[0],
        le_team.transform([toss_winner])[0]
    ]]

    prediction = rf_model.predict(input_data)[0]
    predicted_team = le_team.inverse_transform([prediction])[0]
    prob = rf_model.predict_proba(input_data).max()
    return f"Predicted Winner: {predicted_team} with confidence {prob:.2f}"

# Example prediction
print(predict_match_winner("Mumbai", "CSK", "MI", "MI"))


KeyError: 'City'