In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import re

# --- Build the per-player reference price table ------------------------------
# Two sources are combined: auction results and per-year retention prices.
cricket_data = pd.read_csv('IPLPlayerAuctionData.csv')
ipl_prices = pd.read_csv('ipl_retention_prices.csv')

# Convert `Amount` to crores (divide by 1e7).
# Assumes `Amount` is a numeric rupee value — TODO confirm against the CSV.
cricket_data['Amount_Crores'] = cricket_data['Amount'] / 1e7

# Identify retention price columns dynamically: any column whose name contains
# "<4-digit year> Retention Price (INR Crores)".
price_columns = [col for col in ipl_prices.columns if re.search(r'\d{4} Retention Price \(INR Crores\)', col)]

# Average retention price per player over every year column found above,
# ignoring years with no value.
ipl_prices['Average_Price_Crores'] = ipl_prices[price_columns].mean(axis=1, skipna=True)

# Align the player-name column so both sources can be stacked and grouped.
cricket_data = cricket_data.rename(columns={'Player': 'Player_Name'})
ipl_prices = ipl_prices.rename(columns={'Player Name': 'Player_Name'})
merged_prices = pd.concat([cricket_data[['Player_Name', 'Amount_Crores']],
                           ipl_prices[['Player_Name', 'Average_Price_Crores']]], axis=0)

# One row per player; each column holds that player's maximum non-missing value.
average_prices = merged_prices.groupby('Player_Name').max()

# The two stacked frames have different value columns, so a player that only
# appears in the auction file has NaN in `Average_Price_Crores` — the column
# the budget selection reads. Back-fill it from the auction amount so those
# players still have a usable reference price.
average_prices['Average_Price_Crores'] = average_prices['Average_Price_Crores'].fillna(
    average_prices['Amount_Crores']
)

# Load the per-player, per-year statistics
df = pd.read_csv('cricket_data.csv')

# Remove rows where 'Year' is 'No stats'.
# `.copy()` makes the filtered frame independent of the original, so the
# column assignments below do not raise SettingWithCopyWarning or risk
# writing to a temporary view.
df = df[df['Year'] != 'No stats'].copy()

# Clean data in 'Highest_Score' column by removing '*' (literal match, not a regex)
df['Highest_Score'] = df['Highest_Score'].str.replace('*', '', regex=False)

# Columns that must be numeric before any rating is computed
numeric_cols = ['Runs_Scored', 'Highest_Score', 'Batting_Average', 'Batting_Strike_Rate',
                'Centuries', 'Half_Centuries', 'Fours', 'Sixes', 'Wickets_Taken',
                'Bowling_Average', 'Economy_Rate', 'Bowling_Strike_Rate', 'Four_Wicket_Hauls',
                'Five_Wicket_Hauls', 'Catches_Taken', 'Stumpings']

# Values that cannot be parsed become NaN (errors='coerce'); the rating
# functions skip NaN entries.
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Relative weight of each batting statistic in the raw batting score.
batting_weights = dict(
    Runs_Scored=0.20,
    Highest_Score=0.10,
    Batting_Average=0.20,
    Batting_Strike_Rate=0.25,
    Centuries=0.10,
    Half_Centuries=0.05,
    Fours=0.05,
    Sixes=0.05,
)


def calculate_bat_rating(row):
    """Return the weighted sum of the batting statistics present in *row*.

    Each statistic listed in ``batting_weights`` is multiplied by its weight;
    statistics whose value is missing (NaN/None) contribute nothing. A row
    with no usable statistics yields 0.
    """
    contributions = [
        row[stat] * factor
        for stat, factor in batting_weights.items()
        if pd.notnull(row[stat])
    ]
    return sum(contributions)

# Raw batting score for every row, stored as 'Bat_Rating'
raw_bat_scores = df.apply(calculate_bat_rating, axis=1)
df['Bat_Rating'] = raw_bat_scores

# Min-max scale the raw score, then express it on a 0-100 scale.
# `scaler` is reused by the later rating sections.
scaler = MinMaxScaler()
scaled_bat = scaler.fit_transform(df[['Bat_Rating']])
df['Normalized_Bat_Rating'] = scaled_bat * 100

# Mean normalized rating per player, best first
bat_by_player = df.groupby('Player_Name')['Normalized_Bat_Rating'].mean()
average_bat_ratings = bat_by_player.sort_values(ascending=False)

# Show the 15 highest-rated batsmen under a simple two-column header
print("\nTop 15 Batsmen:\n")
print(f"{'Name':<20} {'Rating':<10}")
top_batsmen = average_bat_ratings.head(15)
print(top_batsmen.to_string(header=False))

# Relative weight of each bowling statistic in the raw bowling score.
bowling_weights = dict(
    Wickets_Taken=0.30,
    Economy_Rate=0.25,
    Bowling_Average=0.20,
    Bowling_Strike_Rate=0.15,
    Four_Wicket_Hauls=0.05,
    Five_Wicket_Hauls=0.05,
)


def calculate_bowling_rating(row):
    """Return the raw bowling score for *row*.

    Economy rate, bowling average and bowling strike rate contribute
    ``weight / (1 + value)``, so smaller values score higher; every other
    statistic contributes ``value * weight``. Missing (NaN/None) statistics
    are skipped.

    NOTE(review): a value of 0 in one of the inverted statistics earns the
    full weight — confirm that 0 never stands for "did not bowl" in the data.
    """
    inverted_stats = ('Economy_Rate', 'Bowling_Average', 'Bowling_Strike_Rate')
    score = 0
    for stat, factor in bowling_weights.items():
        value = row[stat]
        if pd.isnull(value):
            continue
        if stat in inverted_stats:
            score += factor / (1 + value)
        else:
            score += value * factor
    return score

# Raw bowling score for every row, stored as 'Bowling_Rating'
raw_bowl_scores = df.apply(calculate_bowling_rating, axis=1)
df['Bowling_Rating'] = raw_bowl_scores

# Min-max scale the raw score (refitting the shared scaler), then express it
# on a 0-100 scale
scaled_bowl = scaler.fit_transform(df[['Bowling_Rating']])
df['Normalized_Bowling_Rating'] = scaled_bowl * 100

# Mean normalized rating per player, best first
bowl_by_player = df.groupby('Player_Name')['Normalized_Bowling_Rating'].mean()
average_bowling_ratings = bowl_by_player.sort_values(ascending=False)

# Show the 15 highest-rated bowlers under a simple two-column header
print("\nTop 15 Bowlers:\n")
print(f"{'Name':<20} {'Rating':<10}")
top_bowlers = average_bowling_ratings.head(15)
print(top_bowlers.to_string(header=False))

# Relative weight of each statistic in the raw wicket-keeper score.
keeper_weights = dict(
    Runs_Scored=0.20,
    Catches_Taken=0.40,
    Stumpings=0.40,
)


def calculate_keeper_rating(row):
    """Return the weighted sum of the keeper statistics present in *row*.

    Each statistic listed in ``keeper_weights`` is multiplied by its weight;
    missing (NaN/None) values are left out of the sum.
    """
    contributions = [
        row[stat] * factor
        for stat, factor in keeper_weights.items()
        if pd.notnull(row[stat])
    ]
    return sum(contributions)

# Raw keeper score for every row, stored as 'Keeper_Rating'
raw_keeper_scores = df.apply(calculate_keeper_rating, axis=1)
df['Keeper_Rating'] = raw_keeper_scores

# Min-max scale the raw score (refitting the shared scaler), then express it
# on a 0-100 scale
scaled_keeper = scaler.fit_transform(df[['Keeper_Rating']])
df['Normalized_Keeper_Rating'] = scaled_keeper * 100

# Mean normalized rating per player, best first
keeper_by_player = df.groupby('Player_Name')['Normalized_Keeper_Rating'].mean()
average_keeper_ratings = keeper_by_player.sort_values(ascending=False)

# Show the 15 highest-rated keepers under a simple two-column header
print("\nTop 15 Wicket Keepers:\n")
print(f"{'Name':<20} {'Rating':<10}")
top_keepers = average_keeper_ratings.head(15)
print(top_keepers.to_string(header=False))

# Keep only rows whose normalized batting and bowling ratings are within
# 20 points of each other; those rows are treated as all-rounder seasons.
rating_gap = (df['Normalized_Bat_Rating'] - df['Normalized_Bowling_Rating']).abs()
all_rounder_ratings = df[rating_gap <= 20].copy()

# The all-rounder rating is an equal-weight blend of the two normalized ratings
bat_half = all_rounder_ratings['Normalized_Bat_Rating'] * 0.5
bowl_half = all_rounder_ratings['Normalized_Bowling_Rating'] * 0.5
all_rounder_ratings['All_Rounder_Rating'] = bat_half + bowl_half

# Mean all-rounder rating per player, best first
all_rounder_by_player = all_rounder_ratings.groupby('Player_Name')['All_Rounder_Rating'].mean()
average_all_rounder_ratings = all_rounder_by_player.sort_values(ascending=False)

# Show the 15 highest-rated all-rounders under a simple two-column header
print("\nTop 15 All-Rounders:\n")
print(f"{'Name':<20} {'Rating':<10}")
top_all_rounders = average_all_rounder_ratings.head(15)
print(top_all_rounders.to_string(header=False))

# Ask how many players are wanted for each role. The prompts are issued in
# the order batsmen, bowlers, all-rounders, wicket-keepers, and each reply
# must parse as an integer.
num_bat, num_bowl, num_allrounder, num_keeper = [
    int(input(prompt))
    for prompt in (
        "Enter the number of batsmen you want: ",
        "Enter the number of bowlers you want: ",
        "Enter the number of all-rounders you want: ",
        "Enter the number of wicket-keepers you want: ",
    )
]

# One (initially empty) list of chosen names per role
selected_batsmen, selected_bowlers, selected_keepers, selected_allrounders = [], [], [], []

# Names already placed in some role, so nobody is picked twice
selected_players = set()

# Helper function to add a player to the appropriate list if not already selected
def add_player(rating_list, num_needed, selected_list, selected_players):
    """Pick up to *num_needed* not-yet-selected players from *rating_list*.

    Players are taken in the order of ``rating_list.index``. Every pick is
    appended to *selected_list* and added to *selected_players*; both
    containers are modified in place and nothing is returned.

    Args:
        rating_list: Object exposing an ``index`` of player names (e.g. a
            pandas Series of ratings).
        num_needed: Number of players to pick. Zero or a negative number
            picks nobody.
        selected_list: List receiving the picked names.
        selected_players: Set of names already used in any role.
    """
    # Check the quota before touching anything: comparing only after an
    # append would never match a quota of 0 and every remaining player
    # would be added.
    if num_needed <= 0:
        return

    added = 0
    for player in rating_list.index:
        if player in selected_players:
            continue
        selected_list.append(player)
        selected_players.add(player)
        added += 1
        if added >= num_needed:
            break

# Fill the roles one after another: batsmen, bowlers, wicket keepers, then
# all-rounders. A player taken by an earlier role is skipped by later ones
# because every pick is recorded in `selected_players`.
role_allocations = (
    (average_bat_ratings, num_bat, selected_batsmen),
    (average_bowling_ratings, num_bowl, selected_bowlers),
    (average_keeper_ratings, num_keeper, selected_keepers),
    (average_all_rounder_ratings, num_allrounder, selected_allrounders),
)
for role_ratings, role_quota, role_picks in role_allocations:
    add_player(role_ratings, role_quota, role_picks, selected_players)

# Display the dream team without duplicates, one name per line under each
# role heading
print("\nYour Dream Team:\n")

team_sections = (
    ("Batsmen:", selected_batsmen),
    ("\nBowlers:", selected_bowlers),
    ("\nWicket Keepers:", selected_keepers),
    ("\nAll Rounders:", selected_allrounders),
)
for section_heading, section_players in team_sections:
    print(section_heading)
    print("\n".join(section_players))



# Price adjustment helper used by the budget-constrained team selection
def calculate_price_multiplier(rating, avg_rating, std_dev):
    """Return the factor by which a player's reference price is scaled.

    The factor is ``1 + 0.1 * (rating - avg_rating) / std_dev``: each standard
    deviation above (below) the average rating adds (removes) 10%.

    Args:
        rating: The player's rating.
        avg_rating: Mean rating of the group the player is compared against.
        std_dev: Standard deviation of that group's ratings.

    Returns:
        The multiplier, or 1 (no adjustment) when ``std_dev`` is zero or
        undefined.
    """
    # A NaN spread (e.g. the sample std of a single rating) would otherwise
    # pass the "!= 0" test and turn the multiplier, and every price derived
    # from it, into NaN.
    if pd.isna(std_dev) or std_dev == 0:
        return 1
    return 1 + (rating - avg_rating) * 0.1 / std_dev

def dream_team_with_budget(df, average_bat_ratings, average_bowling_ratings, average_keeper_ratings, 
                           average_all_rounder_ratings, average_prices):
    """Interactively pick a team per role without exceeding each role's budget.

    Prompts for a total budget (crores) and the percentage of it reserved for
    each role, selects players role by role, and prints the resulting team
    with predicted prices. Nothing is returned.

    Args:
        df: Not used by this function; kept so existing callers keep working.
        average_bat_ratings: Series of batting ratings indexed by player name.
        average_bowling_ratings: Series of bowling ratings indexed by player name.
        average_keeper_ratings: Series of keeper ratings indexed by player name.
        average_all_rounder_ratings: Series of all-rounder ratings indexed by
            player name.
        average_prices: DataFrame indexed by player name with an
            'Average_Price_Crores' column and, optionally, an 'Amount_Crores'
            column used when the former is missing for a player.

    Raises:
        ValueError: If any reply cannot be parsed as a number.
    """

    total_budget = float(input("Enter your total budget in crores: "))
    bat_percent = float(input("Enter percentage of budget for batsmen: ")) / 100
    bowl_percent = float(input("Enter percentage of budget for bowlers: ")) / 100
    keeper_percent = float(input("Enter percentage of budget for keepers: ")) / 100
    allrounder_percent = float(input("Enter percentage of budget for all-rounders: ")) / 100

    # NOTE(review): the percentages are not checked to add up to 100 or less.
    budget_batsmen = total_budget * bat_percent
    budget_bowlers = total_budget * bowl_percent
    budget_keepers = total_budget * keeper_percent
    budget_allrounders = total_budget * allrounder_percent

    dream_team = {"Batsmen": [], "Bowlers": [], "Keepers": [], "All-Rounders": []}

    # Function to select players within budget
    def select_within_budget(ratings, budget, role):
        """Return ``(player, predicted_price)`` pairs whose total fits *budget*.

        Players are visited in the order of *ratings*. A player's predicted
        price is the reference price scaled by how far the rating sits from
        the group mean.
        """
        avg_rating = ratings.mean()
        std_dev = ratings.std()
        # The std of a single-entry (or empty) series is NaN; use 0 so the
        # multiplier helper falls back to its neutral value instead of
        # producing NaN prices.
        if pd.isnull(std_dev):
            std_dev = 0

        players_selected = []
        total_cost = 0
        for player, rating in ratings.items():
            if player not in average_prices.index:
                print(f"Player {player} not in dataset; moving to the next player.")
                continue

            avg_price = average_prices.loc[player, 'Average_Price_Crores']
            # Players known only from auction data may have no retention
            # average; use their auction amount when that column is present.
            if pd.isnull(avg_price) and 'Amount_Crores' in average_prices.columns:
                avg_price = average_prices.loc[player, 'Amount_Crores']
            if pd.isnull(avg_price):
                # A NaN price would fail every budget comparison below and
                # drop the player without any message.
                print(f"Player {player} has no price data; moving to the next player.")
                continue

            multiplier = calculate_price_multiplier(rating, avg_rating, std_dev)
            predicted_price = avg_price * multiplier
            if total_cost + predicted_price <= budget:
                players_selected.append((player, predicted_price))
                total_cost += predicted_price
            elif players_selected:
                # The candidate does not fit on top of the current picks; try
                # swapping it for the most recently added player instead.
                # NOTE(review): when `ratings` is sorted best-first this trades
                # a higher-rated pick for a lower-rated one — confirm intended.
                last_player, last_price = players_selected[-1]
                swapped_cost = total_cost - last_price + predicted_price
                if swapped_cost <= budget:
                    players_selected[-1] = (player, predicted_price)
                    total_cost = swapped_cost
                    print(f"Replaced {last_player} with {player} to fit budget in {role}.")
        return players_selected

    # Select players for each role within the budget
    dream_team["Batsmen"] = select_within_budget(average_bat_ratings, budget_batsmen, "Batsmen")
    dream_team["Bowlers"] = select_within_budget(average_bowling_ratings, budget_bowlers, "Bowlers")
    dream_team["Keepers"] = select_within_budget(average_keeper_ratings, budget_keepers, "Keepers")
    dream_team["All-Rounders"] = select_within_budget(average_all_rounder_ratings, budget_allrounders, "All-Rounders")

    # Print the final dream team with predicted prices
    print("\nFinal Dream Team with Predicted Prices:\n")
    for role, players in dream_team.items():
        print(f"\n{role}:")
        for player, price in players:
            print(f"{player:<20} Predicted Price: {price:.2f} crores")

# Run the interactive, budget-constrained selection using the per-player
# rating series and the reference price table built earlier in this script.
dream_team_with_budget(
    df=df,
    average_bat_ratings=average_bat_ratings,
    average_bowling_ratings=average_bowling_ratings,
    average_keeper_ratings=average_keeper_ratings,
    average_all_rounder_ratings=average_all_rounder_ratings,
    average_prices=average_prices,
)




All Players by Batting Rating:
 Player_Name
Devon Conway           0.561360
David Warner           0.524214
Virat Kohli            0.512859
Shubman Gill           0.492449
Kyle Mayers            0.480128
                         ...   
Matheesha Pathirana    0.000000
Suyash Sharma          0.000000
Sai Kishore            0.000000
Luke Wood              0.000000
Joshua Little          0.000000
Name: Normalized_Bat_Rating, Length: 206, dtype: float64 

Top 10 Batsmen:
 Player_Name
Devon Conway       0.561360
David Warner       0.524214
Virat Kohli        0.512859
Shubman Gill       0.492449
Kyle Mayers        0.480128
KL Rahul           0.479368
Shikhar Dhawan     0.475986
Jos Buttler        0.471306
Ruturaj Gaikwad    0.469903
Rishabh Pant       0.463063
Name: Normalized_Bat_Rating, dtype: float64 



All Players by Bowling Rating:
 Player_Name
Rashid Khan         0.528927
Mukesh Choudhary    0.484331
Noor Ahmad          0.479547
Yuzvendra Chahal    0.475073
Kagiso Rabada       0.4704