In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the auction table from the scraper's CSV and preview the first rows.
auction_csv = "../Scrapping/ipl_auction_data.csv"
auc_data = pd.read_csv(auction_csv)
auc_data.head()

Unnamed: 0,Year,Player Name,Team Name,Price (in crores),Status
0,2018,Ms dhoni,Chennai Super Kings,15.0,R
1,2018,Suresh Raina,Chennai Super Kings,11.0,R
2,2018,Deepak chahar,Chennai Super Kings,8.0,S
3,2018,Kedar jadhav,Chennai Super Kings,7.8,S
4,2018,Ravindra jadeja,Chennai Super Kings,7.0,R


In [3]:
# Reload the auction table from the same CSV so the new column is added to a
# freshly read frame.
auc_data = pd.read_csv("../Scrapping/ipl_auction_data.csv")

# Mean price per auction year; the 2025 mean is the reference scale.
price_col = "Price (in crores)"
mean_price_by_year = auc_data.groupby("Year")[price_col].mean().to_dict()
mean_2025 = mean_price_by_year[2025]  # KeyError if the file has no 2025 rows

# Normalized price = (price / mean price of its own year) * mean price of 2025.
# transform("mean") broadcasts each year's mean back onto that year's rows, so
# the whole column is computed at once instead of one Python call per row.
year_mean_price = auc_data.groupby("Year")[price_col].transform("mean")
auc_data["Normalized Price"] = (auc_data[price_col] / year_mean_price) * mean_2025

auc_data.head()

Unnamed: 0,Year,Player Name,Team Name,Price (in crores),Status,Normalized Price
0,2018,Ms dhoni,Chennai Super Kings,15.0,R,22.598056
1,2018,Suresh Raina,Chennai Super Kings,11.0,R,16.571908
2,2018,Deepak chahar,Chennai Super Kings,8.0,S,12.052297
3,2018,Kedar jadhav,Chennai Super Kings,7.8,S,11.750989
4,2018,Ravindra jadeja,Chennai Super Kings,7.0,R,10.54576


In [4]:
# Per-column multipliers used when summing a player's batting stats.
bat_weights = {
    "BattingAverage": 0.1,
    "StrikeRate": 0.1,
    "DBPercent": 0.1,
    "BdryPercent": 0.1,
    "TotalRuns": 2,
    "Balls": 0.5,
}

# Per-column multipliers used when summing a player's bowling stats.
bowl_weights = {
    "StrikeRate": 0.1,
    "DotBallPercent": 0.1,
    "BoundaryPercentage": 0.1,
    "Wickets": 2,
    "EconomyRate": 0.5,
    "OversBowled": 0.5,
}

In [5]:
import difflib
import re

def clean_name(name):
    """Return `name` as a string with outer whitespace removed and lower-cased."""
    text = str(name)
    return text.strip().lower()

def get_best_match(player, player_list, threshold=0.7):
    """Fuzzy-match `player` against the names in `player_list`.

    Both sides are normalised with clean_name() before comparison. Returns the
    original (un-normalised) entry of `player_list` for the single closest
    difflib match scoring at least `threshold`, or None when nothing qualifies.
    """
    normalised = list(map(clean_name, player_list))
    candidates = difflib.get_close_matches(
        clean_name(player), normalised, n=1, cutoff=threshold
    )
    if not candidates:
        return None
    # index() finds the first entry with that normalised form, so entries that
    # normalise identically resolve to the earliest one in player_list.
    return player_list[normalised.index(candidates[0])]
def safe_float(val):
    """Coerce a stat cell to a float, falling back to 0.

    Values that pd.isna() reports as missing give 0. Anything else is
    stringified, every character other than a digit or '.' is removed
    (so "1,234.5" and "68*" still parse), and the remainder goes to float().
    Text that is still not a valid number (e.g. "" or "1.2.3") gives 0.

    Note: the minus sign is among the stripped characters, so a negative
    input comes back as its absolute value.
    """
    if pd.isna(val):
        return 0
    digits = re.sub(r"[^\d.]", "", str(val))
    try:
        return float(digits)
    except ValueError:
        # Catch only the malformed-number case; a bare except would also
        # swallow KeyboardInterrupt and genuine programming errors.
        return 0

In [6]:
def _weighted_score(player, stats, name_col, weights):
    """Weighted sum of `player`'s stats taken from one season table.

    `player` is fuzzy-matched against the names in `stats[name_col]`; the first
    row carrying the matched name supplies the values, each passed through
    safe_float() and multiplied by its entry in `weights`. Returns 0 when no
    name matches.
    """
    matched_name = get_best_match(player, stats[name_col].tolist())
    if not matched_name:
        return 0
    stat_row = stats[stats[name_col] == matched_name].iloc[0]
    return sum(safe_float(stat_row[col]) * weight for col, weight in weights.items())


# The two stat tables depend only on the year, so each season's pair of CSVs is
# read once and reused for every auction row of that year instead of being
# re-read for each row.
season_stats = {}
final_scores = []
for player, year in zip(auc_data["Player Name"], auc_data["Year"]):
    if year not in season_stats:
        season_stats[year] = (
            pd.read_csv(f"./Performance Stats/top_run_scorers_{year}.csv"),
            pd.read_csv(f"./Performance Stats/most_wickets_{year}.csv"),
        )
    bat, bowl = season_stats[year]

    # A player found in neither table scores 0.
    bat_score = _weighted_score(player, bat, "StrikerName", bat_weights)
    bowl_score = _weighted_score(player, bowl, "BowlerName", bowl_weights)
    final_scores.append(bat_score + bowl_score)

auc_data["Score"] = final_scores




In [7]:
# Yearly mean score, computed only over rows whose Status is not "U".
filtered_df = auc_data[auc_data["Status"] != "U"]
mean_score_by_year = filtered_df.groupby("Year")["Score"].mean().to_dict()
# NOTE: this rebinds mean_2025, which the price-normalisation cell set to the
# 2025 mean price; from here on it holds the 2025 mean score.
mean_2025 = mean_score_by_year[2025]  # KeyError if no non-"U" 2025 rows exist

# Normalized score = (score / mean score of its own year) * mean score of 2025,
# computed for the whole column at once. Rows whose year has no entry in
# mean_score_by_year, or whose yearly mean is exactly 0, get 0 instead.
year_mean_score = auc_data["Year"].map(mean_score_by_year)
has_usable_mean = (
    auc_data["Year"].isin(list(mean_score_by_year)) & (year_mean_score != 0)
)
auc_data["Normalized Score"] = (
    (auc_data["Score"] / year_mean_score) * mean_2025
).where(has_usable_mean, 0)
auc_data.head()

Unnamed: 0,Year,Player Name,Team Name,Price (in crores),Status,Normalized Price,Score,Normalized Score
0,2018,Ms dhoni,Chennai Super Kings,15.0,R,22.598056,1083.649,1121.989412
1,2018,Suresh Raina,Chennai Super Kings,11.0,R,16.571908,1074.952,1112.984705
2,2018,Deepak chahar,Chennai Super Kings,8.0,S,12.052297,178.337,184.646713
3,2018,Kedar jadhav,Chennai Super Kings,7.8,S,11.750989,69.909,72.382439
4,2018,Ravindra jadeja,Chennai Super Kings,7.0,R,10.54576,277.238,287.046913


In [8]:
# Remove the team, raw price and raw score columns, then write the remaining
# frame to Final.csv without the index.
dropped_cols = ["Team Name", "Price (in crores)", "Score"]
auc_data = auc_data.drop(columns=dropped_cols)
auc_data.to_csv("Final.csv", index=False)



In [9]:
# Preview the first five rows of the trimmed frame.
auc_data.head(5)

Unnamed: 0,Year,Player Name,Status,Normalized Price,Normalized Score
0,2018,Ms dhoni,R,22.598056,1121.989412
1,2018,Suresh Raina,R,16.571908,1112.984705
2,2018,Deepak chahar,S,12.052297,184.646713
3,2018,Kedar jadhav,S,11.750989,72.382439
4,2018,Ravindra jadeja,R,10.54576,287.046913


In [11]:
# Replace the two text columns with their integer category codes.
for text_col in ("Player Name", "Status"):
    auc_data[text_col] = auc_data[text_col].astype("category").cat.codes

# The 2025 rows form the prediction set; "Normalized Price" is removed from it.
is_2025 = auc_data["Year"] == 2025
testdata = auc_data[is_2025].drop(columns="Normalized Price")

In [12]:
# Keep every row except the 2025 ones, then display the result.
auc_data = auc_data.loc[auc_data["Year"] != 2025]
auc_data

Unnamed: 0,Year,Player Name,Status,Normalized Price,Normalized Score
0,2018,990,0,22.598056,1121.989412
1,2018,1557,0,16.571908,1112.984705
2,2018,387,1,12.052297,184.646713
3,2018,733,1,11.750989,72.382439
4,2018,1256,0,10.545760,287.046913
...,...,...,...,...,...
3281,2024,1702,2,0.000000,0.000000
3282,2024,1713,2,0.000000,0.000000
3283,2024,1714,2,0.000000,0.000000
3284,2024,1719,2,0.000000,0.000000


In [18]:
# Columns fed to the model and the column it predicts.
target = 'Normalized Price'
features = ['Year', 'Player Name', 'Status','Normalized Score']

# Target vector and feature matrix
y = auc_data[target]
X = auc_data[features]




In [19]:

from sklearn.ensemble import RandomForestRegressor

# Fit a 100-tree random forest (fixed seed) on the feature matrix and target.
rf = RandomForestRegressor(random_state=42, n_estimators=100)
rf.fit(X, y)




In [20]:
# A cell renders only its last expression, so a bare `testdata` line would be
# evaluated and discarded. display() renders it explicitly; X is still shown
# as the cell's result.
display(testdata)
X

Unnamed: 0,Year,Player Name,Status,Normalized Score
0,2018,990,0,1121.989412
1,2018,1557,0,1112.984705
2,2018,387,1,184.646713
3,2018,733,1,72.382439
4,2018,1256,0,287.046913
...,...,...,...,...
3281,2024,1702,2,0.000000
3282,2024,1713,2,0.000000
3283,2024,1714,2,0.000000
3284,2024,1719,2,0.000000


In [21]:
# Model output for the 2025 rows held in testdata.
predict = rf.predict(testdata)

In [24]:
# Reload the saved table and keep its 2025 rows. The explicit copy makes `t` an
# independent frame, so adding a column does not trigger pandas'
# SettingWithCopyWarning about writing to a slice of another DataFrame.
t = pd.read_csv("Final.csv")
t = t[t["Year"] == 2025].copy()

# NOTE(review): predictions are attached by position, which assumes these rows
# are in the same order as the rows passed to rf.predict — confirm.
t["Predictions"] = predict

In [25]:
# Display the 2025 rows together with their "Predictions" column.
t

Unnamed: 0,Year,Player Name,Status,Normalized Price,Normalized Score,Predictions
3286,2025,Ravindra jadeja,R,18.0,792.778,10.216827
3287,2025,Ruturaj Gaikwad,R,18.0,312.261,3.250197
3288,2025,Matheesha Pathirana,R,13.0,106.906,2.272336
3289,2025,Shivam dube,R,12.0,874.979,8.123334
3290,2025,Noor Ahmad,S,10.0,121.432,1.780191
...,...,...,...,...,...,...
4020,2025,Yeddala Reddy,U,0.0,0.000,0.000000
4021,2025,Yudhvir Charak,U,0.0,32.977,0.000000
4022,2025,Yuvraj Chaudhary,U,0.0,0.000,0.000000
4023,2025,Yuvraj Chudasama,U,0.0,0.000,0.000000
