In [190]:
import requests
from bs4 import BeautifulSoup

# URL of the Women's Premier League 2024 matches page
url = 'https://www.cricbuzz.com/cricket-series/7518/womens-premier-league-2024/matches'

scorecard_urls = []

# Fetch the page content
response = requests.get(url)
if response.status_code == 200:
    page_content = response.text
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")
    exit()

# Parse the HTML content
soup = BeautifulSoup(page_content, 'html.parser')

# Find all match divs
match_divs = soup.find_all('div', class_='cb-col-60 cb-col cb-srs-mtchs-tm')

matches = []

for match_div in match_divs:
    # Extract match details
    match_info = {}
    
    # Extract teams and match number
    match_link = match_div.find('a', class_='text-hvr-underline')
    if match_link:
        match_info['match_title'] = match_link.text.strip()
        match_info['scorecard_url'] = match_link['href']
    
    # Extract venue
    venue_div = match_div.find('div', class_='text-gray')
    if venue_div:
        match_info['venue'] = venue_div.text.strip()
    
    # Extract result
    result_link = match_div.find('a', class_='cb-text-complete')
    if result_link:
        match_info['result'] = result_link.text.strip()
    
    matches.append(match_info)

# Output the extracted match information
for match in matches:
    print(f"Match: {match.get('match_title', 'N/A')}")
    print(f"Venue: {match.get('venue', 'N/A')}")
    print(f"Result: {match.get('result', 'N/A')}")
    print(f"Scorecard URL: https://www.cricbuzz.com{match.get('scorecard_url', 'N/A')}")
    print("-" * 40)
    scorecard_urls.append(f"https://www.cricbuzz.com{match.get('scorecard_url', 'N/A')}")

Match: MUMBAI INDIANS WOMEN vs DELHI CAPITALS WOMEN, 1st Match
Venue: M.Chinnaswamy Stadium, Bengaluru
Result: Mumbai Indians Women won by 4 wkts
Scorecard URL: https://www.cricbuzz.com/cricket-scores/88424/miw-vs-dcw-1st-match-womens-premier-league-2024
----------------------------------------
Match: ROYAL CHALLENGERS BENGALURU WOMEN vs UP WARRIORZ WOMEN, 2nd Match
Venue: M.Chinnaswamy Stadium, Bengaluru
Result: Royal Challengers Bangalore Women won by 2 runs
Scorecard URL: https://www.cricbuzz.com/cricket-scores/88431/rcbw-vs-upw-2nd-match-womens-premier-league-2024
----------------------------------------
Match: GUJARAT GIANTS WOMEN vs MUMBAI INDIANS WOMEN, 3rd Match
Venue: M.Chinnaswamy Stadium, Bengaluru
Result: Mumbai Indians Women won by 5 wkts
Scorecard URL: https://www.cricbuzz.com/cricket-scores/88438/ggtw-vs-miw-3rd-match-womens-premier-league-2024
----------------------------------------
Match: UP WARRIORZ WOMEN vs DELHI CAPITALS WOMEN, 4th Match
Venue: M.Chinnaswamy Stadiu

In [191]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Initialize empty DataFrames for all matches
all_batting_data = []
all_bowling_data = []
all_fielding_data = []
all_potm_data = []

# Configure session with retries
session = requests.Session()
retry_strategy = Retry(
    total=5,
    backoff_factor=2,
    status_forcelist=[429, 500, 502, 503, 504],
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("https://", adapter)
session.mount("http://", adapter)

def extract_match_id(scorecard_url):
    """Extract match_id from the scorecard URL"""
    return scorecard_url.split("/")[-2]  # Extracts the match_id from the URL

def fetch_scorecard(match_id):
    """Fetch scorecard HTML from Cricbuzz API with retry handling"""
    api_url = f"https://www.cricbuzz.com/api/html/cricket-scorecard/{match_id}"
    try:
        response = session.get(api_url, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching scorecard for match {match_id}: {e}")
        return None

def fetch_full_name(player_url):
    """Fetch full player name from their profile page with retry handling"""
    base_url = "https://www.cricbuzz.com"
    full_url = base_url + player_url  # Build full URL
    try:
        response = session.get(full_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        name_tag = soup.find('h1', class_='cb-font-40')  # Player full name appears here
        return name_tag.text.strip() if name_tag else "N/A"
    except requests.exceptions.RequestException as e:
        print(f"Error fetching player name from {full_url}: {e}")
        return "N/A"

def extract_potm(match_url):
    """Extract Player of the Match information from the match page with retry handling"""
    try:
        response = session.get(match_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        potm_div = soup.find('div', class_='cb-col cb-col-50 cb-mom-itm')
        if potm_div:
            player_link = potm_div.find('a', class_='cb-link-undrln')
            if player_link:
                player_name = player_link.text.strip()
                player_id = player_link['href'].split('/')[-2]  # Extract player ID from URL
                return pd.DataFrame([{'Match_ID': match_url.split('/')[-2], 'Player_Name': player_name, 'Player_ID': player_id}])
    except requests.exceptions.RequestException as e:
        print(f"Error fetching POTM data from {match_url}: {e}")
    return None

def parse_scorecard(html_content):
    """Parse batting, bowling, and fielding tables from the scorecard"""
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract team names
    team_names = []
    innings_headers = soup.find_all('div', class_='cb-col cb-col-100 cb-scrd-hdr-rw')
    for header in innings_headers:
        team_name = header.text.split('Innings')[0].strip()
        team_names.append(team_name)

    # Initialize data structures
    batting_tables = []
    bowling_tables = []
    fielding_stats_by_innings = {1: {}, 2: {}}  # Track fielding stats per innings
    team_player_mapping = {}  # Add this line to initialize the mapping
    current_innings = 0  # Add this to track current innings

    # Extract batting tables
    for table in soup.find_all('div', class_='cb-col cb-col-100 cb-ltst-wgt-hdr'):
        if "Batter" in table.text or "Batsman" in table.text:  # Identify batting tables for multiple innings
            current_innings += 1  # Increment innings counter
            rows = []
            batting_team = team_names[current_innings - 1]  # Use current_innings instead of innings
            for row in table.find_all('div', class_='cb-col cb-col-100 cb-scrd-itms'):
                cols = row.find_all('div')
                if len(cols) >= 7:  # Ensure it's a valid batting row
                    player_name = cols[0].text.strip()
                    team_player_mapping[player_name] = batting_team  # Map player to their team
                    player_link = cols[0].find('a')  # Get player profile link
                    full_name = fetch_full_name(player_link['href']) if player_link else player_name
                    
                    dismissal_info = cols[1].text.strip().lower()  # Get dismissal details
                    runs = cols[2].text.strip()
                    balls = cols[3].text.strip()
                    fours = cols[4].text.strip()
                    sixes = cols[5].text.strip()
                    strike_rate = cols[6].text.strip()
                    
                    rows.append([full_name, player_name, runs, balls, fours, sixes, strike_rate])

                    # Initialize fielding stats for this innings if not exists
                    current_fielding_stats = fielding_stats_by_innings[current_innings]
                    
                    # Handle caught & bowled
                    if 'c & b' in dismissal_info:
                        fielder = dismissal_info.split('c & b')[1].strip()
                        current_fielding_stats[fielder] = current_fielding_stats.get(fielder, {"Catches": 0, "Stumpings": 0, "Run Outs": 0})
                        current_fielding_stats[fielder]["Catches"] += 1
                    else:
                        # Handle regular catches: "c Player Name b Bowler Name"
                        # Using negative lookahead to ensure we don't match 'b' that's part of a name
                        catch_match = re.search(r'c (.*?)\s+(?=b\s+)', dismissal_info)
                        if catch_match:
                            fielder = catch_match.group(1).strip()
                            current_fielding_stats[fielder] = current_fielding_stats.get(fielder, {"Catches": 0, "Stumpings": 0, "Run Outs": 0})
                            current_fielding_stats[fielder]["Catches"] += 1

                    # Handle stumpings
                    stumping_match = re.search(r'st (\w+ \w+)', dismissal_info)
                    if stumping_match:
                        fielder = stumping_match.group(1).strip()
                        current_fielding_stats[fielder] = current_fielding_stats.get(fielder, {"Catches": 0, "Stumpings": 0, "Run Outs": 0})
                        current_fielding_stats[fielder]["Stumpings"] += 1

                    # Handle run outs
                    runout_match = re.search(r'run out \(([\w\s/]+)\)', dismissal_info)
                    if runout_match:
                        fielders = runout_match.group(1).strip().split("/")
                        for fielder in fielders:
                            fielder = fielder.strip()
                            current_fielding_stats[fielder] = current_fielding_stats.get(fielder, {"Catches": 0, "Stumpings": 0, "Run Outs": 0})
                            current_fielding_stats[fielder]["Run Outs"] += 1

            # After the batting tables extraction, modify the DNB players code:
            dnb_players = []
            for innings_div in soup.find_all('div', class_='cb-col cb-col-100 cb-scrd-itms'):
                if 'Did not Bat' in innings_div.text:
                    # Find all player links in the DNB section
                    dnb_links = innings_div.find_all('a', class_='cb-text-link')
                    for player_link in dnb_links:
                        player_name = player_link.text.strip()
                        if player_name:  # Only add non-empty names
                            full_name = fetch_full_name(player_link['href'])
                            # Add current innings number and batting team to DNB players
                            dnb_players.append([full_name, player_name, current_innings, batting_team])

            # Convert DNB players to DataFrame with innings and team information
            df_dnb = pd.DataFrame(dnb_players, columns=['Full Name', 'Batsman', 'Innings', 'Team']) if dnb_players else pd.DataFrame(columns=['Full Name', 'Batsman', 'Innings', 'Team'])
            
            batting_tables.append(rows)

    # Extract bowling tables
    for table in soup.find_all('div', class_='cb-col cb-col-100 cb-ltst-wgt-hdr'):
        if "Bowler" in table.text:  # Identify bowling tables
            rows = []
            for row in table.find_all('div', class_='cb-col cb-col-100 cb-scrd-itms'):
                cols = row.find_all('div')
                if len(cols) >= 8:  # Ensure it's a valid bowling row
                    player_name = cols[0].text.strip()
                    player_link = cols[0].find('a')  # Get player profile link
                    full_name = fetch_full_name(player_link['href']) if player_link else player_name
                    
                    overs = cols[1].text.strip()
                    maidens = cols[2].text.strip()
                    runs = cols[3].text.strip()
                    wickets = cols[4].text.strip()
                    no_balls = cols[5].text.strip()
                    wides = cols[6].text.strip()
                    economy = cols[7].text.strip()
                    
                    rows.append([full_name, player_name, overs, maidens, runs, wickets, no_balls, wides, economy])
            bowling_tables.append(rows)

    # Convert fielding stats to DataFrame format
    fielding_data = []
    for innings, stats in fielding_stats_by_innings.items():
        for fielder, contributions in stats.items():
            proper_name = ' '.join(word.capitalize() for word in fielder.split())
            fielding_data.append([
                proper_name,
                contributions["Catches"],
                contributions["Stumpings"],
                contributions["Run Outs"],
                innings,  # Add innings number
                team_names[0] if innings == 2 else team_names[1]  # Add correct bowling team
            ])

    df_fielding = pd.DataFrame(
        fielding_data,
        columns=["Player", "Catches", "Stumpings", "Run Outs", "Innings", "Team"]
    )

    return batting_tables, bowling_tables, df_fielding, team_player_mapping, team_names, df_dnb

def process_match(scorecard_url, match_index, total_matches):
    """Process a single match and append its data to global lists with progress tracking"""
    match_id = extract_match_id(scorecard_url)
    print(f"Processing match {match_index + 1} of {total_matches}: Match ID {match_id}")
    scorecard_html = fetch_scorecard(match_id)
    if scorecard_html:
        batting_data, bowling_data, df_fielding, team_player_mapping, team_names, df_dnb = parse_scorecard(scorecard_html)
        
        # Process Batting Data
        all_batters = []
        for innings, batting_table in enumerate(batting_data, 1):
            df = pd.DataFrame(batting_table, columns=["Full Name", "Batsman", "Runs", "Balls", "4s", "6s", "SR"])
            df['Innings'] = innings
            df['Match_ID'] = match_id
            df['Team'] = team_names[innings-1]
            all_batting_data.append(df)
        
        # Process Bowling Data
        for innings, bowling_table in enumerate(bowling_data, 1):
            df = pd.DataFrame(bowling_table, columns=["Full Name", "Bowler", "Overs", "Maidens", "Runs", "Wickets", "No Balls", "Wides", "Econ"])
            df['Innings'] = innings
            df['Match_ID'] = match_id
            df['Team'] = team_names[1 if innings == 1 else 0]
            all_bowling_data.append(df)

        # Add DNB players if available
        if not df_dnb.empty:
            # DNB players already have innings and team information
            df_dnb['Match_ID'] = match_id
            df_dnb['Runs'] = 0
            df_dnb['Balls'] = 0
            df_dnb['4s'] = 0
            df_dnb['6s'] = 0
            df_dnb['SR'] = 0
            all_batters.append(df_dnb)
        
        all_batting_data.extend(all_batters)
        
        # Process Fielding Data
        df_fielding['Match_ID'] = match_id
        df_fielding['Team'] = df_fielding['Innings'].map({1: team_names[1], 2: team_names[0]})
        
        all_fielding_data.append(df_fielding)
        
        # Extract Player of the Match
        match_url = scorecard_url.replace('/live-cricket-scorecard/', '/cricket-scores/')
        df_potm = extract_potm(match_url)
        if df_potm is not None:
            all_potm_data.append(df_potm)
        
        # Add delay to avoid hitting the server too frequently
        time.sleep(2)

# Process all matches with progress tracking
total_matches = len(scorecard_urls)
for i, scorecard_url in enumerate(scorecard_urls):
    process_match(scorecard_url, i, total_matches)

# Combine data from all matches into final DataFrames
df_batting_final = pd.concat(all_batting_data, ignore_index=True)
df_bowling_final = pd.concat(all_bowling_data, ignore_index=True)
df_fielding_final = pd.concat(all_fielding_data, ignore_index=True)
df_potm_final = pd.concat(all_potm_data, ignore_index=True) if all_potm_data else pd.DataFrame()

# Create a more flexible name mapping system
def create_name_variations(name):
    """Create different variations of a name for matching"""
    name = name.strip()
    variations = {name}  # Original name
    
    # Split name into parts
    parts = name.split()
    
    if len(parts) > 1:
        # Last name only
        variations.add(parts[-1])
        
        # First letter of first name + last name
        variations.add(f"{parts[0][0]} {parts[-1]}")
        
        # First name + last name (for cases with middle names)
        variations.add(f"{parts[0]} {parts[-1]}")
        
        # Handle initials
        if any('.' in part for part in parts):
            # Remove dots from initials
            no_dots = ' '.join(part.replace('.', '') for part in parts)
            variations.add(no_dots)
    
    return variations

# Then when creating the name mapping, it will include DNB players as well:
name_mapping = {}
for batsman, full_name in zip(df_batting_final['Batsman'], df_batting_final['Full Name']):
    batsman_variations = create_name_variations(batsman)
    full_name_variations = create_name_variations(full_name)
    
    for variation in batsman_variations | full_name_variations:
        name_mapping[variation.lower()] = full_name

# Update player names in fielding DataFrame
df_fielding_final['Player'] = df_fielding_final['Player'].apply(
    lambda x: name_mapping.get(x.strip().lower(), x)
)

# Now drop the Batsman column
df_batting_final = df_batting_final.drop('Batsman', axis=1)
df_bowling_final = df_bowling_final.drop('Bowler', axis=1)

# Display final DataFrames
print("\nBatting Statistics (All Matches)")
print(df_batting_final)
print("\nBowling Statistics (All Matches)")
print(df_bowling_final)
print("\nFielding Statistics (All Matches)")
print(df_fielding_final)
if not df_potm_final.empty:
    print("\nPlayers of the Match")
    print(df_potm_final)

Processing match 1 of 22: Match ID 88424
Processing match 2 of 22: Match ID 88431
Processing match 3 of 22: Match ID 88438
Processing match 4 of 22: Match ID 88445
Processing match 5 of 22: Match ID 88452
Processing match 6 of 22: Match ID 88459
Processing match 7 of 22: Match ID 88466
Processing match 8 of 22: Match ID 88473
Processing match 9 of 22: Match ID 88480
Processing match 10 of 22: Match ID 88487
Processing match 11 of 22: Match ID 88494
Processing match 12 of 22: Match ID 88501
Processing match 13 of 22: Match ID 88508
Processing match 14 of 22: Match ID 88515
Processing match 15 of 22: Match ID 88522
Processing match 16 of 22: Match ID 88529
Processing match 17 of 22: Match ID 88536
Processing match 18 of 22: Match ID 88543
Processing match 19 of 22: Match ID 88550
Processing match 20 of 22: Match ID 88557
Processing match 21 of 22: Match ID 88564
Processing match 22 of 22: Match ID 88571

Batting Statistics (All Matches)
               Full Name Runs Balls 4s 6s      SR  

In [192]:
df_batting_final

Unnamed: 0,Full Name,Runs,Balls,4s,6s,SR,Innings,Match_ID,Team
0,Meg Lanning,31,25,3,1,124.00,1,88424,Delhi Capitals Women
1,Shafali Verma,1,8,0,0,12.50,1,88424,Delhi Capitals Women
2,Alice Capsey,75,53,9,3,141.51,1,88424,Delhi Capitals Women
3,Jemimah Rodrigues,42,24,5,2,175.00,1,88424,Delhi Capitals Women
4,Marizanne Kapp,16,9,3,0,177.78,1,88424,Delhi Capitals Women
...,...,...,...,...,...,...,...,...,...
480,Georgia Wareham,0,0,0,0,0,2,88571,Royal Challengers Bangalore Women
481,Shreyanka Patil,0,0,0,0,0,2,88571,Royal Challengers Bangalore Women
482,Disha Kasat,0,0,0,0,0,2,88571,Royal Challengers Bangalore Women
483,Asha Sobhana,0,0,0,0,0,2,88571,Royal Challengers Bangalore Women


In [193]:
import pandas as pd
import math

# Ensure numeric columns are properly converted
numeric_columns = ['Runs', 'Balls', '4s', '6s', 'SR']
df_batting_final[numeric_columns] = df_batting_final[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

# Calculate batting points
def calculate_sr_points(row):
    """Calculate Strike Rate points based on the given criteria"""
    if row['Balls'] < 10:
        return 0
        
    sr = row['SR']
    if sr < 50:
        return -15
    elif sr < 75:
        return -10
    elif sr < 100:
        return -5
    elif sr < 125:
        return 0
    elif sr < 150:
        return 5
    elif sr < 200:
        return 10
    else:
        return 15

def calculate_bonus_points(runs):
    """Calculate bonus points for every 25 runs"""
    return (runs // 25) * 10

# Calculate individual point components
df_batting_final['Run_Points'] = df_batting_final['Runs'] * 1
df_batting_final['Four_Points'] = df_batting_final['4s'] * 2
df_batting_final['Six_Points'] = df_batting_final['6s'] * 5
df_batting_final['SR_Points'] = df_batting_final.apply(calculate_sr_points, axis=1)
df_batting_final['Bonus_Points'] = df_batting_final['Runs'].apply(calculate_bonus_points)

# Calculate total batting points
df_batting_final['Batting_Points'] = (
    df_batting_final['Run_Points'] + 
    df_batting_final['Four_Points'] + 
    df_batting_final['Six_Points'] + 
    df_batting_final['SR_Points'] + 
    df_batting_final['Bonus_Points']
)

# Optionally, drop the intermediate point columns if you don't need them
df_batting_final.drop(['Run_Points', 'Four_Points', 'Six_Points', 'SR_Points', 'Bonus_Points'], axis=1, inplace=True)

# Display the updated DataFrame
print(df_batting_final)

# Ensure numeric columns are properly converted for bowling
numeric_columns = ['Overs', 'Maidens', 'Runs', 'Wickets', 'No Balls', 'Wides', 'Econ']
df_bowling_final[numeric_columns] = df_bowling_final[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

# Calculate bowling points
def calculate_economy_points(row):
    """Calculate Economy Rate points based on the given criteria"""
    if row['Overs'] < 1:
        return 0
        
    economy = row['Econ']
    if economy < 5.01:
        return 20
    elif economy < 6.01:
        return 15
    elif economy < 7.01:
        return 10
    elif economy < 8.01:
        return 5
    elif economy < 9.01:
        return 0
    elif economy < 10.01:
        return -5
    elif economy < 12.01:
        return -10
    else:
        return -20

def calculate_wicket_bonus(wickets):
    """Calculate bonus points for wicket milestones (2+ wickets)"""
    return max(0, (wickets - 1) * 10)  # Ensures no negative values

# Calculate individual point components
df_bowling_final['Wicket_Points'] = df_bowling_final['Wickets'] * 20
df_bowling_final['Economy_Points'] = df_bowling_final.apply(calculate_economy_points, axis=1)
df_bowling_final['No_Ball_Points'] = df_bowling_final['No Balls'] * -2
df_bowling_final['Wide_Points'] = (df_bowling_final['Wides'] // 2) * -1  # -1 point per 2 wides
df_bowling_final['Wicket_Bonus_Points'] = df_bowling_final['Wickets'].apply(calculate_wicket_bonus)
df_bowling_final['Maiden_Points'] = df_bowling_final['Maidens'] * 20

# Calculate total bowling points
df_bowling_final['Bowling_Points'] = (
    df_bowling_final['Wicket_Points'] + 
    df_bowling_final['Economy_Points'] + 
    df_bowling_final['No_Ball_Points'] + 
    df_bowling_final['Wide_Points'] + 
    df_bowling_final['Wicket_Bonus_Points'] +
    df_bowling_final['Maiden_Points']
)

# Optionally, drop the intermediate point columns
df_bowling_final.drop([
    'Wicket_Points', 'Economy_Points', 'No_Ball_Points', 
    'Wide_Points', 'Wicket_Bonus_Points', 'Maiden_Points'
], axis=1, inplace=True)

# Display the updated DataFrame
print(df_bowling_final)

# Ensure numeric columns are properly converted for fielding
numeric_columns = ['Catches', 'Stumpings', 'Run Outs']
df_fielding_final[numeric_columns] = df_fielding_final[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

# Calculate fielding points
df_fielding_final['Catch_Points'] = df_fielding_final['Catches'] * 10
df_fielding_final['Stumping_Points'] = df_fielding_final['Stumpings'] * 10
df_fielding_final['RunOut_Points'] = df_fielding_final['Run Outs'] * 10

# Calculate total fielding points
df_fielding_final['Fielding_Points'] = (
    df_fielding_final['Catch_Points'] + 
    df_fielding_final['Stumping_Points'] + 
    df_fielding_final['RunOut_Points']
)

# Optionally, drop the intermediate point columns
df_fielding_final.drop(['Catch_Points', 'Stumping_Points', 'RunOut_Points'], axis=1, inplace=True)

# Display the updated DataFrame
print(df_fielding_final)

# Add Player of the Match points
if df_potm_final is not None:
    df_potm_final['POTM_Points'] = 50
    
    # Display the updated DataFrame
    print("\nPlayer of the Match with Points:")
    print(df_potm_final)

               Full Name  Runs  Balls  4s  6s      SR  Innings Match_ID  \
0            Meg Lanning    31     25   3   1  124.00        1    88424   
1          Shafali Verma     1      8   0   0   12.50        1    88424   
2           Alice Capsey    75     53   9   3  141.51        1    88424   
3      Jemimah Rodrigues    42     24   5   2  175.00        1    88424   
4         Marizanne Kapp    16      9   3   0  177.78        1    88424   
..                   ...   ...    ...  ..  ..     ...      ...      ...   
480      Georgia Wareham     0      0   0   0    0.00        2    88571   
481      Shreyanka Patil     0      0   0   0    0.00        2    88571   
482          Disha Kasat     0      0   0   0    0.00        2    88571   
483         Asha Sobhana     0      0   0   0    0.00        2    88571   
484  Renuka Thakur Singh     0      0   0   0    0.00        2    88571   

                                  Team  Batting_Points  
0                 Delhi Capitals Women    

In [194]:
df_potm_final

Unnamed: 0,Match_ID,Player_Name,Player_ID,POTM_Points
0,88424,Harmanpreet Kaur,9082,50
1,88431,Asha Sobhana,41186,50
2,88438,Amelia Kerr,11855,50
3,88445,Marizanne Kapp,9182,50
4,88452,Renuka Thakur Singh,14491,50
5,88459,Kiran Navgire,26143,50
6,88466,Marizanne Kapp,9182,50
7,88473,Grace Harris,11101,50
8,88480,Amelia Kerr,11855,50
9,88487,Jess Jonassen,9537,50


In [200]:
# First, let's create copies of our dataframes with just the necessary columns
batting_points = df_batting_final[['Full Name', 'Team', 'Match_ID', 'Batting_Points']].copy()
bowling_points = df_bowling_final[['Full Name', 'Team', 'Match_ID', 'Bowling_Points']].copy()
fielding_points = df_fielding_final[['Player', 'Match_ID', 'Fielding_Points']].copy()  # Remove Team from fielding

# Rename Player column in fielding to match others
fielding_points = fielding_points.rename(columns={'Player': 'Full Name'})

# Create a player-team mapping from batting data
player_team_mapping = batting_points[['Full Name', 'Team', 'Match_ID']].drop_duplicates()

# If a player doesn't exist in batting, try to get their team from bowling data
bowling_team_mapping = bowling_points[['Full Name', 'Team', 'Match_ID']].drop_duplicates()
player_team_mapping = pd.concat([player_team_mapping, bowling_team_mapping]).drop_duplicates()

# Merge all points, starting with the player-team mapping as the base
fantasy_points = player_team_mapping.merge(
    batting_points[['Full Name', 'Match_ID', 'Batting_Points']], 
    on=['Full Name', 'Match_ID'], 
    how='left'
).merge(
    bowling_points[['Full Name', 'Match_ID', 'Bowling_Points']], 
    on=['Full Name', 'Match_ID'], 
    how='left'
).merge(
    fielding_points[['Full Name', 'Match_ID', 'Fielding_Points']], 
    on=['Full Name', 'Match_ID'], 
    how='left'
)

# Fill NaN values with 0 for points columns
fantasy_points['Batting_Points'] = fantasy_points['Batting_Points'].fillna(0)
fantasy_points['Bowling_Points'] = fantasy_points['Bowling_Points'].fillna(0)
fantasy_points['Fielding_Points'] = fantasy_points['Fielding_Points'].fillna(0)

# Handle POTM points
if df_potm_final is not None:
    # Create a unique key for each player-match combination
    df_potm_final['match_player_key'] = df_potm_final['Player_Name'].str.strip() + '_' + df_potm_final['Match_ID'].astype(str)
    fantasy_points['match_player_key'] = fantasy_points['Full Name'].str.strip() + '_' + fantasy_points['Match_ID'].astype(str)
    
    # Create mapping using the composite key
    potm_points = dict(zip(df_potm_final['match_player_key'], df_potm_final['POTM_Points']))
    
    # Map POTM points using the composite key
    fantasy_points['POTM_Points'] = fantasy_points['match_player_key'].map(potm_points).fillna(0)
    
    # Clean up by removing the temporary key column
    fantasy_points.drop('match_player_key', axis=1, inplace=True)
else:
    fantasy_points['POTM_Points'] = 0

# Calculate total Fantasy Points
fantasy_points['Fantasy_Points'] = (
    fantasy_points['Batting_Points'] + 
    fantasy_points['Bowling_Points'] + 
    fantasy_points['Fielding_Points'] + 
    fantasy_points['POTM_Points']
)

# Sort by Fantasy Points in descending order
fantasy_points = fantasy_points.sort_values('Fantasy_Points', ascending=False)

# Create final leaderboard with rounded values
leaderboard = fantasy_points[[
    'Full Name', 'Team', 'Match_ID', 'Fantasy_Points',
    'Batting_Points', 'Bowling_Points', 'Fielding_Points', 'POTM_Points'
]].round(2)

print("\nDetailed Fantasy Points Leaderboard:")
print(leaderboard)


Detailed Fantasy Points Leaderboard:
                  Full Name                               Team Match_ID  \
410            Ellyse Perry  Royal Challengers Bangalore Women    88550   
311           Deepti Sharma                        UP Warriorz    88522   
343        Harmanpreet Kaur               Mumbai Indians Women    88529   
58              Amelia Kerr               Mumbai Indians Women    88438   
388           Deepti Sharma                        UP Warriorz    88543   
..                      ...                                ...      ...   
20   Keerthana Balakrishnan               Mumbai Indians Women    88424   
191     Renuka Thakur Singh               Mumbai Indians Women    88480   
523            Meghna Singh                     Gujarat Giants    88529   
345            Meghna Singh               Mumbai Indians Women    88529   
484     Renuka Thakur Singh  Royal Challengers Bangalore Women    88571   

     Fantasy_Points  Batting_Points  Bowling_Points  Fielding

In [201]:
leaderboard.head()

Unnamed: 0,Full Name,Team,Match_ID,Fantasy_Points,Batting_Points,Bowling_Points,Fielding_Points,POTM_Points
410,Ellyse Perry,Royal Challengers Bangalore Women,88550,315.0,65,190.0,10.0,50.0
311,Deepti Sharma,UP Warriorz,88522,276.0,96,130.0,0.0,50.0
343,Harmanpreet Kaur,Mumbai Indians Women,88529,240.0,180,0.0,10.0,50.0
58,Amelia Kerr,Mumbai Indians Women,88438,237.0,47,130.0,10.0,50.0
388,Deepti Sharma,UP Warriorz,88543,236.0,161,65.0,10.0,0.0


In [203]:
# Group by 'Full Name' and 'Team' to sum all match-wise statistics
leaderboard_summary = leaderboard.groupby(['Full Name', 'Team'], as_index=False).sum(numeric_only=True)

# Sort by total Fantasy Points in descending order
leaderboard_summary = leaderboard_summary.sort_values(by='Fantasy_Points', ascending=False)

# Display the final summarized leaderboard
display(leaderboard_summary) 

Unnamed: 0,Full Name,Team,Fantasy_Points,Batting_Points,Bowling_Points,Fielding_Points,POTM_Points
21,Ellyse Perry,Royal Challengers Bangalore Women,912.0,574,228.0,10.0,100.0
17,Deepti Sharma,UP Warriorz,888.0,518,300.0,20.0,50.0
80,Shafali Verma,Delhi Capitals Women,713.0,603,0.0,60.0,50.0
3,Amelia Kerr,Mumbai Indians Women,630.0,341,159.0,30.0,100.0
68,Richa Ghosh,Royal Challengers Bangalore Women,629.0,459,0.0,170.0,0.0
...,...,...,...,...,...,...,...
88,Simran Bahadur,Mumbai Indians Women,-10.0,0,-10.0,0.0,0.0
110,Titas Sadhu,Royal Challengers Bangalore Women,-10.0,0,-10.0,0.0,0.0
40,Lea Tahuhu,Royal Challengers Bangalore Women,-10.0,0,-10.0,0.0,0.0
52,Minnu Mani,Royal Challengers Bangalore Women,-20.0,0,-20.0,0.0,0.0


In [211]:
def calculate_player_prices(df):
    # Sort DataFrame by Fantasy_Points in descending order if not already sorted
    df = df.sort_values('Fantasy_Points', ascending=False).copy()
    
    # Set minimum price
    MIN_PRICE = 5.0  # Reduced to 5M
    
    # Calculate price based on fantasy points but with reduced scaling
    df['Price'] = MIN_PRICE + (df['Fantasy_Points'] / 150)  # Divided by 150 instead of 100
    
    # Round prices to 1 decimal place
    df['Price'] = df['Price'].round(1)
    
    return df

# Apply pricing
leaderboard_summary = calculate_player_prices(leaderboard_summary)

# Display results
print("\nPlayer Prices (in Millions):")
print(leaderboard_summary[['Full Name', 'Team', 'Fantasy_Points', 'Price']].to_string())


Player Prices (in Millions):
                  Full Name                               Team  Fantasy_Points  Price
21             Ellyse Perry  Royal Challengers Bangalore Women           912.0   11.1
17            Deepti Sharma                        UP Warriorz           888.0   10.9
80            Shafali Verma               Delhi Capitals Women           713.0    9.8
3               Amelia Kerr               Mumbai Indians Women           630.0    9.2
68              Richa Ghosh  Royal Challengers Bangalore Women           629.0    9.2
91          Smriti Mandhana  Royal Challengers Bangalore Women           625.0    9.2
44           Marizanne Kapp               Delhi Capitals Women           616.0    9.1
54         Nat Sciver-Brunt               Mumbai Indians Women           599.0    9.0
25             Grace Harris                        UP Warriorz           588.0    8.9
45              Meg Lanning               Delhi Capitals Women           580.0    8.9
27         Harmanpreet K

In [212]:
# Save to CSV
leaderboard_summary.to_csv('player_prices.csv', index=False)