In [28]:
import requests

def display_webpage_content(url, timeout=10):
    """Fetch ``url`` with a browser-like User-Agent and print the response body.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without it a stalled
            connection would block the notebook cell indefinitely.

    Returns:
        None. The outcome (page body, non-200 status code, or request error)
        is reported on stdout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Send a GET request to the specified URL with the custom User-Agent
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        # Only transport-level failures are reported here; programming errors
        # are allowed to surface instead of being hidden behind this message.
        print(f"An error occurred: {e}")
        return

    if response.status_code == 200:
        print("Page loaded successfully!")
        print("Response Content:")
        print(response.text)  # Print the entire response content
    else:
        print(f"Failed to load the page. Status code: {response.status_code}")

# Example usage: request the Chennai Super Kings 2008 squad page and print its raw HTML.
url = "https://www.iplt20.com/teams/chennai-super-kings/squad/2008#list"
display_webpage_content(url)

Page loaded successfully!
Response Content:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title id="page_title" data-page-title="Chennai Super Kings | CSK 2025 Squad & Overview | IPLT20">Chennai Super Kings | CSK 2025 Squad & Overview | IPLT20</title>
    <link rel="canonical" href="https://www.iplt20.com/teams/chennai-super-kings/squad/2008">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
    <!-- <meta name="viewport" content="width=device-width, user-scalable=no" /> -->
    <meta name="csrf-token" content="vdrfJjPll1tCbcHey3mcFhK4jmhiAnTSwVObdRPn">

    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <meta name="theme-color" content="#19398a">
    <meta name="og:type" content="website">
    <meta name="og:url"  content="https://www.iplt20.com/teams/chennai-super-kings/squad/2008">
    <meta pro

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

def display_webpage_content_and_extract_player_names(year, team_url, timeout=10):
    """Download one team's squad page for a season and return the player names.

    Args:
        year: Season used in the squad URL (e.g. ``2008``).
        team_url: Team slug used in the URL (e.g. ``'chennai-super-kings'``).
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout unless one is passed, so a single stalled request would
            otherwise hang a multi-season scraping loop indefinitely.

    Returns:
        The list produced by ``extract_player_names`` for the page, or an
        empty list when the status code is not 200 or any error occurs.
    """
    url = f"https://www.iplt20.com/teams/{team_url}/squad/{year}#list"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Send a GET request to the specified URL with the custom User-Agent
        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            print(f"Failed to load the page for year {year}. Status code: {response.status_code}")
            return []

        print(f"Page loaded successfully for year {year}!")
        return extract_player_names(response.text)
    except Exception as e:
        # Deliberately best-effort: one failing season is reported and yields
        # an empty squad instead of aborting the caller's loop.
        print(f"An error occurred for year {year}: {e}")
        return []

def extract_player_names(html_content):
    """Collect player names from the squad page markup.

    Looks at every ``<a>`` tag whose ``data-event_context`` attribute equals
    ``'player'`` and reads its ``data-player_name`` attribute.

    Args:
        html_content: HTML text of the squad page.

    Returns:
        List of player names in document order. Anchors that lack a
        ``data-player_name`` attribute (or carry an empty one) are skipped.
    """
    # Parse the HTML content
    soup = BeautifulSoup(html_content, 'html.parser')

    player_names = []
    for link in soup.find_all('a', attrs={'data-event_context': 'player'}):
        # .get() instead of link[...]: a single anchor without the attribute
        # would raise KeyError, and the fetching function's broad except
        # would then discard the whole season's squad.
        player_name = link.get('data-player_name')
        if player_name:
            player_names.append(player_name)

    return player_names

def save_player_names_to_csv(start_year, end_year, filename, team_url):
    """Scrape a team's squad for each season and store one CSV row per season.

    Every season from ``start_year`` through ``end_year`` (inclusive) is
    fetched first; only afterwards is ``filename`` written as UTF-8 with the
    header ``Year, Player Names`` and the squad joined by ``', '``.
    """
    # Fetch everything up front so the file is only opened once all seasons are in.
    squads_by_season = {
        season: display_webpage_content_and_extract_player_names(season, team_url)
        for season in range(start_year, end_year + 1)
    }

    with open(filename, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(['Year', 'Player Names'])  # Header row
        out.writerows(
            [season, ', '.join(squad)]
            for season, squad in squads_by_season.items()
        )

# Example usage: scrape every franchise from its first listed season through 2025.
for _first_season, _csv_name, _team_slug in (
    (2008, 'CSK.csv', 'chennai-super-kings'),
    (2008, 'DC.csv', 'delhi-capitals'),
    (2022, 'GT.csv', 'gujarat-titans'),
    (2013, 'SRH.csv', 'sunrisers-hyderabad'),
    (2008, 'KKR.csv', 'kolkata-knight-riders'),
    (2022, 'LSG.csv', 'lucknow-super-giants'),
    (2008, 'MI.csv', 'mumbai-indians'),
    (2008, 'PK.csv', 'punjab-kings'),
    (2008, 'RR.csv', 'rajasthan-royals'),
    (2008, 'RCB.csv', 'royal-challengers-bangalore'),
):
    save_player_names_to_csv(_first_season, 2025, _csv_name, _team_slug)

In [4]:
import csv

def create_yearly_played_csv(input_file, output_file, start_year=2008, end_year=2025):
    """Pivot a per-player CSV into one 'played' column per season.

    The input must have one row per player with the columns ``Player Name``
    and ``Years`` (a comma-separated list of seasons). Note that the CSVs
    written by ``save_player_names_to_csv`` use the opposite layout
    (``Year`` / ``Player Names``); a later cell redefines this function for
    that layout.

    Args:
        input_file: Path of the per-player CSV to read (UTF-8).
        output_file: Path of the CSV to write (UTF-8).
        start_year: First season column to emit.
        end_year: Last season column to emit (inclusive).

    Raises:
        KeyError: If a row lacks the ``Player Name`` or ``Years`` column.
    """
    years = [str(year) for year in range(start_year, end_year + 1)]
    player_data = {}

    # Explicit UTF-8: the scraper writes UTF-8, and relying on the locale
    # default can garble or reject non-ASCII names on some platforms.
    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
        for row in csv.DictReader(infile):
            # Split on ',' and strip so "2008,2009" and "2008, 2009" both work.
            seasons = {
                token.strip()
                for token in (row['Years'] or '').split(',')
                if token.strip()
            }
            player_data[row['Player Name']] = seasons  # set for O(1) lookup

    with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Player Name'] + years)  # Write header with years

        for player, seasons in player_data.items():
            writer.writerow(
                [player] + ['played' if year in seasons else '' for year in years]
            )

# Build "<TEAM>_Yearly_Played.csv" from "<TEAM>.csv" for every franchise.
# NOTE(review): if these inputs are the files written by save_player_names_to_csv,
# their header is 'Year', 'Player Names', which the function defined in this cell
# (it reads 'Player Name' / 'Years') does not accept — confirm which layout is intended.
for _team_code in ('CSK', 'DC', 'GT', 'KKR', 'LSG', 'MI', 'PK', 'RR', 'SRH', 'RCB'):
    create_yearly_played_csv(f'{_team_code}.csv', f'{_team_code}_Yearly_Played.csv')

In [20]:
import csv

def create_yearly_played_csv(input_file, output_file):
    """Pivot a per-season squad CSV into one row per player.

    The input has one row per season with the columns ``Year`` and
    ``Player Names`` (names separated by commas). The output has a
    ``Player Name`` column followed by one column per season from 2008 to
    2025, holding ``'played'`` where the player appears in that season's squad.

    Args:
        input_file: Path of the per-season CSV to read (UTF-8).
        output_file: Path of the CSV to write (UTF-8).

    Raises:
        KeyError: If a row lacks the ``Year`` or ``Player Names`` column.
    """
    years = [str(year) for year in range(2008, 2026)]  # 2008 through 2025
    # One set per season: membership tests below are O(1) instead of list scans.
    players_by_year = {year: set() for year in years}

    # Explicit UTF-8 to match the encoding the scraper writes with.
    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
        for row in csv.DictReader(infile):
            year = row['Year']
            names = row['Player Names']
            if year not in players_by_year or not names:
                continue  # season outside the reported range, or empty squad
            players_by_year[year].update(
                name.strip() for name in names.split(',') if name.strip()
            )

    # Sorted so the output is identical on every run; iterating a set of
    # strings directly gives an order that can change between interpreter runs.
    all_players = sorted(set().union(*players_by_year.values()))

    with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Player Name'] + years)  # Write header with years

        for player in all_players:
            writer.writerow(
                [player]
                + ['played' if player in players_by_year[year] else '' for year in years]
            )

def process_multiple_csv_files(input_files):
    """Run ``create_yearly_played_csv`` on each file in ``input_files``.

    For every input path the output is written to the current working
    directory as ``<team>_Yearly_Played.csv``, where ``<team>`` is the input's
    file name without its directory and final extension.

    Args:
        input_files: Iterable of paths to per-season squad CSV files.
    """
    import os  # local import: this cell only imports csv

    for input_file in input_files:
        # os.path instead of manual split('/') / split('.'): it respects the
        # platform's path separator and strips only the final extension, so
        # "MI.2025.csv" and "MI.2024.csv" no longer collapse to the same output.
        team_name = os.path.splitext(os.path.basename(input_file))[0]
        output_file = f"{team_name}_Yearly_Played.csv"
        create_yearly_played_csv(input_file, output_file)

# Example usage: the per-season squad files, one per franchise.
csv_files = [
    f'{team_code}.csv'
    for team_code in ('RR', 'SRH', 'CSK', 'DC', 'GT', 'KKR', 'LSG', 'MI', 'PK', 'RCB')
]

process_multiple_csv_files(csv_files)  # Writes one "<TEAM>_Yearly_Played.csv" per input

In [21]:
import csv
import glob
from collections import defaultdict

def create_master_csv(team_files_pattern, output_file):
    """Merge per-team "played" grids into one player-by-season table of team names.

    Each file matching ``team_files_pattern`` is expected to have a
    ``Player Name`` column plus one column per season (2008-2025) containing
    ``'played'`` where applicable. In the output, each such cell holds the
    team name, taken from the part of the file name before the first ``_``.

    Players with no ``'played'`` cell in any file are not written. If a player
    is marked for two teams in the same season, the file processed later wins;
    files are processed in sorted path order so that outcome is repeatable.

    Args:
        team_files_pattern: Glob pattern selecting the per-team files.
        output_file: Path of the master CSV to write (UTF-8).

    Raises:
        KeyError: If a row lacks the ``Player Name`` column.
    """
    import os  # local import: this cell imports only csv, glob and defaultdict

    years = [str(year) for year in range(2008, 2026)]  # 2008 through 2025
    player_years = defaultdict(lambda: {year: '' for year in years})

    # glob returns matches in arbitrary order; sort for reproducible output.
    team_files = sorted(glob.glob(team_files_pattern))
    output_path = os.path.abspath(output_file)

    for team_file in team_files:
        if os.path.abspath(team_file) == output_path:
            continue  # never ingest a previous run's output as a team file

        # Use the base name so a directory in the pattern (or an underscore in
        # a directory name) cannot leak into the team name.
        team_name = os.path.basename(team_file).split('_')[0]
        with open(team_file, mode='r', newline='', encoding='utf-8') as infile:
            for row in csv.DictReader(infile):
                player_name = row['Player Name']
                for year in years:
                    if row.get(year) == 'played':
                        player_years[player_name][year] = team_name

    with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Player Name'] + years)  # Write header with years

        for player, participation in player_years.items():
            writer.writerow([player] + [participation[year] for year in years])

# Merge every "*_Yearly_Played.csv" in the working directory into a single master file.
create_master_csv('*_Yearly_Played.csv', 'Master_Player_Years.csv')

In [13]:
def find_absent_elements(list1, list2):
    """Return the distinct elements of ``list1`` that do not occur in ``list2``.

    Args:
        list1: Iterable of hashable values to check.
        list2: Iterable of hashable values to check against.

    Returns:
        A list without duplicates, in order of first appearance in ``list1``.
        A plain set difference would return the same values, but in an order
        that can change from run to run.
    """
    excluded = set(list2)
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(item for item in list1 if item not in excluded))

# NOTE(review): `player_names_required` and `player_names_obtained` are not defined in any
# cell visible here — presumably left over from an earlier kernel session. Define them
# before this cell (or confirm where they come from) so it survives Restart & Run All.
absent = find_absent_elements(player_names_required, player_names_obtained)
print("Elements in player_names_required absent in player_names_obtained:", absent)

Elements in player_names_required absent in player_names_obtained: ['Heinrich Klaassen', 'Tilak Varma', 'Varun Chakravarthy', 'Arshad Khan', 'Swastik Chikara', 'Manvanth Kumar', 'Sai Sudharsan', 'Brydon Carse', 'Vyshak Vijaykumar', 'Mohammed Shami', 'Shahbaz Ahmed', 'Satyanarayana Raju', 'Aniket Varma', 'Kumar Kartikeya', 'Andre Siddharth', 'Bevon Jacobs']


In [22]:
import csv

def filter_players_by_last_entry(input_file, output_file, year='2025'):
    """Copy the rows of ``input_file`` whose ``year`` column is non-empty.

    Args:
        input_file: Path of the CSV to read (UTF-8); its header must contain
            a column named ``year``.
        output_file: Path of the CSV to write (UTF-8), using the same columns
            as the input.
        year: Name of the column that must be non-empty for a row to be kept.

    Raises:
        KeyError: If a data row has no ``year`` column.

    An input with no header line produces an empty output file.
    """
    with open(input_file, mode='r', newline='', encoding='utf-8') as infile:
        reader = csv.DictReader(infile)
        # Read the header while the file is still open: DictReader loads it
        # lazily, and asking for it after the `with` block fails on an empty
        # file with "I/O operation on closed file".
        fieldnames = reader.fieldnames
        filtered_players = [row for row in reader if row[year]]

    with open(output_file, mode='w', newline='', encoding='utf-8') as outfile:
        if fieldnames:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(filtered_players)

# Keep only the master rows whose 2025 column is non-empty.
filter_players_by_last_entry('Master_Player_Years.csv', 'Final_Players_YearWise_IPL_data.csv')

In [1]:
import csv

def extract_first_column_to_list(input_file, encoding=None):
    """Return the first-column values of a CSV file, skipping its first line.

    Args:
        input_file: Path of the CSV file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform default; pass e.g. ``'utf-8'`` when the file's
            encoding is known.

    Returns:
        List with the first field of every non-empty row after the first
        line, which is treated as a header. An empty file yields ``[]``.
    """
    # newline='' is what the csv module expects so quoted line breaks parse correctly.
    with open(input_file, mode='r', newline='', encoding=encoding) as infile:
        reader = csv.reader(infile)
        # Default of None: a bare next(reader) raises StopIteration on an empty file.
        next(reader, None)
        return [row[0] for row in reader if row]

# Example usage
# List 1: first column of the PlayerWiseInfo sheet export.
csv_file = 'data/RishabhGod - PlayerWiseInfo.csv'  # Replace with your actual CSV file name
first_column_list_1 = extract_first_column_to_list(csv_file)

# List 2: first column of the filtered year-wise file (csv_file is deliberately reused).
csv_file = 'Final_Players_YearWise_IPL_data.csv'  # Replace with your actual CSV file name
first_column_list_2 = extract_first_column_to_list(csv_file)

In [4]:
# Number of first-column entries read from each of the two files.
print(len(first_column_list_1))
print(len(first_column_list_2))

220
228


In [5]:
def find_absent_elements(list1, list2):
    """Return the distinct elements of ``list1`` that do not occur in ``list2``.

    Args:
        list1: Iterable of hashable values to check.
        list2: Iterable of hashable values to check against.

    Returns:
        A list without duplicates, in order of first appearance in ``list1``.
        A plain set difference would return the same values, but in an order
        that can change from run to run.
    """
    excluded = set(list2)
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(item for item in list1 if item not in excluded))

# Entries of the year-wise file (list 2) that are missing from the PlayerWiseInfo sheet (list 1).
absent = find_absent_elements(first_column_list_2, first_column_list_1)
print("Elements in first_column_list_2 absent in first_column_list_1:", absent)

Elements in first_column_list_2 absent in first_column_list_1: ['Harnoor Pannu', 'Sherfane Rutherford', 'Priyansh Arya', 'Gurnoor Singh Brar', 'Musheer Khan', 'Pyla Avinash', 'Corbin Bosch', 'Wiaan Mulder', 'Lockie Ferguson', 'Akash Singh', 'Jayant Yadav']


In [6]:
def find_absent_elements(list1, list2):
    """Return the distinct elements of ``list1`` that do not occur in ``list2``.

    Args:
        list1: Iterable of hashable values to check.
        list2: Iterable of hashable values to check against.

    Returns:
        A list without duplicates, in order of first appearance in ``list1``.
        A plain set difference would return the same values, but in an order
        that can change from run to run.
    """
    excluded = set(list2)
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(item for item in list1 if item not in excluded))

# Entries of the PlayerWiseInfo sheet (list 1) that are missing from the year-wise file (list 2).
absent = find_absent_elements(first_column_list_1, first_column_list_2)
print("Elements in first_column_list_1 absent in first_column_list_2:", absent)

Elements in first_column_list_1 absent in first_column_list_2: ['Gurnoor Brar', 'Brydon Carse', 'Lizaad Williams']
