In [None]:
# Extracting unique 'Series' values from the match-wise CSV

import csv

def extract_unique_series(input_file):
    """Extract unique entries from the 'Series' column in the given CSV file.

    Args:
        input_file: Path to a CSV file whose header row names a 'Series'
            column.

    Returns:
        set[str]: The distinct, whitespace-stripped 'Series' values. Cells
        that are missing, empty, or contain only whitespace are ignored, so
        the set is empty when the file has no 'Series' column at all.
    """
    unique_series = set()  # A set de-duplicates entries automatically

    # newline='' hands line-ending handling to the csv module, which is what
    # it needs to parse quoted fields containing embedded newlines correctly.
    with open(input_file, mode='r', newline='') as infile:
        reader = csv.DictReader(infile)  # Access columns by header name

        for row in reader:
            # .get() yields None when the column is absent or the row is short
            series_value = (row.get('Series') or '').strip()
            # Test *after* stripping so whitespace-only cells are not kept as ''
            if series_value:
                unique_series.add(series_value)

    return unique_series

def save_to_txt(output_file, unique_series):
    """Save the unique series entries to a text file, one entry per line.

    Args:
        output_file: Path of the text file to create or overwrite.
        unique_series: Iterable of strings. A set (or frozenset) is written
            in sorted order; any other iterable is written in the order it
            yields its items.
    """
    # Iteration order of a set of strings can change from one interpreter
    # run to the next, so sort sets to keep the output file reproducible.
    if isinstance(unique_series, (set, frozenset)):
        unique_series = sorted(unique_series)

    with open(output_file, mode='w') as outfile:
        # Each entry is terminated with a newline so the file is line-oriented
        outfile.writelines(series + '\n' for series in unique_series)

# Example usage: read the match-wise CSV, collect the distinct 'Series'
# values, and write them to a text file, one per line.
csv_file = 'Drumsticks - MatchWiseInfo.csv'  # Replace with your actual CSV file name
output_txt_file = 'unique_series.txt'  # Name of the output text file

# Extract unique series (despite the "_list" suffix, the function returns a set)
unique_series_list = extract_unique_series(csv_file)

# Save the unique entries to a text file
save_to_txt(output_txt_file, unique_series_list)

# Confirmation message; only reached if neither step above raised
print(f"Unique entries saved to '{output_txt_file}' successfully.")

Unique entries saved to 'unique_series.txt' successfully.


In [19]:
import csv

def generate_link(team_name, base_url):
    """Build the series-squads page URL for one team.

    The result has the form
    ``<base_url>/<team-slug>-squad-<series_id>/series-squads``, where
    ``series_id`` is the text after the last hyphen in ``base_url``. The
    series ID is reused in the squad segment for simplicity.

    Args:
        team_name: Team name, with or without a trailing "Squad"
            (e.g. "Delhi Capitals Squad" or "Delhi Capitals").
        base_url: Series URL ending in ``-<series_id>``, with or without a
            trailing slash.

    Returns:
        str: The generated link.
    """
    # Lower-case and hyphenate; split() with no argument also collapses
    # repeated or surrounding whitespace so no empty slug parts appear.
    slug = "-".join(team_name.lower().split())

    # Remove only a trailing "squad" word. It is re-added in a fixed position
    # below, so names given without the suffix produce the same URL.
    if slug.endswith("-squad"):
        slug = slug[: -len("-squad")]

    # Normalise the trailing slash so the join below never doubles or drops it
    base = base_url.rstrip("/")
    # The series ID is whatever follows the last hyphen (up to the next '/')
    series_id = base.rsplit("-", 1)[-1].split("/")[0]

    return f"{base}/{slug}-squad-{series_id}/series-squads"



# Team squad names to build links for
team_names = [
    "Chennai Super Kings Squad",
    "Delhi Capitals Squad",
    "Gujarat Titans Squad",
    "Kolkata Knight Riders Squad",
    "Lucknow Super Giants Squad",
    "Mumbai Indians Squad",
    "Punjab Kings Squad",
    "Rajasthan Royals Squad",
    "Royal Challengers Bengaluru Squad",
    "Sunrisers Hyderabad Squad"
]
base_url = "https://www.espncricinfo.com/series/indian-premier-league-2024-1410320/"

# One {'Link': url} record per team, in the shape csv.DictWriter expects
links = [{'Link': generate_link(team, base_url)} for team in team_names]

# Write the records out as a single-column CSV with a 'Link' header
output_file = 'MatchLinks_Leagues.csv'
with open(output_file, mode='w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['Link'])
    writer.writeheader()
    writer.writerows(links)

print(f"Links saved to '{output_file}' successfully.")

Links saved to 'MatchLinks_Leagues.csv' successfully.


In [38]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import lxml
import csv

# Request headers passed to requests.get() when fetching a squad page.
# The User-Agent is a desktop Chrome-on-Windows browser string, sent in
# place of the default one that `requests` would otherwise use.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
}

def extract_team_and_players(link, timeout=30):
    """Extract team name and player information from the given URL.

    The team name is derived from the second-to-last path segment of the
    URL (e.g. 'colombo-strikers-squad-1437544' -> 'Colombo Strikers').
    Players are read from the first anchor inside each matching <div>: the
    anchor text is the name, and the text after the last hyphen of its href
    is the ID.

    Args:
        link: URL of a team's series-squads page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        pandas.DataFrame: Columns 'Name', 'ID' and 'Team', one row per
        player link found. The frame has no rows if nothing matched.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
    """
    response = requests.get(link, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty frame that would be saved as if scraping had succeeded.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    # Extract the team name from the URL. Dropping a trailing '/' first keeps
    # the squad segment at index -2 whether or not the link ends with one.
    team_name_segment = link.rstrip('/').split('/')[-2]  # e.g. 'colombo-strikers-squad-1437544'
    team_name = team_name_segment.split('-squad')[0].replace('-', ' ').title()

    # Find all relevant divs containing player information
    divs = soup.find_all("div", class_="ds-flex ds-flex-row ds-items-center ds-justify-between")

    names = []
    ids = []

    for div in divs:
        a = div.find("a")  # None when the div contains no anchor
        href = a.get("href") if a is not None else None
        if not href:
            # Nothing to extract from a matching div that carries no link
            continue
        names.append(a.text.strip())
        # The player ID is the text after the last hyphen of the href
        ids.append(href.rstrip("/").split("-")[-1])

    # Create a DataFrame with the extracted data
    df = pd.DataFrame({"Name": names, "ID": ids})
    df["Team"] = team_name  # Same team for every row scraped from this page

    return df

def process_links(input_file, output_file):
    """Process a CSV file containing links and save the extracted data to a new CSV file.

    Each non-blank value in the input's 'Link' column is passed to
    extract_team_and_players(), and the resulting frames are combined and
    written to ``output_file`` without an index column.

    Args:
        input_file: Path to a CSV file with a header row and a 'Link' column.
        output_file: Path of the CSV file to create or overwrite.

    Raises:
        KeyError: If a data row is read from a CSV that has no 'Link' column.
    """
    team_frames = []  # One DataFrame per successfully processed link

    # newline='' lets the csv module handle line endings itself
    with open(input_file, mode='r', newline='') as infile:
        reader = csv.DictReader(infile)  # Assuming the CSV has a header
        for row in reader:
            # A short row yields None for the column; treat it like a blank cell
            link = (row['Link'] or '').strip()
            if not link:
                continue  # Nothing to fetch for a blank link
            team_frames.append(extract_team_and_players(link))

    # Concatenate once instead of growing a frame inside the loop, which
    # re-copies every previously collected row on each iteration.
    # pd.concat raises on an empty list, so fall back to an empty frame.
    if team_frames:
        all_data = pd.concat(team_frames, ignore_index=True)
    else:
        all_data = pd.DataFrame()

    # Save the combined DataFrame to a new CSV file
    all_data.to_csv(output_file, index=False)

# Example usage: scrape every squad page listed in the links CSV and combine
# the players from all of them into a single output CSV.
input_csv_file = 'MatchLinks_Leagues.csv'  # Replace with your actual input CSV file name
# NOTE(review): the link-generation cell in this notebook targets the 2024
# series, yet the output is named '2008.csv' — confirm the intended season.
output_csv_file = '2008.csv'  # Name of the output CSV file

process_links(input_csv_file, output_csv_file)

# Confirmation message; only reached if process_links() did not raise
print(f"Team and player data extracted and saved to '{output_csv_file}' successfully.")

Team and player data extracted and saved to '2008.csv' successfully.
