In [1]:
import requests
import json
import re
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

In [2]:
# Common URL prefix of the feed files; a per-match file name
# ("<match_id>-squad.js") is appended to it when the URLS list is built.
BASE_URL = 'https://ipl-stats-sports-mechanic.s3.ap-south-1.amazonaws.com/ipl/feeds/'

In [3]:
# Match IDs to download. Each ID yields one "<id>-squad.js" feed URL and,
# on success, one "<id>-Squad.csv" output file.
MATCH_IDS = [1799]

In [4]:
# Build one squad-feed URL per match ID (prefix + "<id>-squad.js"),
# then echo the list as the cell output.
URLS = ['{}{}-squad.js'.format(BASE_URL, mid) for mid in MATCH_IDS]
URLS

['https://ipl-stats-sports-mechanic.s3.ap-south-1.amazonaws.com/ipl/feeds/1799-squad.js']

In [5]:
# Request headers sent with every download: a desktop Chrome User-Agent
# so the request looks like it comes from a regular browser.
HEADERS = {
    "User-Agent": " ".join([
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    ])
}

In [6]:
def fetch_and_process_data(url, timeout=30):
    """Download one squad feed, unwrap its payload and save squad A as CSV.

    The response body is expected to be a JavaScript callback call of the
    form ``name({...})``; the JSON document is taken from between the first
    ``(`` and the last ``)``. If no such wrapper is present, the whole body
    is parsed as JSON instead. The ``"squadA"`` entry of the document is
    written to ``<match_id>-Squad.csv`` in the current working directory.

    Failures are reported with ``print`` instead of being raised, so one bad
    URL does not abort the other downloads when this runs in a thread pool.

    Args:
        url: Feed URL containing a file name of the form
            ``<match_id>-squad.js``; the digits become the CSV file prefix.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout a stalled connection would block
            the worker thread indefinitely.

    Returns:
        None. The result is the CSV file written to disk.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()

        # Strip the callback wrapper only when both parentheses are really
        # there; find()/rfind() return -1 otherwise, which would silently
        # slice off the last character of a plain JSON body.
        payload = response.text
        start_index = payload.find('(')
        end_index = payload.rfind(')')
        if start_index != -1 and end_index > start_index:
            payload = payload[start_index + 1:end_index]
        data = json.loads(payload)

        # Extract the squad A entry.
        # NOTE(review): only "squadA" is exported; any other squad entries
        # in the feed are ignored - confirm this is intended.
        squad_a = data["squadA"]

        # Derive the output name from the match ID embedded in the URL.
        id_match = re.search(r'(\d+)-squad\.js', url)
        if id_match is None:
            raise ValueError("URL does not contain '<match_id>-squad.js'")
        match_id = id_match.group(1)

        # Create DataFrame and save to CSV
        df = pd.DataFrame(squad_a)
        csv_file_name = f"{match_id}-Squad.csv"
        df.to_csv(csv_file_name, index=False)

        print(f"Saved: {csv_file_name}")

    except requests.exceptions.RequestException as e:
        print(f"Request failed for {url}: {e}")
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON for {url}: {e}")
    except KeyError as e:
        print(f"KeyError: Missing expected key in the JSON data for {url}: {e}")
    except Exception as e:
        # Last-resort catch so a worker thread never dies silently.
        print(f"An error occurred for {url}: {e}")

In [7]:
# Fan the downloads out over a pool of worker threads. Results are not
# collected; leaving the `with` block waits for all submitted tasks.
with ThreadPoolExecutor(max_workers=10) as executor:
    for feed_url in URLS:
        executor.submit(fetch_and_process_data, feed_url)

Saved: 1799-Squad.csv
