In [None]:
import json
import time

import pandas as pd
import requests
from tqdm import tqdm

In [None]:
# Step 1: Fixtures/results feed URL (use the actual link you copied from DevTools)
url = "https://www.rugbypremierleague.in/feeds/live/FixturesAndResults.json"

# Step 2: Make the request
# The timeout stops an unresponsive server from hanging the kernel indefinitely.
headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json"
}
response = requests.get(url, headers=headers, timeout=10)

In [None]:
# Step 3: Parse the body as JSON, failing fast on any non-200 response
if response.status_code != 200:
    raise Exception(f"❌ Failed to fetch data: {response.status_code}")

data = response.json()
print("✅ Data loaded successfully")


✅ Data loaded successfully


In [None]:
# Parse the response body again and list its top-level keys to see how the payload is organised
json_data = response.json()
print(json_data.keys())

dict_keys(['matches'])


In [None]:
# Step 4: Normalize JSON
# Flatten the records stored under data['matches'] into a DataFrame
matches = pd.json_normalize(data['matches'])  # data['matches'] holds the match records


In [None]:
# Show every column name produced by the normalisation step
print(list(matches.columns))

['is_rescheduled', 'event_status', 'series_id', 'event_state', 'event_sub_status_id', 'result_code', 'event_islinkable', 'event_status_id', 'result_sub_code', 'league_code', 'clock_minutes', 'venue_id', 'event_sub_status', 'game_id', 'match_number', 'Match_ID', 'buy_tickets', 'participants', 'tour_id', 'end_date', 'winning_margin', 'venue_name', 'event_group', 'event_name', 'series_name', 'sport', 'tour_name', 'start_date', 'event_format', 'event_stage', 'is_playoff']


In [None]:
# Inspect the raw 'participants' entry of the first match
data['matches'][0]['participants']

[{'name': 'Bengaluru Bravehearts',
  'short_name': 'BLB',
  'id': '30',
  'value': '21',
  'highlight': '',
  'players_involved': [{'name': 'Pol Pla',
    'id': '165',
    'value': '',
    'type': ''},
   {'name': 'Tone Ng Shiu', 'id': '191', 'value': '', 'type': ''},
   {'name': 'Iowane Teba', 'id': '245', 'value': '', 'type': ''},
   {'name': 'Liam Poulton', 'id': '370', 'value': '', 'type': ''},
   {'name': 'Mak Kwai Chung', 'id': '371', 'value': '', 'type': ''},
   {'name': 'Akuila Rokolisoa', 'id': '372', 'value': '', 'type': ''},
   {'name': 'Scott Curry', 'id': '373', 'value': '', 'type': ''},
   {'name': 'Mohit Khatri', 'id': '374', 'value': '', 'type': ''},
   {'name': 'Karan Rajbhar', 'id': '375', 'value': '', 'type': ''},
   {'name': 'Arpan Chhetri', 'id': '376', 'value': '', 'type': ''},
   {'name': 'Suresh Kumar', 'id': '377', 'value': '', 'type': ''},
   {'name': 'Prashant Singh', 'id': '378', 'value': '', 'type': ''},
   {'name': 'Philip Wokorach', 'id': '1575', 'value':

In [None]:
# Step 5: Select and rename useful columns
# Build one flat record per match; the first two participants are treated as
# Team A and Team B, and a participant's 'value' field is used as its score.
match_list = []

for match in data['matches']:
    try:
        team_a = match['participants'][0]['name']
        team_a_score = match['participants'][0].get('value', 'N/A')

        team_b = match['participants'][1]['name']
        team_b_score = match['participants'][1].get('value', 'N/A')

        match_list.append({
            'Match ID': match['Match_ID'],
            'Date': match['start_date'],
            'Venue': match['venue_name'],
            'Team A': team_a,
            'Team A Score': team_a_score,
            'Team B': team_b,
            'Team B Score': team_b_score,
            'Match Status': match['event_status'],
            'Match Name': match['event_name']
        })
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # Only structural problems in a record (missing key, fewer than two
        # participants, unexpected value type) cause that match to be skipped.
        print(f"Skipped a match due to error: {e}")

In [None]:
# Step 6: Build the results table and reduce 'Date' to a plain calendar date (optional)
df = pd.DataFrame(match_list).assign(
    Date=lambda frame: pd.to_datetime(frame['Date']).dt.date
)

In [None]:
# Preview the first rows of the match results table
df.head()

Unnamed: 0,Date,Venue,Team A,Team A Score,Team B,Team B Score,Match Status,Match Name
0,2025-06-15,"Andheri Sports Complex, Mumbai",Bengaluru Bravehearts,21,Delhi Redz,21,Match Completed,"Rugby Premier League, 2025"
1,2025-06-15,"Andheri Sports Complex, Mumbai",Chennai Bulls,24,Mumbai Dreamers,5,Match Completed,"Rugby Premier League, 2025"
2,2025-06-15,"Andheri Sports Complex, Mumbai",Hyderabad Heroes,24,Kalinga Black Tigers,14,Match Completed,"Rugby Premier League, 2025"
3,2025-06-16,"Andheri Sports Complex, Mumbai",Kalinga Black Tigers,10,Bengaluru Bravehearts,35,Match Completed,"Rugby Premier League, 2025"
4,2025-06-16,"Andheri Sports Complex, Mumbai",Mumbai Dreamers,17,Chennai Bulls,31,Match Completed,"Rugby Premier League, 2025"


In [None]:
# Step 7: Save to CSV
# index=False keeps the DataFrame's row index out of the file
df.to_csv("rpl_match_results_cleaned.csv", index=False)
print("✅ Cleaned data saved to rpl_match_results_cleaned.csv")

✅ Cleaned data saved to rpl_match_results_cleaned.csv


# Extracting the player-level dataset

In [None]:
# URL template for a single match's MatchCenter feed; '{}' is the placeholder for the match ID
base_match_url = "https://www.rugbypremierleague.in/feeds/live/MatchCenter_MatchID-{}.json"

In [None]:
# Fetch one match (ID 43) to see how its feed is organised and what we have to extract.
# The timeout stops an unresponsive server from hanging the kernel indefinitely.
match_url = "https://www.rugbypremierleague.in/feeds/live/MatchCenter_MatchID-43.json"
match_detail = requests.get(match_url, timeout=10).json()
# View top-level keys
print(match_detail.keys())

dict_keys(['match_detail', 'teams', 'events'])


In [None]:
# Pretty-print the 'teams' section, truncated to the first 2000 characters
# so the cell output stays readable.
print(json.dumps(match_detail['teams'], indent=2)[:2000])

{
  "team": [
    {
      "id": "30",
      "name": "Bengaluru Bravehearts",
      "short_name": "BLB",
      "display_name": "",
      "score": "21",
      "is_home_team": true,
      "formation": "",
      "jersy_color": "",
      "direction": "",
      "support_staff": "",
      "squad": [
        {
          "id": "165",
          "name": "Pol Pla",
          "short_name": "Pol Pla",
          "display_name": "",
          "jersey_no": "7",
          "formation": "",
          "position_id": "4",
          "position": "Back",
          "is_onbench": "",
          "is_substitute": "0",
          "is_substituted": "0",
          "isPlaying": "0",
          "minutes_played": "18",
          "jersey": "7",
          "starter": "1",
          "captain": "1",
          "is_Injured": "0",
          "attacking": {
            "offloads": "0",
            "line_breaks": "0",
            "defenders_beaten": "0",
            "points": "0",
            "flyhalf_kicks": null,
            "flyha

In [None]:
# Sanity-check a second match (ID 44): list each team and its squad size.
url = "https://www.rugbypremierleague.in/feeds/live/MatchCenter_MatchID-44.json"
response_1 = requests.get(url, timeout=10)
# Parse THIS response; the earlier `response` variable holds the fixtures feed.
data_1 = response_1.json()
# Check if teams and squads exist (teams are nested under data_1["teams"]["team"])
for team in (data_1.get("teams") or {}).get("team") or []:
    print(f"Team: {team['name']} | Squad size: {len(team.get('squad', []))}")


In [None]:
# Flatten player structure
def flatten_player(player_dict, team_name, match_id, match_date=None, venue=None):
    """Flatten one squad entry into a single-level dict (one row per player per match).

    Args:
        player_dict: One player object from a team's ``squad`` list.
        team_name: Name of the team the player belongs to.
        match_id: Identifier of the match the entry was taken from.
        match_date: Optional match date, stored on the row as given.
        venue: Optional venue name, stored on the row as given.

    Returns:
        dict holding the match/team context, the player's identity and status
        fields (``None`` where the source key is absent), and one
        ``"<section>_<stat>"`` entry for every stat found in the nested
        ``attacking``, ``defence_discipline``, ``kicking`` and ``others`` sections.
    """
    flat = {
        "match_id": match_id,
        "match_date": match_date,
        "venue": venue,
        "team": team_name,
        "player_id": player_dict.get("id"),
        "player_name": player_dict.get("name"),
        "jersey_no": player_dict.get("jersey_no"),
        "position": player_dict.get("position"),
        "minutes_played": player_dict.get("minutes_played"),
        "is_starter": player_dict.get("starter"),
        "is_captain": player_dict.get("captain"),
        "is_substitute": player_dict.get("is_substitute"),
        "is_substituted": player_dict.get("is_substituted"),
        "is_injured": player_dict.get("is_Injured"),
        "is_playing": player_dict.get("isPlaying"),
    }

    for section in ("attacking", "defence_discipline", "kicking", "others"):
        stats = player_dict.get(section)
        if not isinstance(stats, dict):
            # The feed represents empty fields as "" or null; a section in that
            # state has nothing to flatten and must not abort the whole row.
            continue
        for key, value in stats.items():
            flat[f"{section}_{key}"] = value

    return flat


# Extract player data for one match
def extract_match_player_stats(match_id, match_date=None, venue=None):
    """Download one match's MatchCenter feed and return its flattened player rows.

    Args:
        match_id: Match identifier interpolated into the feed URL.
        match_date: Optional match date copied onto every row.
        venue: Optional venue name copied onto every row.

    Returns:
        list of dicts, one per squad member across all teams in the match.
        An empty list is returned (after printing a message) when the request
        does not answer with HTTP 200 or when any error occurs, so one bad
        match never aborts a multi-match run.
    """
    url = f"https://www.rugbypremierleague.in/feeds/live/MatchCenter_MatchID-{match_id}.json"
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            # Report the skip so missing matches are visible in the output.
            print(f"⚠️ Match {match_id}: HTTP {response.status_code}, no player stats extracted")
            return []
        data = response.json()

        # `or` fallbacks cover keys that are present but null/empty.
        teams = (data.get("teams") or {}).get("team") or []

        return [
            flatten_player(player, team.get("name", "Unknown"), match_id, match_date, venue)
            for team in teams
            for player in team.get("squad") or []
        ]

    except Exception as e:
        # Deliberate best-effort: log the failure and let the caller continue.
        print(f"❌ Error for match {match_id}: {e}")
        return []


# Main loop for all matches
def extract_all_matches(delay_seconds=5, timeout=10):
    """Collect player stats for every match listed in the fixtures feed.

    Args:
        delay_seconds: Pause after each match request, to be kind to the
            server. Defaults to 5.
        timeout: Timeout in seconds for the fixtures request. Defaults to 10.

    Returns:
        pandas.DataFrame with one row per player per match (empty when no
        player rows could be extracted).

    Raises:
        requests.HTTPError: if the fixtures feed answers with an error status.
    """
    fixtures_url = "https://www.rugbypremierleague.in/feeds/live/FixturesAndResults.json"
    fixtures_response = requests.get(fixtures_url, timeout=timeout)
    # Fail loudly here: without the fixtures list there is nothing to iterate.
    fixtures_response.raise_for_status()
    fixtures = fixtures_response.json().get("matches", [])

    all_players_data = []

    for match in tqdm(fixtures, desc="Extracting matches"):
        match_id = match.get("Match_ID")
        if not match_id:
            continue

        # Keep the first 10 characters of start_date; `or ""` guards against a null value.
        match_date = (match.get("start_date") or "")[:10]
        venue = match.get("venue_name", "")

        # The per-match extractor returns [] on failure, so extend is always safe.
        all_players_data.extend(extract_match_player_stats(match_id, match_date, venue))
        time.sleep(delay_seconds)  # 💤 Be kind to server

    return pd.DataFrame(all_players_data)


In [None]:
# Run the full extraction, write it to CSV, then preview the first rows.
df_all = extract_all_matches()
df_all.to_csv("rpl_all_player_stats.csv", index=False)
print(
    "✅ All match player stats saved to 'rpl_all_player_stats.csv'",
    "📊 Sample output:",
    df_all.head(),
    sep="\n",
)

Extracting matches: 100%|██████████| 34/34 [02:54<00:00,  5.13s/it]

✅ All match player stats saved to 'rpl_all_player_stats.csv'
📊 Sample output:
  match_id  match_date                           venue                   team  \
0       43  2025-06-15  Andheri Sports Complex, Mumbai  Bengaluru Bravehearts   
1       43  2025-06-15  Andheri Sports Complex, Mumbai  Bengaluru Bravehearts   
2       43  2025-06-15  Andheri Sports Complex, Mumbai  Bengaluru Bravehearts   
3       43  2025-06-15  Andheri Sports Complex, Mumbai  Bengaluru Bravehearts   
4       43  2025-06-15  Andheri Sports Complex, Mumbai  Bengaluru Bravehearts   

  player_id     player_name jersey_no position minutes_played is_starter  ...  \
0       165         Pol Pla         7     Back             18          1  ...   
1       191    Tone Ng Shiu         3   Hybrid             12          1  ...   
2       245     Iowane Teba        10     Back              6          0  ...   
3       370    Liam Poulton        14     Back             19          1  ...   
4       371  Mak Kwai Chung   




In [None]:
# (rows, columns) of the player-level table
df_all.shape

(950, 101)

In [None]:
# (rows, columns) of the match-level results table
df.shape

(34, 8)