In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
url = "https://en.wikipedia.org/wiki/2025_Indian_Premier_League"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
}
response = requests.get(url, headers=headers)
print(response.status_code)
soup = BeautifulSoup(response.text, "html.parser")
data_records = []
print(soup.title.text)

200
2025 Indian Premier League - Wikipedia


In [3]:
matches = soup.find_all("div", style="width: 100%; clear:both")

In [5]:
# Scrapper
table2data = []
for match in matches:
    tables = match.find_all("table")
    table1_data = tables[0].get_text("\n", strip=True).split("\n")
    match_no = table1_data[0]
    match_date = table1_data[1]
    match_date = match_date.replace("\xa0", " ")
    match_time = None
    
    for item in table1_data:
        if ":" in item:
            match_time = item.replace("(" , "").strip()
            break

    table2_data = tables[1].get_text("\n", strip=True).split("\n")
    if '(H)' in table2_data:
        table2_data.remove("(H)")
    v_index = table2_data.index("v")
    
    if (len(table2_data) <= 4):
        team1 = table2_data[v_index - 1]
        score1 = "0"
        team2 = table2_data[v_index + 1]
        score2 = "0"
    else:
        team1 = table2_data[v_index - 2]
        score1 = table2_data[v_index - 1]
        team2 = table2_data[v_index + 1]
        score2 = table2_data[v_index + 2]

    #print(team1, score1, team2, score2)
    table2data.append(table2_data)
    table3_data = tables[2].get_text("\n", strip=True).split("\n")
    venue = table3_data[1]
    result = table3_data[0]
    winner = result.split(" won")[0]
    
    if len(table3_data) == 7:
        pom = "no result"
    else:
        pom = table3_data[len(table3_data) - 2]
    
    toss_winner = "N/A"
    toss_desicion = "N/A"
    for li in match.select("li"):
        text = li.get_text(strip=True)
        if "won the toss" in text:
            toss_winner = text.split(" won the toss")[0]
            toss_decision = text.split(" to ")[1]
            break
            
    #Data Records
    data_records.append({
        "match_no":match_no,
        "date":match_date,
        "time":match_time,
        "venue":venue,
        "toss_winner": toss_winner,
        "toss_decision":toss_decision,
        "team1":team1,
        "team1_score":score1,
        "team2":team2,
        "team2_score":score2,
        "winner":winner,
        "result":result,
        "POM":pom
    })

In [6]:
df = pd.DataFrame(data_records)

In [7]:
df.head()

Unnamed: 0,match_no,date,time,venue,toss_winner,toss_decision,team1,team1_score,team2,team2_score,winner,result,POM
0,Match 1,22 March 2025,19:30,Eden Gardens,Royal Challengers Bengaluru,field.,Kolkata Knight Riders,174/8 (20 overs),Royal Challengers Bengaluru,177/3 (16.2 overs),Royal Challengers Bengaluru,Royal Challengers Bengaluru won by 7 wickets,Krunal Pandya
1,Match 2,23 March 2025,15:30,Rajiv Gandhi International Cricket Stadium,Rajasthan Royals,field.,Sunrisers Hyderabad,286/6 (20 overs),Rajasthan Royals,242/6 (20 overs),Sunrisers Hyderabad,Sunrisers Hyderabad won by 44 runs,Ishan Kishan
2,Match 3,23 March 2025,19:30,M. A. Chidambaram Stadium,Chennai Super Kings,field.,Mumbai Indians,155/9 (20 overs),Chennai Super Kings,158/6 (19.1 overs),Chennai Super Kings,Chennai Super Kings won by 4 wickets,Noor Ahmad
3,Match 4,24 March 2025,19:30,ACA窶天DCA Cricket Stadium,Delhi Capitals,field.,Lucknow Super Giants,209/8 (20 overs),Delhi Capitals,211/9 (19.3 overs),Delhi Capitals,Delhi Capitals won by 1 wicket,Ashutosh Sharma
4,Match 5,25 March 2025,19:30,Narendra Modi Stadium,Gujarat Titans,field.,Punjab Kings,243/5 (20 overs),Gujarat Titans,232/5 (20 overs),Punjab Kings,Punjab Kings won by 11 runs,Shreyas Iyer


In [None]:
#Save dataset into a csv file
df.to_csv("ipl_2025.csv", index = False)

In [8]:
df['batting_first'] = df.apply(lambda x: x['toss_winner']
                               if x['toss_decision'] == 'bat'
                               else (x['team1'] if x['toss_winner'] != x['team1'] else x['team2']),
                               axis = 1)

In [9]:
df.head(2)

Unnamed: 0,match_no,date,time,venue,toss_winner,toss_decision,team1,team1_score,team2,team2_score,winner,result,POM,batting_first
0,Match 1,22 March 2025,19:30,Eden Gardens,Royal Challengers Bengaluru,field.,Kolkata Knight Riders,174/8 (20 overs),Royal Challengers Bengaluru,177/3 (16.2 overs),Royal Challengers Bengaluru,Royal Challengers Bengaluru won by 7 wickets,Krunal Pandya,Kolkata Knight Riders
1,Match 2,23 March 2025,15:30,Rajiv Gandhi International Cricket Stadium,Rajasthan Royals,field.,Sunrisers Hyderabad,286/6 (20 overs),Rajasthan Royals,242/6 (20 overs),Sunrisers Hyderabad,Sunrisers Hyderabad won by 44 runs,Ishan Kishan,Sunrisers Hyderabad


In [10]:
df['batting_second'] = df.apply(lambda x: x['team2']
                               if x['batting_first'] == x['team1']
                               else x['team1'],
                               axis = 1)

In [11]:
df.head()

Unnamed: 0,match_no,date,time,venue,toss_winner,toss_decision,team1,team1_score,team2,team2_score,winner,result,POM,batting_first,batting_second
0,Match 1,22 March 2025,19:30,Eden Gardens,Royal Challengers Bengaluru,field.,Kolkata Knight Riders,174/8 (20 overs),Royal Challengers Bengaluru,177/3 (16.2 overs),Royal Challengers Bengaluru,Royal Challengers Bengaluru won by 7 wickets,Krunal Pandya,Kolkata Knight Riders,Royal Challengers Bengaluru
1,Match 2,23 March 2025,15:30,Rajiv Gandhi International Cricket Stadium,Rajasthan Royals,field.,Sunrisers Hyderabad,286/6 (20 overs),Rajasthan Royals,242/6 (20 overs),Sunrisers Hyderabad,Sunrisers Hyderabad won by 44 runs,Ishan Kishan,Sunrisers Hyderabad,Rajasthan Royals
2,Match 3,23 March 2025,19:30,M. A. Chidambaram Stadium,Chennai Super Kings,field.,Mumbai Indians,155/9 (20 overs),Chennai Super Kings,158/6 (19.1 overs),Chennai Super Kings,Chennai Super Kings won by 4 wickets,Noor Ahmad,Mumbai Indians,Chennai Super Kings
3,Match 4,24 March 2025,19:30,ACA窶天DCA Cricket Stadium,Delhi Capitals,field.,Lucknow Super Giants,209/8 (20 overs),Delhi Capitals,211/9 (19.3 overs),Delhi Capitals,Delhi Capitals won by 1 wicket,Ashutosh Sharma,Lucknow Super Giants,Delhi Capitals
4,Match 5,25 March 2025,19:30,Narendra Modi Stadium,Gujarat Titans,field.,Punjab Kings,243/5 (20 overs),Gujarat Titans,232/5 (20 overs),Punjab Kings,Punjab Kings won by 11 runs,Shreyas Iyer,Punjab Kings,Gujarat Titans
