## Importing Libraries

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from lxml import etree

# Match Summary

## Website Data

In [2]:
url = 'https://www.espncricinfo.com/records/tournament/team-match-results/indiantopremier-league-2023-15129'

response = requests.get(url)

In [3]:
cric_web = response.text

In [4]:
soup = BeautifulSoup(cric_web, 'html.parser')
# soup

## Scraping the data

In [5]:
tablehead = soup.thead
tablehead

<thead class="ds-bg-fill-content-alternate ds-text-left"><tr class=""><td class="ds-min-w-max"><div class="ds-popper-wrapper"><span class="ds-cursor-pointer">Team 1</span></div></td><td class="ds-min-w-max ds-text-right"><div class="ds-popper-wrapper"><span class="ds-cursor-pointer">Team 2</span></div></td><td class="ds-min-w-max ds-text-right"><div class="ds-popper-wrapper"><span class="ds-cursor-pointer">Winner</span></div></td><td class="ds-min-w-max ds-text-right"><div class="ds-popper-wrapper"><span class="ds-cursor-pointer">Margin</span></div></td><td class="ds-min-w-max ds-text-right"><div class="ds-popper-wrapper"><span class="ds-cursor-pointer">Ground</span></div></td><td class="ds-min-w-max ds-text-right ds-whitespace-nowrap"><div class="ds-popper-wrapper"><span class="ds-cursor-pointer">Match Date</span></div></td><td class="ds-min-w-max ds-text-right"><div class="ds-popper-wrapper"><span class="ds-cursor-pointer">Scorecard</span></div></td></tr></thead>

In [6]:
row_header = []
for x in tablehead.find_all('td'):
    row_header.append(x.text)

row_header

['Team 1', 'Team 2', 'Winner', 'Margin', 'Ground', 'Match Date', 'Scorecard']

In [7]:
tablevalue = soup.tbody
tablevalue

<tbody class=""><tr class=""><td class="ds-min-w-max"><span class=""><a class="ds-inline-flex ds-items-start ds-leading-none" href="/team/gujarat-titans-1298769"><span class="ds-text-tight-s ds-font-regular ds-text-typo-primary hover:ds-text-typo-primary-hover ds-block">Titans</span></a></span></td><td class="ds-min-w-max ds-text-right"><span class=""><a class="ds-inline-flex ds-items-start ds-leading-none" href="/team/chennai-super-kings-335974"><span class="ds-text-tight-s ds-font-regular ds-text-typo-primary hover:ds-text-typo-primary-hover ds-block">Super Kings</span></a></span></td><td class="ds-min-w-max ds-text-right"><span class="">Titans</span></td><td class="ds-min-w-max ds-text-right"><span class="">5 wickets</span></td><td class="ds-min-w-max ds-text-right"><span class=""><a class="ds-inline-flex ds-items-start ds-leading-none" href="/ci/content/ground/57851.html"><span class="ds-text-tight-s ds-font-regular ds-text-typo-primary hover:ds-text-typo-primary-hover ds-block">Ah

In [8]:
table_data = []
for x in tablevalue.find_all('tr'):
    data = [y.text for y in x.find_all('td')]
    table_data.append(data)
    
table_data

[['Titans',
  'Super Kings',
  'Titans',
  '5 wickets',
  'Ahmedabad',
  'Mar 31, 2023',
  'T20'],
 ['Punjab Kings',
  'KKR',
  'Punjab Kings',
  '7 runs',
  'Mohali',
  'Apr 1, 2023',
  'T20'],
 ['Super Giants',
  'Capitals',
  'Super Giants',
  '50 runs',
  'Lucknow',
  'Apr 1, 2023',
  'T20'],
 ['Sunrisers',
  'Royals',
  'Royals',
  '72 runs',
  'Hyderabad',
  'Apr 2, 2023',
  'T20'],
 ['RCB', 'Mumbai', 'RCB', '8 wickets', 'Bengaluru', 'Apr 2, 2023', 'T20'],
 ['Super Kings',
  'Super Giants',
  'Super Kings',
  '12 runs',
  'Chennai',
  'Apr 3, 2023',
  'T20'],
 ['Capitals', 'Titans', 'Titans', '6 wickets', 'Delhi', 'Apr 4, 2023', 'T20'],
 ['Royals',
  'Punjab Kings',
  'Punjab Kings',
  '5 runs',
  'Guwahati',
  'Apr 5, 2023',
  'T20'],
 ['KKR', 'RCB', 'KKR', '81 runs', 'Eden Gardens', 'Apr 6, 2023', 'T20'],
 ['Super Giants',
  'Sunrisers',
  'Super Giants',
  '5 wickets',
  'Lucknow',
  'Apr 7, 2023',
  'T20'],
 ['Royals', 'Capitals', 'Royals', '57 runs', 'Guwahati', 'Apr 8, 2023

## Making DataFrame

In [9]:
df = pd.DataFrame(table_data, columns=row_header)
df.head()

Unnamed: 0,Team 1,Team 2,Winner,Margin,Ground,Match Date,Scorecard
0,Titans,Super Kings,Titans,5 wickets,Ahmedabad,"Mar 31, 2023",T20
1,Punjab Kings,KKR,Punjab Kings,7 runs,Mohali,"Apr 1, 2023",T20
2,Super Giants,Capitals,Super Giants,50 runs,Lucknow,"Apr 1, 2023",T20
3,Sunrisers,Royals,Royals,72 runs,Hyderabad,"Apr 2, 2023",T20
4,RCB,Mumbai,RCB,8 wickets,Bengaluru,"Apr 2, 2023",T20


## Saving the data

In [None]:
df.to_csv(r"E:\Data analyst\Cricket\Data Collection\match_table.csv", index=False)

# df.to_excel(r"E:\Data analyst\Cricket\cricket_scrap\match_table_excel.xlsx", index=False)

# Batting Data

In [10]:
match_url = []
for x in tablevalue.find_all('tr'):
    ref = ['https://www.espncricinfo.com/'+y.get('href') for y in x.find_all('a')][3]
    match_url.append(ref)
    
match_url

['https://www.espncricinfo.com//series/indian-premier-league-2023-1345038/gujarat-titans-vs-chennai-super-kings-1st-match-1359475/full-scorecard',
 'https://www.espncricinfo.com//series/indian-premier-league-2023-1345038/punjab-kings-vs-kolkata-knight-riders-2nd-match-1359476/full-scorecard',
 'https://www.espncricinfo.com//series/indian-premier-league-2023-1345038/lucknow-super-giants-vs-delhi-capitals-3rd-match-1359477/full-scorecard',
 'https://www.espncricinfo.com//series/indian-premier-league-2023-1345038/sunrisers-hyderabad-vs-rajasthan-royals-4th-match-1359478/full-scorecard',
 'https://www.espncricinfo.com//series/indian-premier-league-2023-1345038/royal-challengers-bangalore-vs-mumbai-indians-5th-match-1359479/full-scorecard',
 'https://www.espncricinfo.com//series/indian-premier-league-2023-1345038/chennai-super-kings-vs-lucknow-super-giants-6th-match-1359480/full-scorecard',
 'https://www.espncricinfo.com//series/indian-premier-league-2023-1345038/delhi-capitals-vs-gujarat-t

In [11]:
def batting_data(tables):
   
    
    match = []
    inning = []
    bat_pos = []
    batsman = []
    dismissal = []
    runs = []
    balls_faced = []
    fours = []
    sixes = []
    strike_rate = []

    
    for i in range(2):
        s = tables[i]
        playing_t = s.find("span", class_="ds-text-title-xs ds-font-bold ds-capitalize")
        rows = s.find_all("tr")
        pos = 0
        for row in rows[1:]:
            columns = row.find_all('td')
            if columns[0].text == '':
                continue
            elif columns[0].text == 'Extras':
                break
            pos += 1
            bat_pos.append(pos)
            batsman.append(columns[0].text.strip())
            dismissal.append(columns[1].text.strip())
            runs.append(columns[2].text.strip())
            balls_faced.append(columns[3].text.strip())
            fours.append(columns[5].text.strip())
            sixes.append(columns[6].text.strip())
            strike_rate.append(columns[7].text.strip())
            inning.append(playing_t.text)

        
    df = pd.DataFrame({
        "Team_Innings":inning,
        "Batting_Pos":bat_pos,
        "Batsman":batsman,
        "Dismissal":dismissal,
        "Runs":runs,
        "Balls":balls_faced,
        "4s":fours,
        "6s":sixes,
        "SR":strike_rate
    })
    df["Match"] = f"{df.Team_Innings.unique()[0]} vs {df.Team_Innings.unique()[1]}"
    df = df.loc[:,["Match","Team_Innings","Batting_Pos","Batsman","Dismissal","Runs","Balls","4s","6s","SR"]]
    
    return df



## Bowlinng data

In [17]:
def bowling_data(tables):
    team1 = tables[1].div.div.span.span.text
    team2 = tables[0].div.div.span.span.text

    heading = ["Match", "Bowling_Team", "Bowler", "Overs", "Maidens", "Runs", "Wickets", "Economy", "0s", "4s", "6s", "WD", "NB"]
    df = pd.DataFrame(columns=heading)

    for t in tables:
        table = t.find('table', class_='ds-w-full ds-table ds-table-md ds-table-auto')
        column = table.tbody
        player_data = []
        playing_t = t.find("span", class_="ds-text-title-xs ds-font-bold ds-capitalize").text
        
        match = f"{team2} vs {team1}"
        for t in column.find_all('tr'):
            row = [y.text for y in t.find_all('td')]
            
            if len(row) != 1:
                if playing_t == team1:
                    row.insert(0, team2)
                else:
                    row.insert(0, team1)
                row.insert(0, match)
                player_data.append(row)
        
        b_df = pd.DataFrame(player_data, columns=heading)
        df = pd.concat([df,b_df],axis=0,ignore_index=True)
        
    return df


## Scraping data

In [18]:
bat_fact_df = pd.DataFrame()
bow_fact_df = pd.DataFrame()

print(f"{'>'*10} Collecting All Scorecards for IPL 2023 {'<'*10}")

for url in tqdm(match_url):
    try:
        response2 = requests.get(url)

        soup2 = BeautifulSoup(response2.content, 'html.parser')
        tables = soup2.find_all('div', class_='ds-rounded-lg ds-mt-2')
        
        bat_df = batting_data(tables)
        bat_fact_df = pd.concat([bat_fact_df,bat_df],axis=0,ignore_index=True)
        
        bow_df = bowling_data(tables)
        bow_fact_df = pd.concat([bow_fact_df,bow_df],axis=0,ignore_index=True)
        
        
        
    except Exception as e:
        continue



>>>>>>>>>> Collecting All Scorecards for IPL 2023 <<<<<<<<<<


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:50<00:00,  1.47it/s]


In [19]:
bow_fact_df.head()

Unnamed: 0,Match,Bowling_Team,Bowler,Overs,Maidens,Runs,Wickets,Economy,0s,4s,6s,WD,NB
0,Chennai Super Kings vs Gujarat Titans,Gujarat Titans,Mohammed Shami,4,0,29,2,7.25,13,2,2,0,1
1,Chennai Super Kings vs Gujarat Titans,Gujarat Titans,Hardik Pandya,3,0,28,0,9.33,6,2,2,0,0
2,Chennai Super Kings vs Gujarat Titans,Gujarat Titans,Josh Little,4,0,41,1,10.25,10,4,3,0,0
3,Chennai Super Kings vs Gujarat Titans,Gujarat Titans,Rashid Khan,4,0,26,2,6.5,10,2,1,0,0
4,Chennai Super Kings vs Gujarat Titans,Gujarat Titans,Alzarri Joseph,4,0,33,2,8.25,8,0,3,0,0


In [20]:
bat_fact_df.head()

Unnamed: 0,Match,Team_Innings,Batting_Pos,Batsman,Dismissal,Runs,Balls,4s,6s,SR
0,Chennai Super Kings vs Gujarat Titans,Chennai Super Kings,1,Devon Conway,b Mohammed Shami,1,6,0,0,16.66
1,Chennai Super Kings vs Gujarat Titans,Chennai Super Kings,2,Ruturaj Gaikwad,c Shubman Gill b Joseph,92,50,4,9,184.0
2,Chennai Super Kings vs Gujarat Titans,Chennai Super Kings,3,Moeen Ali,c †Saha b Rashid Khan,23,17,4,1,135.29
3,Chennai Super Kings vs Gujarat Titans,Chennai Super Kings,4,Ben Stokes,c †Saha b Rashid Khan,7,6,1,0,116.66
4,Chennai Super Kings vs Gujarat Titans,Chennai Super Kings,5,Ambati Rayudu,b Little,12,12,0,1,100.0


## Saving the data

In [21]:
bat_fact_df.to_csv("ipl_bat_summary.csv",index=None)
bow_fact_df.to_csv("ipl_bowl_summary.csv",index=None)
