### 1. Imports:

In [None]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

### 2. Getting Basic Info:

In [None]:
# Walk the season-results pages, starting from the 2022 page, and collect one
# row per match from each page's results table.
link = 'https://stats.espncricinfo.com/ci/engine/records/team/match_results.html?id=2022;trophy=117;type=season'
data = []
# 15 pages in total (the previous hand-rolled counter ran from 1 and stopped at 16).
for _ in range(15):
    # A timeout keeps a stalled connection from hanging the scrape forever.
    res = requests.get(link, timeout=30)
    # Surface HTTP failures directly instead of as an AttributeError on a
    # missing element further down.
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')
    rows = soup.find('div', class_='div630Pad').find('table').find('tbody').find_all('tr')
    for match in rows:
        # Parse the row's cells once rather than once per column.
        cells = match.find_all('td')
        team1, team2, winner, margin, ground, date = (cell.text.strip() for cell in cells[:6])
        scorecard_link = 'https://stats.espncricinfo.com' + cells[6].find('a').get('href')
        data.append([team1, team2, winner, margin, ground, date, scorecard_link])
    # The first link inside the first <p> of the page is the next page to scrape.
    link = 'https://stats.espncricinfo.com' + soup.find('p').find('a').get('href')

In [None]:
# One row per match, in the same field order used when the rows were appended to `data`.
basic_info_columns = [
    'team1',
    'team2',
    'winner',
    'margin',
    'ground',
    'date',
    'scorecard_link',
]
df = pd.DataFrame(data, columns=basic_info_columns)

In [None]:
# Write the basic match table to disk without the index column.
basic_info_path = 'basci_info.csv'
df.to_csv(basic_info_path, index=False)

### 3. Scraping Data: 

In [None]:
# Load the previously saved basic match table (it holds the scorecard links).
basic_info_path = 'basci_info.csv'
df = pd.read_csv(basic_info_path)

In [None]:
def _safe(extract, default=np.nan):
    """Return ``extract()``, or ``default`` when the page element is missing or malformed.

    Only the failures a changed page layout can produce are caught: a lookup
    that returned ``None`` (AttributeError), a missing list entry (IndexError)
    or a non-numeric score (ValueError). Anything else, including
    KeyboardInterrupt, still propagates.
    """
    try:
        return extract()
    except (AttributeError, IndexError, ValueError):
        return default


# Scrape every scorecard page listed in `df` and collect one row per match.
# Each field is extracted independently, so a missing element only blanks
# that field (NaN) instead of aborting the whole run.
full_data = []
for link in tqdm(df['scorecard_link']):
    # A timeout keeps one stalled connection from hanging the whole scrape.
    res = requests.get(link, timeout=30)
    soup = BeautifulSoup(res.content, 'html.parser')
    full_scorecard = link

    # Team names and scores all sit in the same header container; find it once.
    # (The lambdas below are called immediately, so closing over loop state is safe.)
    header_divs = _safe(
        lambda: soup.find('div', class_='ds-flex ds-space-x-5')
        .find('div', class_='ds-flex ds-flex-col ds-mt-3 md:ds-mt-0 ds-mt-0 ds-mb-1')
        .find_all('div'),
        default=[],
    )
    team1 = _safe(lambda: header_divs[1].text.strip())
    # Scores are read as the part before '/'.
    team1_score = _safe(lambda: int(header_divs[2].text.strip().split('/')[0]))
    team2 = _safe(lambda: header_divs[4].text)
    # Only the text after the last ')' in this cell holds the score.
    team2_score = _safe(lambda: int(header_divs[5].text.split(')')[-1].strip().split('/')[0]))

    # NaN never compares equal to NaN, so two unparsed scores are not
    # mistaken for a tie and fall through to the result text.
    if team1_score == team2_score:
        winner = 'Tied'
        margin = 'Tied'
    else:
        result_text = _safe(
            lambda: soup.find('p', class_='ds-text-tight-m ds-font-regular ds-truncate ds-text-typo').text.strip(),
            default=None,
        )
        if result_text is None:
            # Previously unguarded: a missing result paragraph crashed the scrape.
            winner = np.nan
            margin = np.nan
        else:
            winner = result_text.split(' ')[0]
            margin = result_text.split('by')[-1].split('(')[0].strip()

    # Venue, toss and player-of-the-match are read from the fifth table on the page.
    info_rows = _safe(lambda: soup.find_all('table')[4].find_all('tr'), default=[])

    venue_parts = _safe(lambda: info_rows[0].text.split(','), default=[])
    stadium = _safe(lambda: venue_parts[0].strip())
    place = _safe(lambda: venue_parts[1].strip())

    toss_parts = _safe(lambda: info_rows[1].find_all('td')[-1].text.split(','), default=[])
    toss_winner = _safe(lambda: toss_parts[0].strip())
    toss_choice = _safe(lambda: toss_parts[1].strip())

    # toss_choice may be NaN (a float), which does not support `in`; and when
    # neither keyword matches, `choice` must not keep the previous match's value.
    if isinstance(toss_choice, str) and 'field' in toss_choice:
        choice = 'Field'
    elif isinstance(toss_choice, str) and 'bat' in toss_choice:
        choice = 'Bat'
    else:
        choice = np.nan

    man_of_the_match = _safe(lambda: info_rows[4].find_all('td')[-1].text)

    full_data.append([full_scorecard, team1, team2, team1_score, team2_score, toss_winner, choice,
                      winner, margin, man_of_the_match, stadium, place])

### 4. Converting it into a DataFrame:

In [None]:
# Column names follow the order in which each match's fields were appended to `full_data`.
scorecard_columns = [
    'full_scorecard',
    'team1',
    'team2',
    'team1_score',
    'team2_score',
    'toss_winner',
    'toss_choice',
    'winner',
    'margin',
    'man_of_the_match',
    'stadium',
    'place',
]
df = pd.DataFrame(full_data, columns=scorecard_columns)

### 5. Saving it as a .csv File:

In [None]:
# Write the final per-match dataset to disk without the index column.
dataset_path = 'ipl_dataset.csv'
df.to_csv(dataset_path, index=False)