In [9]:
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Player profile URLs to scrape, read from the 'link' column of cricket.csv
links = pd.read_csv('cricket.csv')['link'].tolist()

### 1. Defining Scraper Functions

In [10]:
def get_col_names(table):
    """Return the whitespace-stripped text of every <th> cell found in the table."""
    names = []
    for header_cell in table.find_all('th'):
        names.append(header_cell.text.strip())
    return names

def get_data(table):
    """Return the whitespace-stripped text of every <td> cell in the table as one flat list."""
    data_cells = table.find_all('td')
    return [cell.text.strip() for cell in data_cells]

def get_rows(table):
    """Return the number of data rows in the table.

    When the table has a <tbody>, every <tr> inside it is counted. When the
    markup has no <tbody>, find() returns None; instead of failing with an
    AttributeError, count the <tr> rows that contain at least one <td>, so a
    header row made only of <th> cells is not counted as data.
    """
    body = table.find('tbody')
    if body is not None:
        return len(body.find_all('tr'))

    # No <tbody> in the markup: fall back to rows that actually hold data cells
    return sum(1 for row in table.find_all('tr') if row.find('td') is not None)
    
def get_cols(table):
    """Return the number of columns, taken as the count of header names in the table."""
    header_names = get_col_names(table)
    return len(header_names)

def create_df(table):
    """Build a DataFrame from an HTML table.

    The flat list of cell texts is reshaped into (rows, columns) and the
    header texts become the column labels. np.reshape raises ValueError when
    the number of cells does not equal rows * columns.
    """
    flat_cells = get_data(table)
    shape = (get_rows(table), get_cols(table))
    grid = np.reshape(flat_cells, shape)
    return pd.DataFrame(grid, columns = get_col_names(table))

### 2. Final Scraper

In [11]:
REQUEST_TIMEOUT = 30                                      # Seconds to wait for a response before giving up on a player
TABLE_CLASS     = 'table cb-col-100 cb-plyr-thead'        # class attribute used to locate the stats tables
OUTPUT_DIR      = 'Scores'                                # Folder that receives one batting and one bowling CSV per player

os.makedirs(OUTPUT_DIR, exist_ok = True)                  # to_csv does not create a missing folder

bat_paths = []
bow_paths = []

for link in tqdm(links):                                  # Going through each player data one by one

    try:
        response = requests.get(link, timeout = REQUEST_TIMEOUT)
        response.raise_for_status()                       # Treat HTTP error pages as failures instead of parsing them
        soup = BeautifulSoup(response.content, 'lxml')

        # Look the tables up once; the first match is used as batting, the second as bowling
        tables = soup.find_all('table', class_ = TABLE_CLASS)
        bat_table, bow_table = tables[0], tables[1]

        df_bat = create_df(bat_table)
        df_bow = create_df(bow_table)

        player_id = link.split('/')[-2]                   # Second-to-last URL segment names the output files

        bat_path = OUTPUT_DIR + '/bat_' + player_id + '.csv'
        bow_path = OUTPUT_DIR + '/bow_' + player_id + '.csv'

        df_bat.to_csv(bat_path, index = False)
        df_bow.to_csv(bow_path, index = False)

        bat_paths.append(bat_path)
        bow_paths.append(bow_path)

    except Exception as error:
        # Best effort: keep one entry per link so the lists stay aligned with `links`,
        # but report which player failed. Catching Exception (not a bare except)
        # lets Ctrl-C / kernel interrupt stop the loop.
        tqdm.write('Skipped {}: {!r}'.format(link, error))
        bat_paths.append(np.nan)
        bow_paths.append(np.nan)

100%|█████████████████████████████████████████| 452/452 [05:53<00:00,  1.28it/s]


### 3. Merging Scraped DataFrame Paths with cricket.csv

In [23]:
df = pd.read_csv('cricket.csv')

# bat_paths / bow_paths hold one entry per scraped link, in the order of the rows
# of cricket.csv. Fail with a clear message if they are stale or incomplete.
if len(bat_paths) != len(df) or len(bow_paths) != len(df):
    raise ValueError(
        'Expected {} scraped paths but got {} batting and {} bowling; '
        're-run the scraper cell before merging.'.format(len(df), len(bat_paths), len(bow_paths))
    )

df['bat_path'] = bat_paths
df['bow_path'] = bow_paths

# Drop only the players whose scrape failed (NaN path). A missing value in some
# other column of cricket.csv must not remove a successfully scraped player.
df = df.dropna(subset = ['bat_path', 'bow_path'])

df.to_csv('players_record.csv', index = False)