## Fab 4 Test Batting rankings over time - from 2012 to 2024

1) Make a list of dates (the first day of each month in the chosen year range)
2) For each date, make a request to the date-specific page, e.g. 'https://www.relianceiccrankings.com/datespecific/test/batting/2016/01/01/'
3) Use BeautifulSoup to extract the ratings of the given player or set of players (e.g. Joe Root, Kane Williamson, Steve Smith, Virat Kohli) for that date
4) Store the ratings in a pandas DataFrame

TODO 
- Add dates properly
- Automate plot making     
- Add a cool GUI

Handle Imports

In [5]:
import requests
import bs4 
import pandas as pd
from bs4 import BeautifulSoup
import random
import time
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Request headers sent by make_a_request with every page fetch.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36'
    ),
}

# Match format: 'test', 'odi' or 't20'.
FORMAT = 'test'
# 'batting' or 'bowling' - n.b. no date-specific pages for all-round or women's cricket :(
PLAYER_TYPE = 'batting'
# Prefix of every date-specific URL; create_links appends 'YYYY/MM/DD/' to it.
BASE_URL = f'https://www.relianceiccrankings.com/datespecific/{FORMAT}/{PLAYER_TYPE}/'

# Players to track; the column order of the collected ratings follows this list.
# Alternative selection: ['Joe Root', 'Kane Williamson', 'Steve Smith', 'Virat Kohli']
PLAYERS = ['Harry Brook', 'Ben Duckett']
PLAYER_COLORS = ['blue', 'red']

# Inclusive range of years to collect.
YEAR_START = 2024
YEAR_END = 2024

### Execution Code

In [12]:
# Collect one row of ratings per monthly link; each row has one entry per
# player in PLAYERS, in the same order.
player_ratings = []
#create links for every month between specified years
links = create_links(YEAR_START,YEAR_END)
print(links)

for position, link in enumerate(links):
    response = make_a_request(link)
    # make_a_request returns None on failure and extract_table returns an
    # empty dict when no table is available; neither can be searched for rows.
    table = extract_table(response) if response is not None else None
    if table:
        player_rating_month = extract_player_ratings(table,PLAYERS)
    else:
        # Record a placeholder row so one failed month does not abort the
        # whole run and rows stay aligned one-to-one with links.
        player_rating_month = [None] * len(PLAYERS)
    player_ratings.append(player_rating_month)
    print("Adding {0} ratings from {1}".format(player_rating_month,link))
    # Pause between requests only; there is nothing to wait for after the last one.
    if position < len(links) - 1:
        wait_function()

print(player_ratings)

['https://www.relianceiccrankings.com/datespecific/test/batting/2024/01/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/02/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/03/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/04/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/05/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/06/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/07/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/08/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/09/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/10/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/11/01/', 'https://www.relianceiccrankings.com/datespecific/test/batting/2024/12/01/']
Rating:773 found for Player:Harry Brook
Rating:633 found for Player:Ben Duc

In [13]:
# Tabulate the collected ratings: one row per link, one column per player.
df = pd.DataFrame(data=player_ratings, columns=PLAYERS)

### Production Code

In [7]:
def make_a_request(link, timeout=10):
    """Fetch a page and return the response only if the request succeeded.

    Args:
        link: URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The ``requests`` response when the status code is 200, otherwise
        ``None`` (non-200 status, timeout, or any other request error). The
        reason for a failure is printed.
    """
    try:
        response = requests.get(link, headers=HEADERS, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Timeouts are a RequestException subclass, so they land here too.
        print(f"Request failed: {e}")
        return None

    if response.status_code == 200:
        return response

    print(f"Error: Received status code {response.status_code}")
    return None

In [8]:
def wait_function(min_delay=3, max_delay=6):
    """Sleep for a random duration, announcing the chosen delay first.

    Args:
        min_delay: Lower bound of the delay in seconds (default 3).
        max_delay: Upper bound of the delay in seconds (default 6).

    Raises:
        ValueError: If the drawn delay is negative (raised by ``time.sleep``).
    """
    delay = random.uniform(min_delay, max_delay) # Random delay
    print(f"Sleeping for {delay:.2f} seconds...")
    time.sleep(delay)

In [9]:
def extract_table(response):
    """Return the first ``<table>`` element of an HTTP response body.

    Args:
        response: Object exposing the raw page bytes as ``.content``, or
            ``None`` (which is what make_a_request returns on failure).

    Returns:
        The first ``<table>`` element found by BeautifulSoup, or an empty
        dict when there is no response, no content, no table, or parsing
        fails.
    """
    # Handle a missing response explicitly rather than relying on a
    # swallowed AttributeError.
    html_content = getattr(response, "content", None)
    if not html_content:
        return {}

    try:
        soup = BeautifulSoup(html_content, "html.parser")
        table = soup.find('table')
    except Exception as e:
        # Best effort: a page that cannot be parsed is reported and skipped.
        print(f"An error occurred: {str(e)}")
        return {}

    if not table:
        return {}
    return table

In [10]:
def create_links(year_start,year_end):
    """Build the date-specific URLs for the first day of every month.

    Args:
        year_start: First year to include.
        year_end: Last year to include (inclusive).

    Returns:
        A list of ``BASE_URL + 'YYYY/MM/01/'`` strings, twelve per year in
        chronological order; empty when ``year_start > year_end``.
    """
    return [
        BASE_URL + '{0}/{1:02d}/01/'.format(year, month)
        for year in range(year_start, year_end + 1)
        for month in range(1, 13)
    ]

In [11]:
def extract_player_ratings(table,players):
    """Look up the rating of each requested player in a rankings table.

    Each table row is expected to hold the rating in its 2nd ``<td>`` and the
    player name, wrapped in an ``<a>`` tag, in its 3rd ``<td>``. Rows that do
    not fit this shape (e.g. header rows) are skipped.

    Args:
        table: Parsed table element exposing ``find_all('tr')``. Anything
            without a ``find_all`` method (such as the empty dict returned
            by extract_table on failure, or ``None``) is treated as "no data".
        players: List of player names to look for.

    Returns:
        A list the same length as ``players``; position ``i`` holds the
        rating text of ``players[i]``, or ``None`` if that player was not
        found. If a name occurs in several rows, the first occurrence wins.
    """
    player_rating_month = [None] * len(players)

    find_rows = getattr(table, 'find_all', None)
    if find_rows is None:
        return player_rating_month

    still_missing = set(players)
    for row in find_rows('tr'):
        #Once we find all our players, stop
        if not still_missing:
            break

        cells = row.find_all('td')
        if len(cells) < 3:
            continue

        #The 3rd column cell contains the name inside a link
        name = cells[2].find('a')
        if name is None or name.string is None:
            continue
        player_name = name.string.strip()
        if player_name not in still_missing:
            continue

        rating = cells[1].string
        print("Rating:{0} found for Player:{1}".format(rating,player_name))
        #Set the player rating into the correct column based on the players
        #argument (not a module-level list), so any player list works
        player_rating_month[players.index(player_name)] = rating
        still_missing.discard(player_name)

    return player_rating_month

### Function to Create a plot with matplotlib