In [1]:
import requests  # to get image from the web
from bs4 import BeautifulSoup
import pandas as pd


class EloScraper:
    """Download rating tables from tennisabstract.com report pages.

    The ``link_*`` class attributes hold the report URLs (ATP/WTA Elo and
    season yElo). ``get_html_table`` fetches a page and extracts the table
    with ``id="reportable"``; ``convert_html_table_to_csv`` turns that table
    into a ``pandas.DataFrame``.
    """

    link_atp_elo = "https://tennisabstract.com/reports/atp_elo_ratings.html"
    link_atp_yelo = "http://tennisabstract.com/reports/atp_season_yelo_ratings.html"
    link_wta_elo = "https://tennisabstract.com/reports/wta_elo_ratings.html"
    link_wta_yelo = "http://tennisabstract.com/reports/wta_season_yelo_ratings.html"

    # Seconds to wait on the server; without a timeout requests can block forever.
    request_timeout = 30

    def get_html_table(self, url):
        """Fetch ``url`` and return its ``<table id="reportable">`` element.

        Args:
            url: Address of the report page to download.

        Returns:
            The matching BeautifulSoup tag, or ``None`` when the page has no
            table with that id.

        Raises:
            requests.HTTPError: The server answered with an error status.
            requests.Timeout: No answer within ``request_timeout`` seconds.
        """
        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'
        header = {
            'User-Agent': user_agent
        }
        response = requests.get(url, headers=header,
                                timeout=self.request_timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "lxml")
        return soup.find("table", {"id": "reportable"})

    def convert_html_table_to_csv(self, table):
        """Convert an HTML table element into a ``pandas.DataFrame``.

        Despite the name, nothing is written to CSV here; the caller decides
        what to do with the returned frame.

        The first ``<tr>`` supplies the column names (text kept verbatim);
        every later ``<tr>`` becomes one row of whitespace-stripped strings.
        Non-breaking spaces in the second column are replaced by ordinary
        spaces. Rows without any cell are skipped.

        Args:
            table: Table element as returned by ``get_html_table``.

        Returns:
            A DataFrame of strings; empty when the table has no rows.

        Raises:
            ValueError: ``table`` is ``None`` (the table was not found).
        """
        if table is None:
            raise ValueError(
                "no table to convert (was the 'reportable' table found on the page?)")

        rows = table.find_all("tr")
        if not rows:
            return pd.DataFrame()

        # Only direct <th>/<td> children count as cells; stray text nodes
        # between tags must not turn into phantom columns.
        list_header = [
            cell.get_text()
            for cell in rows[0].find_all(["th", "td"], recursive=False)
        ]

        data = []
        for row in rows[1:]:
            sub_data = [
                cell.get_text(strip=True)
                for cell in row.find_all(["td", "th"], recursive=False)
            ]
            if not sub_data:
                # Spacer / empty row: nothing to record.
                continue
            if len(sub_data) > 1:
                sub_data[1] = sub_data[1].replace('\xa0', ' ')
            data.append(sub_data)

        return pd.DataFrame(data=data, columns=list_header)

In [5]:
from datetime import datetime, timedelta, timezone

import pandas as pd


def main():
    """Scrape the ATP and WTA rating pages and save the ATP ratings to CSV.

    For each tour the Elo table and the season yElo table are downloaded,
    indexed by ``Player`` and joined (the yElo table's ``Rank``, ``Wins`` and
    ``Losses`` columns are dropped before the join). The joined ATP frame is
    written to ``elo_ratings.csv`` in the current working directory.
    """
    elo_scraper = EloScraper()

    def load_ratings(url):
        # Fetch one report page and return its table indexed by player name.
        table = elo_scraper.get_html_table(url)
        return elo_scraper.convert_html_table_to_csv(table).set_index('Player')

    atp_elo_df = load_ratings(elo_scraper.link_atp_elo)
    atp_yelo_df = load_ratings(elo_scraper.link_atp_yelo)
    wta_elo_df = load_ratings(elo_scraper.link_wta_elo)
    wta_yelo_df = load_ratings(elo_scraper.link_wta_yelo)

    # join elo_df and yelo_df
    yelo_only_duplicates = ['Rank', 'Wins', 'Losses']
    atp_elo_df = atp_elo_df.join(atp_yelo_df.drop(columns=yelo_only_duplicates))
    # NOTE(review): the joined WTA frame is built but never saved or returned;
    # kept as-is so the WTA pages are still fetched and validated as before.
    wta_elo_df = wta_elo_df.join(wta_yelo_df.drop(columns=yelo_only_duplicates))

    # save dataframe to csv file as latest
    atp_elo_df.to_csv('elo_ratings.csv')

# Run the scrape only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

In [6]:
# Reload the saved ratings file and keep only the name and rating columns.
path = "elo_ratings.csv"
elo = pd.read_csv(path).loc[:, ["Player", "Elo"]]

In [7]:
# Preview at most the first 50 rows of the Player/Elo frame.
elo.head(50)

Unnamed: 0,Player,Elo
0,Carlos Alcaraz,2209.3
1,Novak Djokovic,2170.7
2,Alexander Zverev,2107.5
3,Stefanos Tsitsipas,2096.0
4,Jannik Sinner,2060.0
5,Rafael Nadal,2058.8
6,Casper Ruud,2024.9
7,Daniil Medvedev,2023.9
8,Andrey Rublev,2017.0
9,Matteo Berrettini,2003.7
