In [106]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [299]:
def get_distillation_profile(crude_name, date="recent", timeout=30):
    """
    Retrieve distillation profile of specified crude
    from https://crudemonitor.ca/home.php.
    
    Arguments:
    ----------
    crude_name : str
        acronym of crude
    date : str
        date for which to get distillation profile.
        Must be in format 'YYYY-MM-DD' or 'recent'.
        Defaults to 'recent'.
    timeout : float
        seconds to wait for the server before giving up.
        Defaults to 30.

    Returns:
    --------
    pandas.DataFrame or None
        One row per table row after the header row, indexed by the text
        of that row's <th> cell. Column names come from the header row
        (its first entry is skipped). Cell values are the strings scraped
        from the page; no numeric conversion is done here.
        None is returned, after printing a message, when the site reports
        no matching crude / no samples, or when no table is found.

    Raises:
    -------
    AssertionError
        if date is neither 'recent' nor of the form YYYY-MM-DD.
    requests.HTTPError
        if the server answers with an error status code.
    """

    # fullmatch (not match) so trailing characters like '2020-01-01xyz' are rejected.
    assert date == "recent" or re.fullmatch(r"\d\d\d\d-\d\d-\d\d", date), \
        "date must be either 'recent' or in format YYYY-MM-DD"

    # Use the crude_name argument (not a notebook global) and let requests
    # URL-encode the query parameters.
    response = requests.get(
        "https://crudemonitor.ca/crudes/dist.php",
        params={"acr": crude_name, "time": date},
        timeout=timeout,
    )
    response.raise_for_status()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    # The site signals these conditions with a message at the end of the page.
    page_text = soup.text.rstrip()
    if page_text.endswith("No crudes match the given acronym."):
        print("No distillation samples available for specified crude.")
        return None
    if page_text.endswith("No distillation samples available."):
        print("No distillation samples available for specified date.")
        return None

    tables = soup.find_all("table", {"class": "table table-sm table-striped"})
    if not tables:
        print("No distillation table found in response.")
        return None
    # If several tables match, the last one is used.
    table = tables[-1]

    head_rows = table.find_all("tr", {"id": "tableHeadRow"})
    if not head_rows:
        print("No header row found in distillation table.")
        return None
    # Header cells are separated by newlines in the row text; drop blank lines.
    headers = [line for line in head_rows[-1].text.split("\n") if line != ""]

    row_list = []
    index_list = []
    for tr in table.find_all("tr"):
        row_list.append([td.text for td in tr.find_all("td")])
        # The row label is the (last) <th> in the row; None if the row has none,
        # rather than silently reusing the previous row's label.
        th_cells = tr.find_all("th")
        index_list.append(th_cells[-1].text if th_cells else None)

    # The first <tr> is the header row and the first header names the index
    # column, so both are skipped.
    return pd.DataFrame(data=row_list[1:], columns=headers[1:], index=index_list[1:])

In [300]:
# Example call: request the 'recent' profile for the crude acronym "MGS".
# The returned value is the last expression, so the notebook displays it.
crude, date = "MGS", "recent"
get_distillation_profile(crude_name=crude, date=date)

Unnamed: 0,Temperature( oC ),Temperature( oF ),Average( oC ),Standard Deviation( oC ),Average( oF ),Standard Deviation( oF )
IBP,32.9,91.2,33.8,1.2,92.8,2.2
5,42.7,108.9,40.7,5.3,105.2,9.5
10,84.3,183.7,66.2,16.5,151.2,29.7
20,115.7,240.3,106.0,20.3,222.8,36.5
30,169.4,336.9,165.8,35.5,330.4,63.9
40,232.2,450.0,248.2,50.6,478.8,91.1
50,302.1,575.8,331.9,51.9,629.5,93.4
60,372.2,702.0,411.5,50.9,772.6,91.6
70,450.2,842.4,494.0,57.9,921.1,104.2
80,551.0,1023.8,594.7,65.9,1102.4,118.6
