In [19]:
import requests
from bs4 import BeautifulSoup
import re
from tqdm.notebook import tqdm_notebook
import pandas as pd

### getting all codes

In [20]:
url = "https://www.bankofengland.co.uk/boeapps/database/index.asp?Travel=NIxIRx&levels=2&XNotes=Y&C=5JK&C=UH5&C=UH6&C=UH7&C=UH4&C=UH3&A41513XNode41513.x=12&A41513XNode41513.y=17&Nodes=X3687X3688X3691X3694X3716X3738X3754X3757X3764X3771X3774X3695X38047X3698X3739X3742X3745X3765X3768X3758X3761X3701X3704X3707X3710X3713X3717X3720X3723X3726X3729X3732X3735X3748X3751X31340X31344X31345X31346X31341X31361X31369X31372X31342X31366X31377X31385X31343X31388X31390X31391X3951X3952X3955X3958X3961X3965X3969X3972X3975X3978X3981X3985X3989X3992X3995X3998X4001X4004X4007X4010X4013X4016X4019X41107X41122X4022X4023X4026X4029X4032X4035X4036X4039X4042X4045X4048X33900X33901X4051X4052X4053X4054X4058X4062X4066X4067X4068X4069X4070X38263X38563X38564X38565X38598X4071X4072X4073X38265X38555X38556X38557X38600X4074X4075X4076X38267X38547X38548X38549X38602X4097X4098X4099X4100X4101X38281X38487X38488X38489X38614X4102X4103X4104X38283X38479X38480X38481X38616X4105X4106X4107X38285X38471X38472X38473X38618X4128X4129X4132X4135X4138X33880X37685X42428X42429X42430X55729X67292X67293X67294X79761X79762X79763X80341X85781X85921X86021X87941X88261X89101&SectionRequired=I&HideNums=-1&ExtraInfo=true#BM"

In [21]:
r = requests.get(url)

In [22]:
html = r.text.replace("\t", "").replace("\n", "").replace("\r", "")

In [23]:
codes = re.findall("([A-Z0-9]{3}[A-Z0-9]+) - Daily", html)

### given a code we can get the time series

In [24]:
url = "https://www.bankofengland.co.uk/boeapps/database/fromshowcolumns.asp?Travel=NIxAZxSUx&FromSeries=1&ToSeries=50&DAT=RNG&FD=1&FM=Jan&FY=2009&TD=31&TM=Dec&TY=2020&FNY=Y&CSVF=TT&html.x=66&html.y=26&SeriesCodes={code}&UsingCodes=Y&Filter=N&title=IUDBEDR&VPD=Y#"

In [25]:
def get_dataframe(code="XUDLPEG", url=url):
    url = url.format(code=code)
    r = requests.get(url)
    html = r.text.replace("\t", "").replace("\n", "").replace("\r", "")
    soup = BeautifulSoup(html)
    make_chart_script = soup.find(text=re.compile("AmCharts.makeChart"))
    data = re.findall('{"Date": "(\d\d-\d\d-\d{4})","Value": "(\d+.?\d+?)"', str(make_chart_script))
    df = pd.DataFrame.from_records(data, columns=["date", "value"])
    df["date"] = pd.to_datetime(df["date"])
    df["value"] = df["value"].astype(float)
    df = df.set_index("date")
    df["code"] = code

    return df

In [26]:
df = get_dataframe()

In [27]:
df.empty

True

In [34]:
results = []
failures = []
for code in tqdm_notebook(codes):
    try:
        data = get_dataframe(code)
    except:
        print(f"Data collection failed for {code}")
    if not data.empty:
        results.append(data)
    else:
        print(f"Empty dataframe returned for {code}")
        failures.append(code)

HBox(children=(FloatProgress(value=0.0, max=104.0), HTML(value='')))

Empty dataframe returned for IUDVJND
Empty dataframe returned for IUDAJND
Empty dataframe returned for IUDLEB3
Empty dataframe returned for XUDLGBG
Empty dataframe returned for XUDLASG
Empty dataframe returned for XUDLBFG
Empty dataframe returned for XUDLDMG
Empty dataframe returned for XUDLFMG
Empty dataframe returned for XUDLFFG
Empty dataframe returned for XUDLIPG
Empty dataframe returned for XUDLILG
Empty dataframe returned for XUDLNGG
Empty dataframe returned for XUDLPEG
Empty dataframe returned for XUDLSPG
Empty dataframe returned for XUDLGDG
Empty dataframe returned for IUDAJUR
Empty dataframe returned for IUDEBEN
Empty dataframe returned for IUDBK58
Empty dataframe returned for IUDAJLW



In [36]:
failures

['IUDVJND',
 'IUDAJND',
 'IUDLEB3',
 'XUDLGBG',
 'XUDLASG',
 'XUDLBFG',
 'XUDLDMG',
 'XUDLFMG',
 'XUDLFFG',
 'XUDLIPG',
 'XUDLILG',
 'XUDLNGG',
 'XUDLPEG',
 'XUDLSPG',
 'XUDLGDG',
 'IUDAJUR',
 'IUDEBEN',
 'IUDBK58',
 'IUDAJLW']

In [40]:
final_df = pd.concat(results)

In [41]:
final_df

Unnamed: 0_level_0,value,code
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-08-01,1.5000,IUDBEDR
2009-09-01,1.5000,IUDBEDR
2009-12-01,1.5000,IUDBEDR
2009-01-13,1.5000,IUDBEDR
2009-01-14,1.5000,IUDBEDR
...,...,...
2016-08-07,0.4011,IUDAJLT
2016-11-07,0.4956,IUDAJLT
2016-12-07,0.4304,IUDAJLT
2016-07-13,0.4488,IUDAJLT


In [43]:
final_df.to_csv("daily_codes.csv")