In [3]:
import pandas as pd
import urllib3
import pickle
from pathlib import Path
from bs4 import BeautifulSoup

from IPython.display import display

In [4]:
# Record the interpreter version this notebook was executed with, so the
# saved outputs can be matched to a Python release.
from platform import python_version

interpreter_version = python_version()
print(interpreter_version)

3.9.6


#### Scraping the latest article of available stocks on Revolut

In [5]:
# Download the article page that holds the table of stocks.
http = urllib3.PoolManager()
url = 'https://globefunder.com/revolut-stocks-list/'

# Bound the wait so a stalled server cannot hang the notebook indefinitely.
response = http.request('GET', url, timeout=30.0)

# Stop here on an error status instead of handing an error page to the parser,
# where the failure would only surface later as a missing element.
if response.status != 200:
    raise RuntimeError(f'GET {url} returned HTTP {response.status}')

In [6]:
# Parse the raw response body with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.data, features='html.parser')

In [7]:
# Locate the "last updated" timestamp element and the figure wrapping the table.
time_updated = soup.find('time', {'class': 'updated'})
table_figure = soup.find('figure', {'class': 'wp-block-table'})

# `find` returns None when nothing matches. Fail here with a clear message
# rather than with an AttributeError/TypeError in a later cell.
if time_updated is None or not time_updated.has_attr('datetime'):
    raise ValueError("No <time class='updated'> element with a 'datetime' attribute was found")
if table_figure is None or table_figure.table is None:
    raise ValueError("No <figure class='wp-block-table'> element containing a <table> was found")

table = table_figure.table

In [8]:
# Column names, in the order the cells appear in each table row.
attrs = ('no', 'name', 'ticker', 'price', 'sector', 'industry')
available_stocks = []

# Visit only the <tr> elements that are direct children of <tbody>, so any
# text nodes sitting between rows are ignored.
for row in table.tbody.find_all('tr', recursive=False):
    # Take exactly one value per cell. Zipping against every text node in the
    # row would shift all following columns whenever a cell is empty or holds
    # nested markup that splits its text into several strings.
    cells = [
        cell.get_text(' ', strip=True)
        for cell in row.find_all(['td', 'th'], recursive=False)
    ]
    stock = dict(zip(attrs, cells))
    available_stocks.append(stock)

#### Converting to DataFrame and removing unused columns

In [9]:
# Build the frame from the scraped records and discard the row-number and
# price columns in a single expression.
stocks_df = pd.DataFrame(available_stocks).drop(columns=['no', 'price'])

display(stocks_df)

Unnamed: 0,name,ticker,sector,industry
0,21st Century Fox,FOXA,Consumer Services,Broadcasting
1,23andMe Holding Co,ME,Finance,Financial Conglomerates
2,2U,TWOU,Technology Services,Packaged Software
3,3M,MMM,Producer Manufacturing,Industrial Conglomerates
4,Abbott Labs,ABT,Health Technology,Medical Specialties
...,...,...,...,...
910,Zoetis,ZTS,Health Technology,Pharmaceuticals: Generic
911,Zoom,ZM,Technology Services,Packaged Software
912,Zscaler,ZS,Technology Services,Packaged Software
913,ZTO Express,ZTO,Transportation,Air Freight/Couriers


In [10]:
# Show the dtype pandas assigned to each remaining column.
stocks_df.dtypes

name        object
ticker      object
sector      object
industry    object
dtype: object

#### Saving to a file

In [14]:
# Output location: one JSON Lines file per article revision, named after the
# 'datetime' attribute of the page's "updated" <time> element.
out_dir = '../../data/available_stocks'
out_file = f'revolut.{time_updated["datetime"]}.jsonl'

path = Path(out_dir)
# parents=True also creates missing intermediate directories (e.g. ../../data),
# so the cell works on a fresh checkout; exist_ok=True keeps re-runs harmless.
path.mkdir(parents=True, exist_ok=True)
path = path.joinpath(out_file)

In [15]:
# Write the frame as JSON Lines: one JSON object per stock, one per line.
stocks_df.to_json(path_or_buf=path, lines=True, orient='records')