Job that stages BTC ETF ticker figures (close price and volume) from Yahoo Finance and writes the result to a SQL Server relational database.

In [21]:
import pandas as pd
from bs4 import BeautifulSoup
import requests 
import yfinance as yf
from datetime import datetime
import matplotlib as plt
from typing import List
import sqlalchemy


In [11]:
# Get the relevant BTC tickers using farside.co.uk as the source.
# The source is a static website, which we scrape using BeautifulSoup.
def get_btc_tickers(source_url: str, timeout: float = 30.0) -> List:
    """
        This function scrapes the source website using BeautifulSoup, looks for
        tickers in the expected website structure and returns these as a List
        object.

        Every <span class="tabletext"> element is read in document order. The
        'Date' column header is skipped and collection stops as soon as 'GBTC'
        has been added.

        parameters:
            -source_url: str -> source url to scrape the ticker values from
            -timeout: float -> seconds to wait for the source website before
                giving up; without a timeout requests can block indefinitely

        return: List -> list of btc tickers

        raises:
            -requests.HTTPError when the website answers with a 4xx/5xx status
            -requests.Timeout when the website does not answer within timeout
    """
    response = requests.get(source_url, timeout=timeout)
    # Fail loudly on an error page instead of parsing it and silently
    # returning an empty or bogus ticker list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    exclude_columns = {'Date'}
    l_tickers = []

    # parse source table
    for etf in soup.find_all('span', class_='tabletext'):
        # Strip surrounding whitespace so the comparisons below do not depend
        # on how the page happens to be formatted.
        ticker = etf.get_text().strip()

        if not ticker:
            # An empty span carries no ticker symbol.
            continue
        if ticker not in exclude_columns:
            l_tickers.append(ticker)
        if ticker == 'GBTC':
            # 'GBTC' marks the end of the ticker header; stop before the
            # remaining spans of the page are reached.
            break

    return l_tickers

['IBIT',
 'FBTC',
 'BITB',
 'ARKB',
 'BTCO',
 'EZBC',
 'BRRR',
 'HODL',
 'BTCW',
 'GBTC']

In [32]:
def get_btc_ticker_values(tickers: List, start_date: str, end_date: str) -> pd.DataFrame:
    """
        This function downloads the btc ticker values from Yahoo finance
        and returns the result as a Pandas Dataframe in long format, with one
        row per ticker per date.

        parameters:
            -tickers: List object with the tickers you want values for.
            -start_date: Date as string, passed to Yahoo finance as start of the date range.
            -end_date: Date as string, passed to Yahoo finance as end of the date range.

        return:
            -pd.Dataframe with the columns 'Date', 'Ticker', 'Close' and 'Volume'.
             Tickers for which nothing was downloaded contribute no rows; when
             no rows exist at all, an empty Dataframe with those four columns
             is returned.
    """
    columns = ['Date', 'Ticker', 'Close', 'Volume']  # Add more metrics as needed
    frames = []

    # Download each ticker and reshape it column-wise; this avoids a Python
    # loop over every row and keeps the dtypes of the downloaded columns.
    for ticker in tickers:
        data = yf.download(ticker, start=start_date, end=end_date)
        if data.empty:
            continue

        # NOTE(review): some yfinance releases return two-level column headers
        # even for a single ticker; the first level is assumed to hold the
        # metric name ('Close', 'Volume', ...) -- confirm against the installed
        # version.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        frames.append(
            pd.DataFrame(
                {
                    'Date': data.index,
                    'Ticker': ticker,
                    'Close': data['Close'].to_numpy(),
                    'Volume': data['Volume'].to_numpy(),
                }
            )
        )

    if not frames:
        # pd.concat rejects an empty list, so build the empty result directly.
        return pd.DataFrame({name: [] for name in columns})

    return pd.concat(frames, ignore_index=True)


In [33]:
def write_df_to_rds(df: pd.DataFrame, conn, schema, table_name, *, if_exists: str = 'fail', index: bool = True) -> None:
    """
        This function writes an incoming Pandas Dataframe to the RDS Database.

        parameters:
            -df: incoming Pandas Dataframe
            -conn: incoming SQL Alchemy engine object
            -schema: table schema within the RDS database
            -table_name: destination table name within the RDS database
            -if_exists: what to do when the destination table already exists:
                'fail' (default) raises a ValueError, 'replace' drops and
                recreates the table, 'append' inserts the rows into it.
                Pass 'replace' or 'append' to make the job re-runnable.
            -index: when True (default) the Dataframe index is written as an
                extra column; pass False to write only the data columns.

        raises:
            -ValueError when the table already exists and if_exists is 'fail'
    """

    df.to_sql(
        name=table_name,
        con=conn,
        schema=schema,
        if_exists=if_exists,
        index=index,
    )

In [43]:
# Static connection settings for the SQL Server database. The URL asks for a
# trusted connection, so no username or password appears in it.
servername, dbname = 'NB-SMET-5CD3102PV6', 'syntra_dwh'
conn = sqlalchemy.create_engine(
    f'mssql+pyodbc://@{servername}/{dbname}'
    '?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server'
)

In [44]:
# Run the job: scrape the tickers, download their figures for the date range
# and write the resulting Dataframe to the staging table.
write_df_to_rds(
    df=get_btc_ticker_values(
        tickers=get_btc_tickers(source_url="https://farside.co.uk/?p=997"),
        start_date='2020-01-01',
        end_date='2024-06-01',
    ),
    conn=conn,
    schema='staging',
    table_name='stg_yahoo_fin_btc_ticker_figures',
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
