# Staging BTC ETFs & Financial Benchmark Data

### Script that stages tracking figures for BTC ETFs and financial benchmarks from Yahoo Finance and writes the result to a SQL Server relational database.

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests 
import yfinance as yf
from datetime import datetime
import matplotlib as plt
from typing import List
import sqlalchemy


### Function to retrieve the BTC ETF tickers and append the benchmark tickers

In [2]:
# Get the relevant BTC ETF tickers using farside.co.uk as the source.
# The source is a static website, which we scrape using BeautifulSoup.
def get_tickers(source_url: str, list_extension: List, timeout: float = 30.0) -> List:
    """
        Scrape the BTC ETF tickers from the source website and append the
        extra tickers passed in by the caller.

        The page is fetched with requests and parsed with BeautifulSoup. Every
        <span class="tabletext"> element is read in document order; its text is
        kept as a ticker unless it is the 'Date' column header. Scanning stops
        right after 'GBTC' has been collected (presumably the last ticker
        column of the source table -- verify against the page layout).

        parameters:
            -source_url: str -> url to scrape the ticker values from, e.g. https://farside.co.uk/?p=997
            -list_extension: List -> additional tickers (e.g. financial benchmark
             indexes for comparison) appended after the scraped ones
            -timeout: float -> seconds to wait for the server before giving up

        return: List -> scraped tickers followed by the entries of list_extension

        raises:
            -requests.exceptions.Timeout -> the server did not answer within `timeout`
            -requests.exceptions.HTTPError -> the server answered with a 4xx/5xx status
    """
    # Without an explicit timeout requests waits indefinitely on a silent server.
    response = requests.get(source_url, timeout=timeout)
    # Fail loudly instead of parsing an error page and silently returning
    # nothing but the extension tickers.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    exclude_columns = {'Date'}
    l_tickers = []

    # parse source table
    for etf in soup.find_all('span', class_='tabletext'):
        ticker = etf.get_text()

        if ticker not in exclude_columns:
            l_tickers.append(ticker)
        if ticker == 'GBTC':
            break

    # The caller's list is only read, never modified.
    l_tickers.extend(list_extension)

    return l_tickers

### Function to retrieve ticker data from Yahoo Finance

In [3]:
def get_ticker_values(tickers: List, start_date: str, end_date: str) -> pd.DataFrame:
    """
        Download the price history of every requested ticker from Yahoo finance
        and return all of it as one long-format Pandas Dataframe.

        parameters:
            -tickers: List of ticker symbols to download.
            -start_date: Date as string, passed to Yahoo finance as `start` of the range.
            -end_date: Date as string, passed to Yahoo finance as `end` of the range.

        return:
            -pd.Dataframe with one row per ticker and date and the columns
             Date, Ticker, Open, Close, High, Low and Volume. Without any
             tickers the frame is empty but still has these columns.
    """
    # Metrics copied from every downloaded row, in output column order
    metrics = ('Open', 'Close', 'High', 'Low', 'Volume')

    dates = []
    symbols = []
    values = {metric: [] for metric in metrics}

    # One download per ticker; every downloaded row becomes one output row
    for symbol in tickers:
        history = yf.download(symbol, start=start_date, end=end_date)
        for trading_day, quote in history.iterrows():
            dates.append(trading_day)
            symbols.append(symbol)
            for metric in metrics:
                values[metric].append(quote[metric])

    # Assemble the columns in their final order
    return pd.DataFrame({'Date': dates, 'Ticker': symbols, **values})


### Function to write a pandas DataFrame to SQL Server

In [4]:
def write_df_to_rds(df: pd.DataFrame, conn, schema, table_name):
    """
        Persist a Pandas Dataframe as a table in the RDS Database.

        An already existing destination table is replaced, so every call
        leaves the table holding exactly the rows of `df`.

        parameters:
            -df: Pandas Dataframe to write
            -conn: SQL Alchemy engine object used for the write
            -schema: schema of the destination table within the RDS database
            -table_name: name of the destination table within the RDS database
    """
    # Full reload: 'replace' swaps out any existing table for the new data
    destination = {
        'name': table_name,
        'con': conn,
        'schema': schema,
        'if_exists': 'replace',
    }
    df.to_sql(**destination)

In [5]:
# Static connection settings for the SQL Server instance that hosts the DWH.
# The URL carries no user name or password: it requests a trusted connection
# through the "ODBC Driver 17 for SQL Server" driver.
servername = 'NB-SMET-5CD3102PV6'
dbname = 'syntra_dwh'
conn = sqlalchemy.create_engine(
    f'mssql+pyodbc://@{servername}/{dbname}'
    '?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server'
)

In [6]:
# Full staging run: scrape the BTC ETF tickers, add the benchmark tickers,
# download their figures for the date range and write them to the staging table.
write_df_to_rds(
    df=get_ticker_values(
        tickers=get_tickers(
            source_url="https://farside.co.uk/?p=997",
            list_extension=['^GSPC', '^DJI', 'DX-Y.NYB', 'BTC-USD'],
        ),
        start_date='2004-11-18',
        end_date='2024-12-31',
    ),
    conn=conn,
    schema='staging',
    table_name='stg_yahoo_fin_btc_gspc_dji_dxy_ticker_figures',
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
