In [1]:
!pip install yfinance

In [2]:
import yfinance as yf
from pyspark.sql.types import *
from pyspark.sql import functions as f
import requests
import pandas as pd 
import datetime
from notebookutils import mssparkutils

In [12]:
def get_stock_price(symbol_list,start_date,end_date):
    """Fetch the price history of every requested ticker for one date window.

    Args:
        symbol_list: Iterable of ticker symbols to query.
        start_date: Forwarded as ``start`` to ``yf.Ticker(...).history``.
        end_date: Forwarded as ``end`` to ``yf.Ticker(...).history``.

    Returns:
        dict keyed by symbol; each value is whatever ``history`` returned for
        that symbol. A symbol listed more than once is queried each time and
        keeps the result of its last query.
    """
    return {
        ticker_symbol: yf.Ticker(ticker_symbol).history(start=start_date, end=end_date)
        for ticker_symbol in symbol_list
    }

def transform_df(historical_data,symbol_list):
    """Stack per-symbol price frames into one long frame tagged by symbol.

    Each frame is normalised on its own before concatenation: its ``Date``
    index becomes a regular column of ``datetime.date`` values and a
    ``symbol`` column is appended. Converting per frame (rather than after
    concatenation) keeps the ``.dt`` accessor working even when the frames
    carry different time zones, which would otherwise merge into an
    object-dtype column.

    The frames held in ``historical_data`` are never modified; only new
    frames are built.

    Args:
        historical_data: Mapping of symbol -> price data with a datetime
            index named ``Date`` (anything ``pd.DataFrame`` accepts).
        symbol_list: Symbols to include, in output order.

    Returns:
        pandas.DataFrame with a fresh 0..n-1 index and columns ``Date``,
        the original price columns, then ``symbol``.

    Raises:
        KeyError: If a symbol is missing from ``historical_data`` or a
            non-empty frame has no index named ``Date``.
        ValueError: If there is nothing to concatenate (empty
            ``symbol_list`` or every frame is empty).
    """
    frames = []
    for symbol in symbol_list:
        prices = pd.DataFrame(historical_data[symbol])
        if prices.empty:
            # A symbol without rows contributes nothing; skipping it avoids
            # depending on the index dtype/name of an empty frame.
            continue
        # reset_index() and assign() both return new objects, so the frame
        # stored in historical_data is left untouched.
        frames.append(
            prices.reset_index().assign(
                Date=lambda d: d['Date'].dt.date,
                symbol=symbol,
            )
        )

    if not frames:
        raise ValueError("No price data to transform: no symbols given or all frames are empty")

    return pd.concat(frames, ignore_index=True)

In [13]:
if __name__ == "__main__":

    # Tickers whose price history is exported.
    symbol_list = [
        'NVDA', 'AMD', 'INTC', 'QCOM',
        'GOOG', 'MSFT', 'AMZN', 'AAPL',
    ]

    # Query window: the 5*365 days ending at the moment this cell runs.
    end_date = datetime.datetime.now()
    start_date = end_date - datetime.timedelta(days=5*365)

    # Download per-symbol history, flatten it into one pandas frame, then
    # hand it to Spark. `spark` is not defined in this notebook's visible
    # cells; presumably the notebook runtime provides it — confirm.
    historical_data = get_stock_price(symbol_list, start_date, end_date)
    pdf = transform_df(historical_data, symbol_list)
    sp_df = spark.createDataFrame(pdf)

    # Build the data lake name from the workspace name with its first seven
    # characters dropped.
    # NOTE(review): the [7:] slice presumably strips a fixed workspace-name
    # prefix — confirm against the workspace naming convention.
    datalake_nm = 'datalake' + mssparkutils.env.getWorkspaceName()[7:]
    file_path = 'abfss://files@{0}.dfs.core.windows.net/synapse/workspaces/data/stock_price'.format(datalake_nm)

    # Write CSV with a header row, replacing any previous export at that path.
    sp_df.write.mode('overwrite').option("header", True).csv(file_path)