## Loading all 3 ticker together

In [1]:
import yfinance as yf
tickers_list = ["UNH", "PFE", "MRK"]
tickers = yf.Tickers(tickers_list)
hist = tickers.download(start="2022-01-01", end="2024-12-31") 

[*********************100%***********************]  3 of 3 completed


In [2]:
hist.to_csv('datasets/combined_data.csv')

## Saving the tickers into separate csv files

In [59]:
for ticker in tickers_list:
    stock = yf.Ticker(ticker)
    stock_hist = stock.history(start="2022-01-01", end="2024-12-31")
    stock_hist.to_csv(f"datasets/{ticker}.csv")


## To load a single ticker from csv

In [32]:
import pandas as pd
x = pd.read_csv("datasets/UNH.csv")
x["Date"] = pd.to_datetime(x["Date"], utc=True).dt.strftime('%d-%m-%Y') #convert to date object
x

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,03-01-2022,480.694661,483.482684,474.012993,482.886627,3033600,0.0,0.0
1,04-01-2022,483.771152,484.299904,471.926828,471.946045,3908100,0.0,0.0
2,05-01-2022,475.666652,477.810561,470.782806,470.782806,3921100,0.0,0.0
3,06-01-2022,461.466802,465.446968,445.238548,451.516418,6555200,0.0,0.0
4,07-01-2022,449.632184,449.901371,437.960903,440.893158,5494200,0.0,0.0
...,...,...,...,...,...,...,...,...
699,15-10-2024,552.020020,569.700012,543.000000,556.289978,11939400,0.0,0.0
700,16-10-2024,557.010010,576.179993,555.119995,571.340027,5103900,0.0,0.0
701,17-10-2024,560.580017,569.469971,558.719971,566.030029,5588700,0.0,0.0
702,18-10-2024,566.849976,570.950012,563.039978,569.609985,3686200,0.0,0.0


## If for some reason u want use this csv to load 1 df only

In [6]:
y = pd.read_csv("datasets/combined_data.csv")
y

Unnamed: 0,Price,Close,Close.1,Close.2,Dividends,Dividends.1,Dividends.2,High,High.1,High.2,...,Low.2,Open,Open.1,Open.2,Stock Splits,Stock Splits.1,Stock Splits.2,Volume,Volume.1,Volume.2
0,Ticker,MRK,PFE,UNH,MRK,PFE,UNH,MRK,PFE,UNH,...,UNH,MRK,PFE,UNH,MRK,PFE,UNH,MRK,PFE,UNH
1,Date,,,,,,,,,,...,,,,,,,,,,
2,2022-01-03 00:00:00+00:00,71.0883560180664,50.231380462646484,482.88665771484375,0.0,0.0,0.0,71.15309080470647,51.916102533923464,483.48271443752,...,474.0130231550537,70.82016702896833,51.87176836566253,480.6946910970124,0.0,0.0,0.0,9441200,57219200,3033600
3,2022-01-04 00:00:00+00:00,71.21784210205078,48.35158157348633,471.9460144042969,0.0,0.0,0.0,71.45828161927723,50.036307060829664,484.29987311488367,...,471.92679717990984,70.6537215210213,49.850101520933485,483.7711207211378,0.0,0.0,0.0,11981700,66993100,3908100
4,2022-01-05 00:00:00+00:00,72.94718170166016,49.32695007324219,470.78277587890625,0.0,0.0,0.0,73.80723372506863,50.045172405887485,477.8105300195773,...,470.78277587890625,71.44903157128401,49.51315561263317,475.6666211034265,0.0,0.0,0.0,17447900,52956400,3921100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,2024-10-14 00:00:00+00:00,109.69000244140625,29.079999923706055,605.4000244140625,0.0,0.0,0.0,110.06999969482422,29.170000076293945,608.6300048828125,...,597.3800048828125,109.43000030517578,29.15999984741211,599.1099853515625,0.0,0.0,0.0,8130700,23594200,2447100
701,2024-10-15 00:00:00+00:00,111.52999877929688,29.440000534057617,556.2899780273438,0.0,0.0,0.0,111.58000183105469,29.739999771118164,569.7000122070312,...,543.0,109.95999908447266,29.09000015258789,552.02001953125,0.0,0.0,0.0,9109500,29009900,11939400
702,2024-10-16 00:00:00+00:00,110.41999816894531,29.670000076293945,571.3400268554688,0.0,0.0,0.0,111.19999694824219,29.81999969482422,576.1799926757812,...,555.1199951171875,111.0,29.5,557.010009765625,0.0,0.0,0.0,8729200,28414800,5103900
703,2024-10-17 00:00:00+00:00,109.75,29.270000457763672,566.030029296875,0.0,0.0,0.0,110.19000244140625,29.530000686645508,569.469970703125,...,558.719970703125,110.13999938964844,29.479999542236328,560.5800170898438,0.0,0.0,0.0,8513200,23747500,5588200


## Adding dt, return and log-return as columns

In [60]:
import numpy as np
import pandas as pd
for ticker in tickers_list:
    stock = pd.read_csv(f"datasets/{ticker}.csv")
    stock["Date"] = pd.to_datetime(stock["Date"], utc=True)
    stock["dt"] = 1 / 250
    # Calculate return as percentage change in 'Close' prices
    stock["return"] = stock["Close"].diff()

    # Calculate log-return
    stock["log_return"] = np.log(stock["Close"] / stock["Close"].shift(1))
    
    stock.to_csv(f"datasets/{ticker}.csv")

In [61]:
stock[stock["Date"].dt.year == 2022]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,dt,return,log_return
0,2022-01-03 05:00:00+00:00,70.820167,71.153091,69.682676,71.088356,9441200,0.0,0.0,0.004,,
1,2022-01-04 05:00:00+00:00,70.653699,71.458259,70.182056,71.217819,11981700,0.0,0.0,0.004,0.129463,0.001820
2,2022-01-05 05:00:00+00:00,71.449039,73.807241,71.449039,72.947189,17447900,0.0,0.0,0.004,1.729370,0.023993
3,2022-01-06 05:00:00+00:00,72.863955,73.594538,72.087131,72.900948,11359200,0.0,0.0,0.004,-0.046242,-0.000634
4,2022-01-07 05:00:00+00:00,72.928681,74.473073,72.558765,74.260376,15212000,0.0,0.0,0.004,1.359428,0.018476
...,...,...,...,...,...,...,...,...,...,...,...
246,2022-12-23 05:00:00+00:00,106.189749,107.029719,105.827036,106.772003,4547400,0.0,0.0,0.004,0.591805,0.005558
247,2022-12-27 05:00:00+00:00,107.296990,107.468795,106.457012,107.020180,5125800,0.0,0.0,0.004,0.248177,0.002322
248,2022-12-28 05:00:00+00:00,107.755150,107.755150,106.008387,106.027481,5107800,0.0,0.0,0.004,-0.992699,-0.009319
249,2022-12-29 05:00:00+00:00,106.323385,106.619282,105.712496,105.779312,4467200,0.0,0.0,0.004,-0.248169,-0.002343


## Retrieving options data and saving to csv

In [26]:
for ticker in tickers_list:
    stock = yf.Ticker(ticker)
    opt = stock.option_chain()
    opt.calls.to_csv(f"datasets/{ticker}_calls.csv")

In [27]:
opt.calls

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency
0,MRK241025C00090000,2024-10-09 19:54:23+00:00,90.0,20.5,0.0,0.0,0.0,0.0,,1,1e-05,True,REGULAR,USD
1,MRK241025C00099000,2024-10-15 14:05:18+00:00,99.0,11.55,0.0,0.0,0.0,0.0,,2,1e-05,True,REGULAR,USD
2,MRK241025C00100000,2024-10-18 19:01:28+00:00,100.0,9.2,0.0,0.0,0.0,0.0,20.0,21,1e-05,True,REGULAR,USD
3,MRK241025C00101000,2024-10-16 15:55:43+00:00,101.0,9.15,0.0,0.0,0.0,0.0,4.0,19,1e-05,True,REGULAR,USD
4,MRK241025C00103000,2024-10-17 16:44:11+00:00,103.0,7.15,0.0,0.0,0.0,0.0,11.0,12,1e-05,True,REGULAR,USD
5,MRK241025C00104000,2024-10-15 17:49:47+00:00,104.0,7.35,0.0,0.0,0.0,0.0,,3,1e-05,True,REGULAR,USD
6,MRK241025C00105000,2024-10-18 19:47:04+00:00,105.0,3.99,0.0,0.0,0.0,0.0,63.0,148,1e-05,True,REGULAR,USD
7,MRK241025C00106000,2024-10-18 19:43:10+00:00,106.0,3.08,0.0,0.0,0.0,0.0,7.0,9,1e-05,True,REGULAR,USD
8,MRK241025C00107000,2024-10-18 18:52:39+00:00,107.0,2.55,0.0,0.0,0.0,0.0,17.0,38,1e-05,True,REGULAR,USD
9,MRK241025C00108000,2024-10-18 19:42:06+00:00,108.0,1.63,0.0,0.0,0.0,0.0,556.0,687,1e-05,True,REGULAR,USD
