## Loading all 3 ticker together

In [1]:
import yfinance as yf
tickers_list = ["UNH", "PFE", "MRK"]
tickers = yf.Tickers(tickers_list)
hist = tickers.download(start="2022-01-01", end="2024-12-31") 

[*********************100%***********************]  3 of 3 completed


In [2]:
hist.to_csv('datasets/combined_data.csv')

## Saving the tickers into separate csv files

In [3]:
for ticker in tickers_list:
    stock = yf.Ticker(ticker)
    stock_hist = stock.history(start="2022-01-01", end="2024-12-31")
    stock_hist.to_csv(f"datasets/{ticker}.csv")


## To load a single ticker from csv

In [4]:
import pandas as pd
x = pd.read_csv("datasets/UNH.csv")
x["Date"] = pd.to_datetime(x["Date"], utc=True).dt.strftime('%d-%m-%Y') #convert to date object
x

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,03-01-2022,480.694691,483.482714,474.013023,482.886658,3033600,0.0,0.0
1,04-01-2022,483.771183,484.299936,471.926858,471.946075,3908100,0.0,0.0
2,05-01-2022,475.666621,477.810530,470.782776,470.782776,3921100,0.0,0.0
3,06-01-2022,461.466896,465.447062,445.238639,451.516510,6555200,0.0,0.0
4,07-01-2022,449.632152,449.901340,437.960873,440.893127,5494200,0.0,0.0
...,...,...,...,...,...,...,...,...
698,14-10-2024,599.109985,608.630005,597.380005,605.400024,2447100,0.0,0.0
699,15-10-2024,552.020020,569.700012,543.000000,556.289978,11939400,0.0,0.0
700,16-10-2024,557.010010,576.179993,555.119995,571.340027,5103900,0.0,0.0
701,17-10-2024,560.580017,569.469971,558.719971,566.030029,5588200,0.0,0.0


## If for some reason u want use this csv to load 1 df only

In [6]:
y = pd.read_csv("datasets/combined_data.csv")
y

Unnamed: 0,Price,Close,Close.1,Close.2,Dividends,Dividends.1,Dividends.2,High,High.1,High.2,...,Low.2,Open,Open.1,Open.2,Stock Splits,Stock Splits.1,Stock Splits.2,Volume,Volume.1,Volume.2
0,Ticker,MRK,PFE,UNH,MRK,PFE,UNH,MRK,PFE,UNH,...,UNH,MRK,PFE,UNH,MRK,PFE,UNH,MRK,PFE,UNH
1,Date,,,,,,,,,,...,,,,,,,,,,
2,2022-01-03 00:00:00+00:00,71.0883560180664,50.231380462646484,482.88665771484375,0.0,0.0,0.0,71.15309080470647,51.916102533923464,483.48271443752,...,474.0130231550537,70.82016702896833,51.87176836566253,480.6946910970124,0.0,0.0,0.0,9441200,57219200,3033600
3,2022-01-04 00:00:00+00:00,71.21784210205078,48.35158157348633,471.9460144042969,0.0,0.0,0.0,71.45828161927723,50.036307060829664,484.29987311488367,...,471.92679717990984,70.6537215210213,49.850101520933485,483.7711207211378,0.0,0.0,0.0,11981700,66993100,3908100
4,2022-01-05 00:00:00+00:00,72.94718170166016,49.32695007324219,470.78277587890625,0.0,0.0,0.0,73.80723372506863,50.045172405887485,477.8105300195773,...,470.78277587890625,71.44903157128401,49.51315561263317,475.6666211034265,0.0,0.0,0.0,17447900,52956400,3921100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,2024-10-14 00:00:00+00:00,109.69000244140625,29.079999923706055,605.4000244140625,0.0,0.0,0.0,110.06999969482422,29.170000076293945,608.6300048828125,...,597.3800048828125,109.43000030517578,29.15999984741211,599.1099853515625,0.0,0.0,0.0,8130700,23594200,2447100
701,2024-10-15 00:00:00+00:00,111.52999877929688,29.440000534057617,556.2899780273438,0.0,0.0,0.0,111.58000183105469,29.739999771118164,569.7000122070312,...,543.0,109.95999908447266,29.09000015258789,552.02001953125,0.0,0.0,0.0,9109500,29009900,11939400
702,2024-10-16 00:00:00+00:00,110.41999816894531,29.670000076293945,571.3400268554688,0.0,0.0,0.0,111.19999694824219,29.81999969482422,576.1799926757812,...,555.1199951171875,111.0,29.5,557.010009765625,0.0,0.0,0.0,8729200,28414800,5103900
703,2024-10-17 00:00:00+00:00,109.75,29.270000457763672,566.030029296875,0.0,0.0,0.0,110.19000244140625,29.530000686645508,569.469970703125,...,558.719970703125,110.13999938964844,29.479999542236328,560.5800170898438,0.0,0.0,0.0,8513200,23747500,5588200


## Adding dt, return and log-return as columns

In [8]:
import numpy as np
import pandas as pd
for ticker in tickers_list:
    stock = pd.read_csv(f"datasets/{ticker}.csv")
    stock["Date"] = pd.to_datetime(stock["Date"], utc=True)
    stock["dt"] = stock["Date"].diff().dt.total_seconds() / (365.25 * 24 * 60 * 60)
    # Calculate return as percentage change in 'Close' prices
    stock["return"] = stock["Close"].diff()

    # Calculate log-return
    stock["log_return"] = np.log(stock["Close"] / stock["Close"].shift(1))
    stock.to_csv(f"datasets/{ticker}.csv")

In [9]:
stock

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,dt,return,log_return
0,0,2022-01-03 05:00:00+00:00,70.820167,71.153091,69.682676,71.088356,9441200,0.0,0.0,,,
1,1,2022-01-04 05:00:00+00:00,70.653722,71.458282,70.182078,71.217842,11981700,0.0,0.0,0.002738,0.129486,0.001820
2,2,2022-01-05 05:00:00+00:00,71.449032,73.807234,71.449032,72.947182,17447900,0.0,0.0,0.002738,1.729340,0.023992
3,3,2022-01-06 05:00:00+00:00,72.863955,73.594538,72.087131,72.900948,11359200,0.0,0.0,0.002738,-0.046234,-0.000634
4,4,2022-01-07 05:00:00+00:00,72.928703,74.473096,72.558787,74.260399,15212000,0.0,0.0,0.002738,1.359451,0.018476
...,...,...,...,...,...,...,...,...,...,...,...,...
698,698,2024-10-14 04:00:00+00:00,109.430000,110.070000,108.680000,109.690002,8130700,0.0,0.0,0.008214,-0.040001,-0.000365
699,699,2024-10-15 04:00:00+00:00,109.959999,111.580002,109.889999,111.529999,9109500,0.0,0.0,0.002738,1.839996,0.016635
700,700,2024-10-16 04:00:00+00:00,111.000000,111.199997,109.019997,110.419998,8729200,0.0,0.0,0.002738,-1.110001,-0.010002
701,701,2024-10-17 04:00:00+00:00,110.139999,110.190002,109.500000,109.750000,8513200,0.0,0.0,0.002738,-0.669998,-0.006086
