In [37]:
import os
import sys
import datetime
import numpy as np
import pandas as pd
import yfinance as yf

import matplotlib.pyplot as plt
%matplotlib inline

Data Exploration - To get specific ticker data

In [38]:
start_date = datetime.datetime(2015, 1, 1).date()
end_date = datetime.datetime.now().date()
start_date, end_date

(datetime.date(2015, 1, 1), datetime.date(2025, 11, 1))

In [39]:
tickers = "NVDA"

In [40]:
nvda = yf.Ticker(tickers)

In [41]:
historical_data = nvda.history(start = start_date, end = end_date, interval = '1d')

In [42]:
historical_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-01-02 00:00:00-05:00,0.483038,0.486638,0.47536,0.483038,113680000,0.0,0.0
2015-01-05 00:00:00-05:00,0.483038,0.484478,0.47272,0.47488,197952000,0.0,0.0
2015-01-06 00:00:00-05:00,0.4756,0.47608,0.460002,0.460482,197764000,0.0,0.0
2015-01-07 00:00:00-05:00,0.463842,0.467921,0.457843,0.459282,321808000,0.0,0.0
2015-01-08 00:00:00-05:00,0.464561,0.479439,0.464321,0.476559,283780000,0.0,0.0


In [43]:
historical_data.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
count,2725.0,2725.0,2725.0,2725.0,2725.0,2725.0,2725.0
mean,30.522505,31.047179,29.947724,30.527171,449159200.0,7.2e-05,0.005138
std,46.592921,47.318086,45.759778,46.579813,253434000.0,0.000633,0.206296
min,0.463602,0.467921,0.454483,0.459282,52448000.0,0.0,0.0
25%,3.7438,3.802064,3.655536,3.729916,284885500.0,0.0,0.0
50%,8.788759,9.008672,8.67417,8.77558,398355000.0,0.0,0.0
75%,27.629574,28.091149,27.104983,27.645567,543501000.0,0.0,0.0
max,207.979996,212.190002,204.779999,207.039993,3692928000.0,0.01,10.0


In [44]:
fig = plt.figure()

plt.plot(historical_data.Close)

plt.legend(["Close", "Open"])

<matplotlib.legend.Legend at 0x2a3a3c2e750>

Feature Engineering - Dataset

In [45]:
historical_data.drop(columns=["Dividends", "Stock Splits", "Volume"], inplace=True)

In [46]:
historical_data.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-02 00:00:00-05:00,0.483038,0.486638,0.47536,0.483038
2015-01-05 00:00:00-05:00,0.483038,0.484478,0.47272,0.47488
2015-01-06 00:00:00-05:00,0.4756,0.47608,0.460002,0.460482
2015-01-07 00:00:00-05:00,0.463842,0.467921,0.457843,0.459282
2015-01-08 00:00:00-05:00,0.464561,0.479439,0.464321,0.476559


In [47]:
print(historical_data.columns)

Index(['Open', 'High', 'Low', 'Close'], dtype='object')


In [48]:
present_date = historical_data.index.max()
weekday = present_date.isoweekday()
days_to_add = 1 if weekday not in [5, 6] else (8 - weekday)
next_date = present_date + pd.Timedelta(days=days_to_add)

print(f"Present date: {present_date}")
print(f"Next valid date: {next_date}")

test_row = pd.DataFrame({'Date': [next_date],**{col: [0.0] for col in historical_data.columns if col != 'Date'}})
test_row.head()

Present date: 2025-10-31 00:00:00-04:00
Next valid date: 2025-11-02 23:00:00-05:00


Unnamed: 0,Date,Open,High,Low,Close
0,2025-11-02 23:00:00-05:00,0.0,0.0,0.0,0.0


Since the useful columns are - Date, Close(shows closing price), We need an adj_Close (for splits/dividends).
We can drop the rest

We need Lag Features for each day, to keep track of last traded price.

In [49]:
for i in range(1, 7):
    historical_data[f"Close_lag_{i}"] = historical_data.Close.shift(periods=i, axis=0)
    historical_data[f"Open_lag_{i}"] = historical_data.Open.shift(periods=i, axis=0)
    historical_data[f"High_lag_{i}"] = historical_data.High.shift(periods=i, axis=0)
    historical_data[f"Low_lag_{i}"] = historical_data.Low.shift(periods=i, axis=0)

historical_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Close_lag_1,Open_lag_1,High_lag_1,Low_lag_1,Close_lag_2,Open_lag_2,...,High_lag_4,Low_lag_4,Close_lag_5,Open_lag_5,High_lag_5,Low_lag_5,Close_lag_6,Open_lag_6,High_lag_6,Low_lag_6
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02 00:00:00-05:00,0.483038,0.486638,0.47536,0.483038,,,,,,,...,,,,,,,,,,
2015-01-05 00:00:00-05:00,0.483038,0.484478,0.47272,0.47488,0.483038,0.483038,0.486638,0.47536,,,...,,,,,,,,,,
2015-01-06 00:00:00-05:00,0.4756,0.47608,0.460002,0.460482,0.47488,0.483038,0.484478,0.47272,0.483038,0.483038,...,,,,,,,,,,
2015-01-07 00:00:00-05:00,0.463842,0.467921,0.457843,0.459282,0.460482,0.4756,0.47608,0.460002,0.47488,0.483038,...,,,,,,,,,,
2015-01-08 00:00:00-05:00,0.464561,0.479439,0.464321,0.476559,0.459282,0.463842,0.467921,0.457843,0.460482,0.4756,...,0.486638,0.47536,,,,,,,,


In [50]:
historical_data.drop(columns = ["Open","High","Low"],inplace = True)

In [51]:
historical_data.fillna(0, inplace = True)
historical_data.head()

Unnamed: 0_level_0,Close,Close_lag_1,Open_lag_1,High_lag_1,Low_lag_1,Close_lag_2,Open_lag_2,High_lag_2,Low_lag_2,Close_lag_3,...,High_lag_4,Low_lag_4,Close_lag_5,Open_lag_5,High_lag_5,Low_lag_5,Close_lag_6,Open_lag_6,High_lag_6,Low_lag_6
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02 00:00:00-05:00,0.483038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-05 00:00:00-05:00,0.47488,0.483038,0.483038,0.486638,0.47536,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-06 00:00:00-05:00,0.460482,0.47488,0.483038,0.484478,0.47272,0.483038,0.483038,0.486638,0.47536,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-07 00:00:00-05:00,0.459282,0.460482,0.4756,0.47608,0.460002,0.47488,0.483038,0.484478,0.47272,0.483038,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-08 00:00:00-05:00,0.476559,0.459282,0.463842,0.467921,0.457843,0.460482,0.4756,0.47608,0.460002,0.47488,...,0.486638,0.47536,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Defining a function to do this 

In [56]:
historical_data.reset_index(inplace=True)
print(historical_data.columns)

Index(['Date', 'Close', 'Close_lag_1', 'Open_lag_1', 'High_lag_1', 'Low_lag_1',
       'Close_lag_2', 'Open_lag_2', 'High_lag_2', 'Low_lag_2', 'Close_lag_3',
       'Open_lag_3', 'High_lag_3', 'Low_lag_3', 'Close_lag_4', 'Open_lag_4',
       'High_lag_4', 'Low_lag_4', 'Close_lag_5', 'Open_lag_5', 'High_lag_5',
       'Low_lag_5', 'Close_lag_6', 'Open_lag_6', 'High_lag_6', 'Low_lag_6'],
      dtype='object')


In [54]:
def get_stock_data(ticker: str):        
    start_date = datetime.datetime(2015, 1, 1).date()
    end_date = datetime.datetime.now().date()
    try:
        check = yf.Ticker(ticker)
    except:
        print("Error in fetching data")
        return
    historical_data = check.history(start = start_date, end = end_date, interval = '1d')
    historical_data.drop(columns=["Dividends", "Stock Splits", "Volume"], inplace=True)
    present_date = historical_data.index.max()
    weekday = present_date.isoweekday()
    days_to_add = 1 if weekday not in [5, 6] else (8 - weekday)
    next_date = present_date + pd.Timedelta(days=days_to_add)
    test_row = pd.DataFrame({'Date': [next_date],**{col: [0.0] for col in historical_data.columns if col != 'Date'}})
    for i in range(1, 7):
        historical_data[f"Close_lag_{i}"] = historical_data.Close.shift(periods=i, axis=0)
        historical_data[f"Open_lag_{i}"] = historical_data.Open.shift(periods=i, axis=0)
        historical_data[f"High_lag_{i}"] = historical_data.High.shift(periods=i, axis=0)
        historical_data[f"Low_lag_{i}"] = historical_data.Low.shift(periods=i, axis=0)
    historical_data.drop(columns = ["Open","High","Low"],inplace = True)
    historical_data.fillna(0, inplace = True)
    historical_data.reset_index(inplace=True)
    return historical_data