## Ticker

In [14]:
# Symbol of the instrument to download; also used to name the output CSV.
ticker_symbol = "NVDA"

## Data Collection

In [15]:
import yfinance as yf

ticker = yf.Ticker(ticker_symbol)

# 3 years of daily data (the rendered output below shows ~752 trading days).
price_history = ticker.history(period="3y")

# Fail fast instead of silently building features on (and saving) nothing.
# NOTE(review): yfinance appears to return an empty frame rather than raise
# when the symbol is unknown or the request fails -- confirm.
if price_history.empty:
    raise ValueError(f"No price history returned for {ticker_symbol!r}")

# Keep only the OHLCV columns and turn the Date index into a regular column.
df = price_history[["Open", "High", "Low", "Close", "Volume"]].reset_index()


In [16]:
# Preview the first rows to sanity-check the downloaded columns and dates.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2022-07-25 00:00:00-04:00,16.994482,17.099332,16.625016,16.999475,480748000
1,2022-07-26 00:00:00-04:00,16.864672,16.894629,16.454265,16.509186,397865000
2,2022-07-27 00:00:00-04:00,17.007466,17.910165,16.888638,17.764376,569776000
3,2022-07-28 00:00:00-04:00,17.94911,18.113871,17.41488,17.958096,474646000
4,2022-07-29 00:00:00-04:00,17.787341,18.21772,17.666515,18.136837,435460000


In [17]:
# (rows, columns) of the raw download, before any indicator columns are added.
df.shape

(752, 6)

## Feature Engineering

### Technical Indicators

In [18]:
# RSI (Relative Strength Index)

import pandas as pd


def compute_rsi(prices: pd.Series, period: int = 14) -> pd.Series:
    """Relative Strength Index of ``prices`` on a 0-100 scale.

    Upward and downward price changes are each averaged with a plain rolling
    mean over ``period`` observations (no exponential smoothing).

    Args:
        prices: Price series in chronological order.
        period: Number of consecutive price changes averaged per window.

    Returns:
        Series aligned with ``prices``. Entries are NaN until ``period`` price
        changes are available, and also NaN for windows with no movement at
        all (0 / 0). Windows with gains but no losses evaluate to 100.
    """
    changes = prices.diff()

    # Split each change into its upward part and the magnitude of its
    # downward part, then average both over the rolling window.
    mean_up = changes.clip(lower=0).rolling(window=period).mean()
    mean_down = (-changes.clip(upper=0)).rolling(window=period).mean()

    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))


In [19]:
# EMA (Exponential Moving Averages)

def compute_ema(series: pd.Series, span: int) -> pd.Series:
    """Exponential moving average of ``series`` for the given ``span``.

    Uses the recursive form (``adjust=False``), so the first output equals the
    first input and each later value blends the previous average with the new
    observation.
    """
    exponential_window = series.ewm(span=span, adjust=False)
    return exponential_window.mean()

In [20]:
#  MACD (Moving Average Convergence Divergence)

def compute_macd(
    prices: pd.Series,
    fast_span: int = 12,
    slow_span: int = 26,
    signal_span: int = 9,
):
    """Moving Average Convergence Divergence of ``prices``.

    Args:
        prices: Price series in chronological order.
        fast_span: Span of the faster EMA (default 12).
        slow_span: Span of the slower EMA (default 26).
        signal_span: Span of the EMA applied to the MACD line (default 9).

    Returns:
        Tuple ``(macd_line, signal_line, histogram)`` of Series aligned with
        ``prices``: the fast EMA minus the slow EMA, the EMA of that
        difference, and the MACD line minus its signal line.
    """
    fast_ema = compute_ema(prices, span=fast_span)
    slow_ema = compute_ema(prices, span=slow_span)

    macd_line = fast_ema - slow_ema
    signal_line = compute_ema(macd_line, span=signal_span)
    histogram = macd_line - signal_line

    return macd_line, signal_line, histogram

In [21]:
# Bollinger Bands

def compute_bollinger_bands(prices: pd.Series, window: int = 20, num_std: float = 2):
    """Bollinger Bands of ``prices``.

    Args:
        prices: Price series in chronological order.
        window: Number of observations in the rolling window.
        num_std: Band half-width in rolling standard deviations; fractional
            values such as 1.5 are accepted.

    Returns:
        Tuple ``(bb_upper, sma, bb_lower)`` of Series aligned with ``prices``.
        Entries are NaN until ``window`` observations are available. The
        standard deviation is pandas' default sample estimate (``ddof=1``).
    """
    # Build the rolling window once and derive both statistics from it.
    rolling_window = prices.rolling(window=window)
    sma = rolling_window.mean()
    band_width = num_std * rolling_window.std()

    bb_upper = sma + band_width
    bb_lower = sma - band_width

    return bb_upper, sma, bb_lower

In [22]:
# Derive every indicator from the closing price and append it as new columns.
closing_prices = df["Close"]

df["RSI"] = compute_rsi(closing_prices)

macd_line, macd_signal, macd_hist = compute_macd(closing_prices)
df["MACD"] = macd_line
df["MACD_signal"] = macd_signal
df["MACD_hist"] = macd_hist

bb_upper, bb_middle, bb_lower = compute_bollinger_bands(closing_prices)
df["BB_upper"] = bb_upper
df["BB_middle"] = bb_middle
df["BB_lower"] = bb_lower

### Lag Features

In [23]:
# Closing price from 1 to 5 rows earlier, one column per offset.
# The first rows of each column are NaN because no earlier row exists.
lag = 1
while lag < 6:
    df[f"Close_lag_{lag}"] = df["Close"].shift(lag)
    lag += 1

### Rolling Statistics

In [24]:
# Trailing 5-row mean of Close; the window includes the current row.
# NOTE(review): if this frame is later used to predict the same row's Close,
# this feature contains the target -- confirm that is intended.
df["Close_rolling_mean_5"] = df["Close"].rolling(window=5).mean()

## Data Cleaning

In [25]:
# Discard every row that has a NaN in any column, e.g. the leading rows where
# the rolling windows and lagged closes are not yet defined.
df = df.dropna()

In [26]:
# Inspect the most recent rows with all engineered columns after cleaning.
df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,RSI,MACD,MACD_signal,MACD_hist,BB_upper,BB_middle,BB_lower,Close_lag_1,Close_lag_2,Close_lag_3,Close_lag_4,Close_lag_5,Close_rolling_mean_5
747,2025-07-17 00:00:00-04:00,172.020004,174.160004,170.830002,173.0,160841100,78.758823,7.910958,7.155343,0.755615,175.514337,158.282001,141.049664,171.369995,170.699997,164.070007,164.919998,164.100006,168.812
748,2025-07-18 00:00:00-04:00,173.639999,174.25,171.259995,172.410004,146456400,75.17174,7.982884,7.320851,0.662033,176.857465,159.628501,142.399538,173.0,171.369995,170.699997,164.070007,164.919998,170.310001
749,2025-07-21 00:00:00-04:00,172.75,173.380005,171.0,171.380005,123126100,72.383849,7.866098,7.4299,0.436197,177.299776,161.005001,144.710226,172.410004,173.0,171.369995,170.699997,164.070007,171.772
750,2025-07-22 00:00:00-04:00,171.339996,171.389999,164.580002,167.029999,193114300,73.21612,7.337947,7.41151,-0.073562,176.570008,162.148001,147.725994,171.380005,172.410004,173.0,171.369995,170.699997,171.038
751,2025-07-23 00:00:00-04:00,169.529999,171.259995,167.970001,170.779999,153077000,73.033734,7.139677,7.357143,-0.217467,176.53709,163.292001,150.046913,167.029999,171.380005,172.410004,173.0,171.369995,170.920001


In [27]:
# List the final set of columns present in the cleaned frame.
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'RSI', 'MACD',
       'MACD_signal', 'MACD_hist', 'BB_upper', 'BB_middle', 'BB_lower',
       'Close_lag_1', 'Close_lag_2', 'Close_lag_3', 'Close_lag_4',
       'Close_lag_5', 'Close_rolling_mean_5'],
      dtype='object')

## Save Data

In [28]:
from pathlib import Path

# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing (no-op when it is already there).
output_path = Path("../data") / f"{ticker_symbol}.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False: the row index is not written, only the columns.
df.to_csv(output_path, index=False)