In [17]:
import pandas as pd
import numpy as np
import talib

# Location of the raw NVIDIA daily price history.
file_path = "/home/jesse/Projects/RL_Testing/Q_Learning/Training/nvidia_stock_prices.csv"

# Load the CSV, convert the "Date" strings to timestamps and promote that
# column to the index in a single chained expression.
df = (
    pd.read_csv(file_path)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .set_index("Date")
)

# Preview the first rows of the indexed frame.
df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-01-02,0.196417,0.199083,0.192333,0.192333,436416000
2004-01-05,0.19525,0.199917,0.1935,0.198583,575292000
2004-01-06,0.198,0.209417,0.197083,0.206667,1093344000
2004-01-07,0.204333,0.2095,0.202917,0.2085,673032000
2004-01-08,0.211083,0.212083,0.20725,0.20925,433752000


In [18]:
def calculate_rsi(data, period=14):
    """
    Add a Relative Strength Index (RSI) column to a price DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a numeric "Close" column.
    period : int, default 14
        Look-back window, forwarded to ``talib.RSI`` as ``timeperiod``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object that was passed in; it is modified in place
        by adding (or overwriting) the "RSI" column.
    """
    # The TA-Lib function API only accepts float64 ("double") arrays, so the
    # column is converted explicitly; an integer-typed "Close" column would
    # otherwise be rejected by talib.
    close_prices = data["Close"].to_numpy(dtype="float64")
    data["RSI"] = talib.RSI(close_prices, timeperiod=period)
    return data

# Append the 14-period RSI column to the NVIDIA frame and preview it.
df = calculate_rsi(df, period=14)
df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Volume,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-01-02,0.196417,0.199083,0.192333,0.192333,436416000,
2004-01-05,0.19525,0.199917,0.1935,0.198583,575292000,
2004-01-06,0.198,0.209417,0.197083,0.206667,1093344000,
2004-01-07,0.204333,0.2095,0.202917,0.2085,673032000,
2004-01-08,0.211083,0.212083,0.20725,0.20925,433752000,


In [19]:
def calculate_macd(data, fast_period=12, slow_period=26, signal_period=9):
    """
    Add MACD, signal-line and histogram columns to a price DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a numeric "Close" column.
    fast_period : int, default 12
        Forwarded to ``talib.MACD`` as ``fastperiod``.
    slow_period : int, default 26
        Forwarded to ``talib.MACD`` as ``slowperiod``.
    signal_period : int, default 9
        Forwarded to ``talib.MACD`` as ``signalperiod``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object that was passed in; it is modified in place
        by adding (or overwriting) "MACD", "MACD_Signal" and "MACD_Hist".
    """
    # The TA-Lib function API only accepts float64 ("double") arrays, so the
    # column is converted explicitly before the call.
    close_prices = data["Close"].to_numpy(dtype="float64")
    macd, macdsignal, macdhist = talib.MACD(
        close_prices,
        fastperiod=fast_period,
        slowperiod=slow_period,
        signalperiod=signal_period,
    )

    data["MACD"] = macd
    data["MACD_Signal"] = macdsignal
    data["MACD_Hist"] = macdhist

    return data

# Append the MACD, signal and histogram columns to the NVIDIA frame and preview it.
df = calculate_macd(data=df)
df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Volume,RSI,MACD,MACD_Signal,MACD_Hist
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2004-01-02,0.196417,0.199083,0.192333,0.192333,436416000,,,,
2004-01-05,0.19525,0.199917,0.1935,0.198583,575292000,,,,
2004-01-06,0.198,0.209417,0.197083,0.206667,1093344000,,,,
2004-01-07,0.204333,0.2095,0.202917,0.2085,673032000,,,,
2004-01-08,0.211083,0.212083,0.20725,0.20925,433752000,,,,


In [20]:
def calculate_bollinger_bands(data, period=20):
    """
    Add upper, middle and lower Bollinger Band columns to a price DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a numeric "Close" column.
    period : int, default 20
        Look-back window, forwarded to ``talib.BBANDS`` as ``timeperiod``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object that was passed in; it is modified in place
        by adding (or overwriting) "Bollinger_Upper", "Bollinger_Middle" and
        "Bollinger_Lower".
    """
    # The TA-Lib function API only accepts float64 ("double") arrays, so the
    # column is converted explicitly before the call.
    close_prices = data["Close"].to_numpy(dtype="float64")

    # Bands are placed two deviations above and below the middle band;
    # matype=0 is passed through unchanged from the original call.
    upperband, middleband, lowerband = talib.BBANDS(
        close_prices,
        timeperiod=period,
        nbdevup=2,
        nbdevdn=2,
        matype=0,
    )

    data["Bollinger_Upper"] = upperband
    data["Bollinger_Middle"] = middleband
    data["Bollinger_Lower"] = lowerband

    return data

# Append the 20-period Bollinger Band columns to the NVIDIA frame and preview it.
df = calculate_bollinger_bands(df, period=20)
df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Volume,RSI,MACD,MACD_Signal,MACD_Hist,Bollinger_Upper,Bollinger_Middle,Bollinger_Lower
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2004-01-02,0.196417,0.199083,0.192333,0.192333,436416000,,,,,,,
2004-01-05,0.19525,0.199917,0.1935,0.198583,575292000,,,,,,,
2004-01-06,0.198,0.209417,0.197083,0.206667,1093344000,,,,,,,
2004-01-07,0.204333,0.2095,0.202917,0.2085,673032000,,,,,,,
2004-01-08,0.211083,0.212083,0.20725,0.20925,433752000,,,,,,,


In [21]:
# Destination for the cleaned NVIDIA feature table.
output_file = "/home/jesse/Projects/RL_Testing/Q_Learning/Training/NVDA_Preprocessed.csv"

# Keep only rows in which every column is populated (the first rows have NaN
# indicator values, as the previews above show), then write the frame to disk.
df = df.dropna(axis="index", how="any")
df.to_csv(output_file)

print(f"Dropped NaN, dataframe saved to: {output_file}")

df.head()


Dropped NaN, dataframe saved to: /home/jesse/Projects/RL_Testing/Q_Learning/Training/NVDA_Preprocessed.csv


Unnamed: 0_level_0,Open,High,Low,Close,Volume,RSI,MACD,MACD_Signal,MACD_Hist,Bollinger_Upper,Bollinger_Middle,Bollinger_Lower
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2004-02-20,0.195917,0.201667,0.194833,0.201167,618120000,58.326657,-0.002161,-0.00499,0.002829,0.203469,0.191846,0.180222
2004-02-23,0.200833,0.201167,0.1915,0.194167,853332000,50.976223,-0.002069,-0.004406,0.002336,0.203599,0.191933,0.180268
2004-02-24,0.190833,0.19325,0.184333,0.186417,708384000,44.317248,-0.002593,-0.004043,0.001451,0.202538,0.191254,0.179971
2004-02-25,0.187583,0.191083,0.1855,0.1875,652512000,45.390786,-0.002886,-0.003812,0.000925,0.202544,0.1912,0.179856
2004-02-26,0.19025,0.190583,0.186417,0.18875,344976000,46.668822,-0.002984,-0.003646,0.000662,0.202545,0.191233,0.179922


In [22]:
import pandas as pd
import talib

# 1. Read Apple stock price data
file_path = "/home/jesse/Projects/RL_Testing/Q_Learning/Testing/apple_stock_prices.csv"
df = pd.read_csv(file_path)

# 2. Keep only the columns used below, in a fixed order.
#    The CSV headers are used as-is; the previous rename step mapped every
#    name to itself and therefore had no effect.
df = df[["Date", "Open", "High", "Low", "Close", "Volume"]]

df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1980-12-12,0.128348,0.128906,0.128348,0.128348,469033600
1,1980-12-15,0.12221,0.12221,0.121652,0.121652,175884800
2,1980-12-16,0.113281,0.113281,0.112723,0.112723,105728000
3,1980-12-17,0.115513,0.116071,0.115513,0.115513,86441600
4,1980-12-18,0.118862,0.11942,0.118862,0.118862,73449600


In [23]:
# 3. Calculate RSI (14-period) with the helper defined earlier in this notebook
df = calculate_rsi(df, period=14)

# 4. Calculate MACD (helper defaults: fast 12, slow 26, signal 9)
df = calculate_macd(df)

# 5. Calculate BB (Bollinger Bands, 20-period)
df = calculate_bollinger_bands(df, period=20)

# 6. Drop NaN (Because first few lines would contain NaN values)
df = df.dropna()

df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,RSI,MACD,MACD_Signal,MACD_Hist,Bollinger_Upper,Bollinger_Middle,Bollinger_Lower
33,1981-01-30,0.127232,0.127232,0.126116,0.126116,46188800,39.642837,-0.000377,0.001789,-0.002166,0.151717,0.140681,0.129644
34,1981-02-02,0.11942,0.11942,0.118862,0.118862,23766400,34.943807,-0.00193,0.001045,-0.002975,0.152763,0.13909,0.125418
35,1981-02-03,0.123326,0.123884,0.123326,0.123326,19152000,39.682098,-0.002769,0.000282,-0.003051,0.153145,0.138058,0.122971
36,1981-02-04,0.12779,0.128348,0.12779,0.12779,27865600,44.069116,-0.003038,-0.000382,-0.002656,0.153294,0.137556,0.121818
37,1981-02-05,0.12779,0.128906,0.12779,0.12779,7929600,44.069116,-0.003215,-0.000949,-0.002266,0.153471,0.137193,0.120915


In [24]:
# 7. Save to CSV
# NOTE(review): the Apple cells never call set_index, so df still carries its
# integer row index here and to_csv writes it as an unnamed first column.
# The NVIDIA frame is indexed by Date before it is saved, so the two files
# presumably have different layouts; confirm downstream readers expect that.
output_file = "/home/jesse/Projects/RL_Testing/Q_Learning/Testing/AAPL_Preprocessed.csv"
df.to_csv(path_or_buf=output_file)

print(f"Processed data saved to {output_file}")

Processed data saved to /home/jesse/Projects/RL_Testing/Q_Learning/Testing/AAPL_Preprocessed.csv
