In [16]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("prodzar/stocks-historical-price-data")

print("C:/Users/supre/Documents/FAI/PROJECT", path)

C:/Users/supre/Documents/FAI/PROJECT /Users/fletcher/.cache/kagglehub/datasets/prodzar/stocks-historical-price-data/versions/1


In [17]:
import pandas as pd
import os

# Path to your directory containing the CSV files
current_dir = os.getcwd()
download_path = os.path.join(current_dir, '..', 'dataset', 'historical_data')
download_path = os.path.normpath(download_path)
merged_data = pd.DataFrame()

# Loop through each file in the directory
for file in os.listdir(download_path):
    if file.endswith(".csv"):
        # Extract company name from the filename (without .csv extension)
        company_name = file.replace(".csv", "")
        
        # Load CSV into a DataFrame
        file_path = os.path.join(download_path, file)
        df = pd.read_csv(file_path)
        
        # Add a new column for the company name
        df["Company"] = company_name
        
        # Append to the merged DataFrame
        merged_data = pd.concat([merged_data, df], ignore_index=True)

# Save the merged DataFrame to a new CSV file
output_path = os.path.join(current_dir, '..', 'merged_stocks_data.csv')
output_path = os.path.normpath(output_path)
merged_data.to_csv(output_path, index=False)

print("Data merged successfully into merged_stocks_data.csv.")


Data merged successfully into merged_stocks_data.csv.


In [18]:
merged_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Company
0,2019-05-31,271.279999,272.910004,268.75,270.899994,2526600,0.0,0.0,ADBE
1,2019-06-03,270.51001,272.809998,257.529999,259.029999,3903400,0.0,0.0,ADBE
2,2019-06-04,263.269989,268.929993,257.459991,268.709991,3750800,0.0,0.0,ADBE
3,2019-06-05,272.779999,273.209991,268.390015,272.859985,2650400,0.0,0.0,ADBE
4,2019-06-06,273.720001,275.76001,271.049988,274.799988,2211800,0.0,0.0,ADBE


In [19]:
import numpy as np
import pandas as pd
import random
import ta
from collections import deque
from pathlib import Path
from sklearn.preprocessing import StandardScaler



In [21]:
# Function to compute RSI using the ta library
def compute_rsi(series, window=14):
    return ta.momentum.RSIIndicator(series, window=window).rsi()

# Define the base directory (adjust based on your project structure)
base_dir = Path.cwd().parent  # Assuming the notebook is inside 'Reinforcement learning' folder

# Define the relative path to the merged_stocks_data.csv
data_path = base_dir / 'merged_stocks_data.csv'

# Check if the file exists
if not data_path.exists():
    raise FileNotFoundError(f"The file {data_path} does not exist.")

# Load your merged data using the relative path
data = pd.read_csv(data_path)

# Filter data by company name (e.g., for a specific company)
company_data = data[data['Company'] == 'AAPL'].reset_index(drop=True)

# Feature Engineering: Adding Moving Averages as technical indicators
company_data['MA10'] = company_data['Close'].rolling(window=10).mean()
company_data['MA50'] = company_data['Close'].rolling(window=50).mean()
company_data['RSI'] = compute_rsi(company_data['Close'], window=14)  # Using ta library

# Drop rows with NaN values resulting from moving averages and RSI, then reset the index
company_data = company_data.dropna().reset_index(drop=True)

# Normalize the features
scaler = StandardScaler()
company_data[['Open', 'High', 'Low', 'Close', 'Volume', 'MA10', 'MA50', 'RSI']] = scaler.fit_transform(
    company_data[['Open', 'High', 'Low', 'Close', 'Volume', 'MA10', 'MA50', 'RSI']]
)

# Q-learning parameters
actions = ["Buy", "Sell", "Hold"]  # Actions the agent can take
alpha = 0.1  # Learning rate
gamma = 0.95  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.995  # Decay rate for epsilon
q_table = {}  # Q-table for state-action pairs

# Initialize environment parameters
initial_balance = 10000  # Increased initial balance for multiple shares
balance = initial_balance  # Current balance
shares_held = 0  # Initial shares held
transaction_cost = 10  # Fixed transaction cost per trade

# Function to get state
def get_state(data, t):
    return tuple(data.iloc[t][["Open", "High", "Low", "Close", "Volume", "MA10", "MA50", "RSI"]])

# Reward function: calculates net profit/loss for an action
def get_reward(action, current_price, shares_held, balance, prev_balance):
    if action == "Sell" and shares_held > 0:
        profit = shares_held * current_price - transaction_cost
        return profit
    elif action == "Buy" and balance >= (current_price + transaction_cost):
        cost = current_price + transaction_cost
        return -cost
    else:
        return 0  # "Hold" or not enough balance

# Q-learning algorithm
for episode in range(1, 101):  # Training for 100 episodes
    balance = initial_balance
    shares_held = 0
    total_reward = 0
    
    for t in range(len(company_data) - 1):
        state = get_state(company_data, t)
        
        # Initialize Q-values for unseen states
        if state not in q_table:
            q_table[state] = [0] * len(actions)
        
        # Epsilon-greedy action selection
        if random.uniform(0, 1) < epsilon:
            action_index = random.randint(0, len(actions) - 1)  # Explore
        else:
            action_index = np.argmax(q_table[state])  # Exploit
        
        action = actions[action_index]
        current_price = company_data.loc[t, "Close"]
        prev_balance = balance
        reward = get_reward(action, current_price, shares_held, balance, balance)
        
        # Update balance and shares held based on action
        if action == "Buy" and balance >= (current_price + transaction_cost):
            balance -= (current_price + transaction_cost)
            shares_held += 1
        elif action == "Sell" and shares_held > 0:
            balance += (shares_held * current_price - transaction_cost)
            shares_held = 0
        
        total_reward += reward
        
        # Next state
        next_state = get_state(company_data, t + 1)
        
        # Initialize Q-values for unseen next states
        if next_state not in q_table:
            q_table[next_state] = [0] * len(actions)
        
        # Q-learning update using Bellman equation
        old_q_value = q_table[state][action_index]
        next_max_q = max(q_table[next_state])
        q_table[state][action_index] = old_q_value + alpha * (reward + gamma * next_max_q - old_q_value)
    
    # Handle end-of-episode: sell any remaining shares
    if shares_held > 0:
        final_price = company_data.loc[len(company_data) - 1, "Close"]
        balance += (shares_held * final_price - transaction_cost)
        total_reward += (shares_held * final_price - transaction_cost)
        shares_held = 0
    
    # Decay epsilon
    epsilon = max(epsilon * epsilon_decay, 0.01)
    
    # Logging
    print(f"Episode {episode}, Total Reward: {total_reward:.2f}, Balance: {balance:.2f}, Shares Held: {shares_held}")
    
print("Training complete.")

# Test phase (using the trained Q-table)
balance = initial_balance
shares_held = 0
total_reward = 0

for t in range(len(company_data) - 1):
    state = get_state(company_data, t)
    if state not in q_table:
        q_table[state] = [0] * len(actions)
    
    action_index = np.argmax(q_table[state])  # Always exploit
    action = actions[action_index]
    current_price = company_data.loc[t, "Close"]
    
    # Apply action
    if action == "Buy" and balance >= (current_price + transaction_cost):
        balance -= (current_price + transaction_cost)
        shares_held += 1
        reward = - (current_price + transaction_cost)
    elif action == "Sell" and shares_held > 0:
        balance += (shares_held * current_price - transaction_cost)
        reward = (shares_held * current_price - transaction_cost)
        shares_held = 0
    else:
        reward = 0  # Hold
    
    total_reward += reward

# Sell any remaining shares at the end of the test
if shares_held > 0:
    final_price = company_data.loc[len(company_data) - 1, "Close"]
    balance += (shares_held * final_price - transaction_cost)
    total_reward += (shares_held * final_price - transaction_cost)
    shares_held = 0

print(f"Test Total Reward: {total_reward:.2f}, Final Balance: {balance:.2f}, Shares Held: {shares_held}")

Episode 1, Total Reward: -2435.58, Balance: 7564.42, Shares Held: 0
Episode 2, Total Reward: -2246.00, Balance: 7754.00, Shares Held: 0
Episode 3, Total Reward: -2589.92, Balance: 7410.08, Shares Held: 0
Episode 4, Total Reward: -2423.31, Balance: 7576.69, Shares Held: 0
Episode 5, Total Reward: -2289.14, Balance: 7710.86, Shares Held: 0
Episode 6, Total Reward: -2304.37, Balance: 7695.63, Shares Held: 0
Episode 7, Total Reward: -2356.07, Balance: 7643.93, Shares Held: 0
Episode 8, Total Reward: -2329.57, Balance: 7670.43, Shares Held: 0
Episode 9, Total Reward: -2047.32, Balance: 7952.68, Shares Held: 0
Episode 10, Total Reward: -2487.13, Balance: 7512.87, Shares Held: 0
Episode 11, Total Reward: -2427.03, Balance: 7572.97, Shares Held: 0
Episode 12, Total Reward: -2359.90, Balance: 7640.10, Shares Held: 0
Episode 13, Total Reward: -2326.33, Balance: 7673.67, Shares Held: 0
Episode 14, Total Reward: -2317.76, Balance: 7682.24, Shares Held: 0
Episode 15, Total Reward: -2115.85, Balance

State and Q-Table: For each day in the test data, the agent retrieves the current state and checks for that state in the Q-table.

Exploitation (No Exploration): For each state in the test data, the agent selects the action with the highest Q-value.

Action Execution:
"Buy": Deduct the price of one stock from the balance and increase the shares_held.
"Sell": Add the value of all held shares to the balance and set shares_held to zero.

Portfolio Value Calculation: Tracks the total portfolio value (balance + value of held shares) to see the agent’s progress.

End of Testing: Prints the final portfolio value and total profit at the end of the test period