<a href="https://colab.research.google.com/github/LDY681/CITS4404_project/blob/master/CITS4404.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CITS4404 Firefly Algorithm Trading Bot
#### by: Davinh Dang (22717235), Dayu Liu (24188516), Ethan Young (23450844), Flavian Jerotich (24001784), Kushan Jayasekera (24205163), Xin Wang (24201533)

# README
This folder contains this `ipynb` file to run bot training and trading, dataset is imported from kaggle.

# 1. Dataset Processing and Filter Implementation
This step involves processing kaggle dataset. Dates and Closing prices are extracted with their corresponding values, ready for use for further processing for the analysis.
### Get required Python Modules

In [4]:
# TODO Required modules
!pip install torch
!pip install numpy
!pip install pandas
!pip install kagglehub



In [5]:
# Import libraries
# Essentials
import numpy as np

# Data Processing
import pandas as pd
import kagglehub

# FIXME Modeling
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import time


### Data Processing

In [6]:
# Download dataset from Kaggle
path = kagglehub.dataset_download("prasoonkottarathil/btcinusd")
print("Path to dataset files:", path)

fileName = "BTC-Daily.csv"  #! Other candidates include BTC-{2017-2021}min.csv, BTC-Hourly.csv (Which are hourly data and may not be suitable)

def preprocess_data(path, fileName):
    # Open dataset and load file as CSV
    file_path = f"{path}/{fileName}"
    data = pd.read_csv(file_path)

    # Extract relevant columns
    dates = data['date'].tolist()
    prices = data['close'].tolist()

    # Create a DataFrame
    df = pd.DataFrame({'date': dates, 'price': prices})
    return df

train_df = preprocess_data(path, fileName)

# Variables for model
dates = train_df['date']
prices = train_df['price']
print("Dates:", dates[:5])
print("Prices:", prices[:5])

Path to dataset files: /Users/ethanyong/.cache/kagglehub/datasets/prasoonkottarathil/btcinusd/versions/4
Dates: 0    2022-03-01 00:00:00
1    2022-02-28 00:00:00
2    2022-02-27 00:00:00
3    2022-02-26 00:00:00
4    2022-02-25 00:00:00
Name: date, dtype: object
Prices: 0    43185.48
1    43178.98
2    37712.68
3    39146.66
4    39231.64
Name: price, dtype: float64


### SMA Filter Implementation

In [7]:
def pad(P, N):
    flip = np.flip(P[1:N]) # TODO The project instruction have a negate sign before the flip function, could be a typo
    return np.append(flip, P)

def sma_filter(P, N):
    # pad the series
    P_pad = pad(P, N)
    # Roll the window and calculate the weighted average
    sma_full = pd.Series(P_pad).rolling(window=N).mean().to_numpy()
    return sma_full[N-1:]

#! To generate an N-day SMA we can simply call
sma10 = sma_filter(prices, 10)
sma20 = sma_filter(prices, 20)
print("sma10:", sma10[20:25])

sma10: [42625.384 43034.841 43366.134 43218.01  42902.534]


# 2. Model Implementation (TODO)


### Model's Hyperparameters (TODO)

In [8]:
# Variable Initialization
lb = 10 # Short-term moving average window size
ub = 40 # Long-term moving average window size

### Model Constructor

In [9]:
# FIXME from https://github.com/firefly-cpp/FireflyAlgorithm/blob/master/fireflyalgorithm/fireflyalgorithm.py
import numpy as np
from numpy.random import default_rng


class FireflyAlgorithm:
    def __init__(self, pop_size=20, alpha=1.0, betamin=1.0, gamma=0.01, seed=None):
        self.pop_size = pop_size
        self.alpha = alpha
        self.betamin = betamin
        self.gamma = gamma
        self.rng = default_rng(seed)

    def run(self, function, dim, lb, ub, max_evals):
        fireflies = self.rng.uniform(lb, ub, (self.pop_size, dim))
        intensity = np.apply_along_axis(function, 1, fireflies)
        best = np.min(intensity)

        evaluations = self.pop_size
        new_alpha = self.alpha
        search_range = ub - lb

        while evaluations <= max_evals:
            new_alpha *= 0.97
            for i in range(self.pop_size):
                for j in range(self.pop_size):
                    if intensity[i] >= intensity[j]:
                        r = np.sum(np.square(fireflies[i] - fireflies[j]), axis=-1)
                        beta = self.betamin * np.exp(-self.gamma * r)
                        steps = new_alpha * (self.rng.random(dim) - 0.5) * search_range
                        fireflies[i] += beta * (fireflies[j] - fireflies[i]) + steps
                        fireflies[i] = np.clip(fireflies[i], lb, ub)
                        intensity[i] = function(fireflies[i])
                        evaluations += 1
                        best = min(intensity[i], best)
        return best, fireflies[np.argmin(intensity)] # FIXME return not just the score but also best parameters

# 3. Trading strategy (TODO)

In [28]:
def simulate_trading(X, prices=prices, initial_capital=1000, fee_rate=0.03, print_out = False):
    SMA1 = round(X[0])
    SMA2 = round(X[1])
    if SMA1 >= SMA2:
        if print_out:
            print("Warning: Short-term SMA window (SMA1) should be less than long-term SMA window (SMA2).")
        return 1e9

    # --- 1. Calculate SMAs ---
    sma_short = sma_filter(prices, SMA1)
    sma_long = sma_filter(prices, SMA2)

    # --- 2. Generate Crossover Signals ---
    # Create a position state: 1 if sma_short > sma_long, -1 otherwise
    # Use -1 for below/equal to simplify crossover detection
    position_state = pd.Series(np.where(sma_short > sma_long, 1, -1), index=prices.index)

    # Detect crossovers:
    # Buy signal (1): state changes from -1 (below/equal) to 1 (above)
    # Sell signal (-1): state changes from 1 (above) to -1 (below/equal)
    # Use .diff() to find changes. A change from -1 to 1 is diff = 2. A change from 1 to -1 is diff = -2.
    crossover = position_state.diff()
    buy_signals = pd.Series(np.where(crossover == 2, 1, 0), index=prices.index)
    sell_signals = pd.Series(np.where(crossover == -2, -1, 0), index=prices.index)

    capital = float(initial_capital)
    shares = 0.0
    holding_bitcoin = False

    start_index = max(SMA1, SMA2) # First day both SMAs are potentially valid (depends on sma_filter implementation)
                                    # Crossover signal needs one more day due to diff()
                                    # Ensure loop starts where crossover signal is not NaN

    # --- 4. Simulation Loop ---
    # Iterate through days where signals *could* be generated
    for i in range(start_index, len(prices)):
        price_today = prices.iloc[i]
        buy_signal_today = buy_signals.iloc[i]
        sell_signal_today = sell_signals.iloc[i]

        # Check for invalid price
        if pd.isna(price_today) or price_today <= 0:
            continue # Skip day if price is invalid

        # If Buy signal generated today AND currently holding cash
        if buy_signal_today == 1 and not holding_bitcoin:
            cash_to_spend = capital * (1 - fee_rate) # fee
            shares = cash_to_spend / price_today
            capital = 0.0
            holding_bitcoin = True
            if print_out:
                print(f"Day {prices.index[i]}\nBUYING @ {price_today:.2f}\nShares - USD: {shares:.6f}\n") #print buy

        # If Sell signal generated today AND currently holding Bitcoin
        elif sell_signal_today == -1 and holding_bitcoin:
            cash_received = shares * price_today
            capital = cash_received * (1 - fee_rate) # fee
            shares = 0.0
            holding_bitcoin = False
            if print_out:
                print(f"Day {prices.index[i]}\nSELL @ {price_today:.2f}\nCapital - Bitcoin: {capital:.2f}\n") # Optional logging

    # At the end of the sequence, sell remaining bitcoin (if any) at the final price to see how much was earned
    if holding_bitcoin:
        final_price = prices.iloc[-1]
        if pd.notna(final_price) and final_price > 0:
             cash_received = shares * final_price
             capital = cash_received * (1 - fee_rate) # Fee
             shares = 0.0
             holding_bitcoin = False
             if print_out:
                print(f"End Liquidation\nSELL @ {final_price:.2f}\n")
                print(f"Final capital for SMA({SMA1}, {SMA2}): Final Capital - USD: ${capital:.2f}\n")
    else:
        if print_out:
            print(f"Final capital for SMA({SMA1}, {SMA2}): Final Capital - USD: ${capital:.2f}\n")
    return -1 * capital

### Model Training (TODO)

In [None]:
# FIXME from ChatGPT on how to apply our own fitness with SMA
def firefly_fitness_wrapper(prices):
    def wrapped_firefly_fitness(X):
        simulate_trading(X=X)
    return wrapped_firefly_fitness

firefly = FireflyAlgorithm(pop_size=25, alpha=0.5, betamin=0.2, gamma=1.0, seed=42)
fitness_fn = firefly_fitness_wrapper(prices)
best_score, best_params = firefly.run(fitness_fn, dim=2, lb=np.array([5, 10]), ub=np.array([30, 60]), max_evals=1000)
print(f"Best Sharpe Ratio: {best_score}")
print(f"Best Parameters: SMA1={best_params[0]}, SMA2={best_params[1]}")

(np.float64(2280.0475453017925), array([16.76109772, 10.47494951, 13.22278224, 16.51928081, 36.07022613,
       15.1280398 , 14.78948982, 17.93997295, 34.18039573, 12.13742332]))


TypeError: '<=' not supported between instances of 'NoneType' and 'NoneType'

In [29]:
# Assume you already got these from Firefly output:
best_sma1, best_sma2 = 17, 20

simulate_trading([best_sma1, best_sma2], print_out=True)

Day 20
BUYING @ 44426.84
Shares - USD: 0.021834

Day 30
SELL @ 37941.82
Capital - Bitcoin: 803.56

Day 44
BUYING @ 43107.70
Shares - USD: 0.018081

Day 82
SELL @ 47593.12
Capital - Bitcoin: 834.74

Day 88
BUYING @ 53743.69
Shares - USD: 0.015066

Day 125
SELL @ 58465.90
Capital - Bitcoin: 854.41

Day 134
BUYING @ 62034.65
Shares - USD: 0.013360

Day 136
SELL @ 60870.26
Capital - Bitcoin: 788.83

Day 142
BUYING @ 54714.95
Shares - USD: 0.013985

Day 144
SELL @ 53957.21
Capital - Bitcoin: 731.93

Day 170
BUYING @ 46035.00
Shares - USD: 0.015422

Day 183
SELL @ 47003.44
Capital - Bitcoin: 703.16

Day 184
BUYING @ 48808.72
Shares - USD: 0.013974

Day 194
SELL @ 46764.30
Capital - Bitcoin: 633.89

Day 200
BUYING @ 47860.58
Shares - USD: 0.012847

Day 201
SELL @ 44444.45
Capital - Bitcoin: 553.86

Day 240
BUYING @ 35289.21
Shares - USD: 0.015224

Day 257
SELL @ 38097.68
Capital - Bitcoin: 562.60

Day 259
BUYING @ 40171.47
Shares - USD: 0.013585

Day 275
SELL @ 35677.72
Capital - Bitcoin: 470

np.float64(-1.9371508352234403)