In [None]:
import pandas as pd
import ta
from portfolio import Portfolio
from visualisation import plot_portfolio_value, plot_trade_points
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import  TimeSeriesSplit
from sklearn.metrics import classification_report

In [None]:
def create_labels(df, look_ahead_period=1, threshold=0.01):
    """
    Label each row with a trading action derived from the forward price change.

    The input frame is copied first, so the caller's DataFrame is never
    modified; all new columns live only on the returned frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a "Close" column.
    look_ahead_period : int, default 1
        How many rows ahead the comparison price is taken from.
    threshold : float, default 0.01
        Fractional change of "Close" that must be exceeded (in either
        direction) before a row is labelled buy or sell.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with three extra columns:
        "Future Price" (Close shifted back by ``look_ahead_period`` rows),
        "Price Change" (fractional change from Close to Future Price) and
        "Action" (1 = buy, -1 = sell, 0 = hold).
        Rows without a future price (the tail of the frame) are dropped.
    """
    # Copy so the label columns do not leak into the caller's frame.
    labelled = df.copy()

    labelled["Future Price"] = labelled["Close"].shift(-look_ahead_period)
    labelled["Price Change"] = (
        (labelled["Future Price"] - labelled["Close"]) / labelled["Close"]
    )

    # Default to hold, then override where the move exceeds the threshold.
    labelled["Action"] = 0
    labelled.loc[labelled["Price Change"] > threshold, "Action"] = 1    # buy
    labelled.loc[labelled["Price Change"] < -threshold, "Action"] = -1  # sell

    # The last `look_ahead_period` rows have no future price to compare with.
    return labelled.dropna(subset=["Future Price"])


def add_technical_indicators(df, sma_short=20, sma_long=50, bollinger_window=20,
                             macd_fast=12, macd_slow=26, macd_signal=9, rsi_window=14):
    """
    Return a copy of ``df`` with technical-indicator feature columns added.

    All indicators are computed from the "Close" column. The input frame is
    copied first, so the caller's DataFrame is not modified (this also avoids
    pandas' SettingWithCopyWarning when ``df`` is itself a slice).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a "Close" column.
    sma_short, sma_long : int
        Window lengths of the two simple moving averages.
    bollinger_window : int
        Window length for the upper/lower Bollinger bands.
    macd_fast, macd_slow, macd_signal : int
        Fast, slow and signal window lengths for the MACD.
    rsi_window : int
        Window length for the RSI.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the columns ``SMA_{sma_short}``,
        ``SMA_{sma_long}``, ``BB_High_{bollinger_window}``,
        ``BB_Low_{bollinger_window}``, ``MACD_{macd_fast}_{macd_slow}``,
        ``MACD_Signal_{macd_signal}`` and ``RSI_{rsi_window}``, with missing
        values back-filled across every column.
    """
    enriched = df.copy()
    close = enriched['Close']

    # Short and long simple moving averages
    enriched[f'SMA_{sma_short}'] = close.rolling(window=sma_short).mean()
    enriched[f'SMA_{sma_long}'] = close.rolling(window=sma_long).mean()

    # Bollinger Bands
    enriched[f'BB_High_{bollinger_window}'] = ta.volatility.bollinger_hband(close, window=bollinger_window)
    enriched[f'BB_Low_{bollinger_window}'] = ta.volatility.bollinger_lband(close, window=bollinger_window)

    # MACD (Moving Average Convergence Divergence) and its signal line
    enriched[f'MACD_{macd_fast}_{macd_slow}'] = ta.trend.macd(
        close, window_slow=macd_slow, window_fast=macd_fast)
    enriched[f'MACD_Signal_{macd_signal}'] = ta.trend.macd_signal(
        close, window_slow=macd_slow, window_fast=macd_fast, window_sign=macd_signal)

    # RSI (Relative Strength Index)
    enriched[f'RSI_{rsi_window}'] = ta.momentum.rsi(close, window=rsi_window)

    # Back-fill missing values. `DataFrame.bfill()` replaces the deprecated
    # `fillna(method='bfill')` and produces the same result.
    # NOTE(review): back-filling copies the first valid indicator value into
    # the earlier warm-up rows, so those rows carry information from later
    # timestamps -- confirm this is acceptable for training/backtesting.
    return enriched.bfill()


In [None]:
# Load the ticker's processed hourly CSV (per the file name), using the
# 'Datetime' column as a parsed datetime index.
ticker = "AAPL"
data_path = f"../data/{ticker}_processed_hourly_data.csv"
data = pd.read_csv(
    data_path,
    parse_dates=True,
    index_col='Datetime',
)

In [None]:
# Label every row first, then append the indicator columns to the labelled frame.
prep_data = add_technical_indicators(create_labels(data))

In [None]:
# Columns that must never be used as model inputs: the raw close, the label
# and its helper columns, and the columns the backtest cell adds later (so
# this cell stays correct if it is re-run after the backtest).
non_feature_columns = {
    'Close', 'Future Price', 'Price Change', 'Action',
    'Predicted Label', 'Actions', 'Portfolio Value', 'Returns',
}

# Build the feature list from prep_data, not data: the technical indicators
# only exist on prep_data, so selecting from data.columns would silently drop
# every indicator from the model inputs.
feature_columns = [col for col in prep_data.columns if col not in non_feature_columns]
X = prep_data[feature_columns]
y = prep_data['Action']

# Time-series cross-validation: each fold's test rows come after its training rows
time_series_splits = TimeSeriesSplit(n_splits=5)

for fold, (train_index, test_index) in enumerate(time_series_splits.split(X)):
    # Split data into training and testing sets
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a fresh model on the training set for this fold.
    # `model` is rebound every iteration; after the loop it holds the model
    # fitted on the last fold's training rows, which later cells reuse.
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predict and evaluate on the test set for this fold
    y_pred = model.predict(X_test)
    print(f"Fold {fold + 1} Evaluation:")
    print(classification_report(y_test, y_pred))

In [None]:
# Create the portfolio object used by the backtest.
# NOTE(review): 100000 is presumably the starting cash balance -- confirm
# against the Portfolio constructor.
portfolio = Portfolio(100_000)

In [None]:
# Predict a label for every row using the feature matrix X.
# NOTE(review): `model` is the classifier left over from the last CV fold and
# was fitted on a subset of these same rows, so part of this backtest is
# in-sample and will look better than out-of-sample trading would.
prep_data['Predicted Label'] = model.predict(X)
prep_data['Actions'] = None
prep_data['Portfolio Value'] = None

# Walk the rows in order and trade on the predicted labels
for index, row in prep_data.iterrows():
    close_price = row['Close']

    # Record the portfolio value at this row's close before any trade on it
    prep_data.at[index, 'Portfolio Value'] = portfolio.get_value(close_price)

    # 250 is the second argument to buy/sell (presumably the share quantity --
    # confirm against Portfolio).
    if row['Predicted Label'] == 1 and not portfolio.holdings:
        portfolio.buy(close_price, 250)
        # Mark only this row; assigning to prep_data['Actions'] would
        # overwrite the whole column on every trade.
        prep_data.at[index, 'Actions'] = 1

    elif row['Predicted Label'] == -1 and portfolio.holdings:
        portfolio.sell(close_price, 250)
        prep_data.at[index, 'Actions'] = -1

print(f"Portfolio Final Value = {portfolio.get_value(prep_data.iloc[-1]['Close'])}")

# The column was initialised with None, so it has object dtype; convert it to
# float so pct_change() and plotting operate on numeric data.
prep_data['Portfolio Value'] = prep_data['Portfolio Value'].astype(float)

# Calc returns (the first row is NaN because it has no previous value)
prep_data['Returns'] = prep_data['Portfolio Value'].pct_change()

In [None]:
# Show the portfolio value recorded for each row of the backtest.
portfolio_value_series = prep_data['Portfolio Value']
print(portfolio_value_series)

In [None]:
# Plot the backtest frame with the project's plot_portfolio_value helper.
# NOTE(review): the second argument is presumably a strategy label/title --
# confirm against the visualisation module.
plot_portfolio_value(prep_data, "RandomForestClassifier")

In [None]:
# Plot the backtest frame with the project's plot_trade_points helper.
# NOTE(review): presumably reads the 'Actions' column to mark buys/sells and
# uses the second argument as a label -- confirm against the visualisation module.
plot_trade_points(prep_data, "Random Forest Classifier")