In [1]:
# Imported python libraries
# To import, run "conda install -c conda-forge <package1> <package2>" in the Anaconda prompt

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import time
from scipy import linalg
import matplotlib.dates as mdates
from datetime import datetime, timedelta

In [2]:
# Function to load NVIDIA data from exported files
# Function to load NVIDIA data from exported files
def load_nvidia_data(base_dir='data'):
    """
    Parameters:
    -----------
    base_dir : Base directory where data files are stored
    
    Returns:
    --------
    Dictionary containing loaded data
    """
    # Define directory paths
    csv_dir = f'{base_dir}'
    train_dir = f'{base_dir}/npy/train'
    test_dir = f'{base_dir}/npy/test'
    
    # Loads CSV data
    technical_data = pd.read_csv(f'{csv_dir}/nvidia_technical_data.csv', index_col=0, parse_dates=True, date_format="%d/%m/%Y")
    normalized_data = pd.read_csv(f'{csv_dir}/nvidia_normalized_data.csv', index_col=0, parse_dates=True, date_format="%d/%m/%Y")
    ceflann_data = pd.read_csv(f'{csv_dir}/nvidia_ceflann_data.csv', index_col=0, parse_dates=True, date_format="%d/%m/%Y")
    
    # Loads NumPy arrays for training and testing
    X_train = np.load(f'{train_dir}/nvidia_X_train.npy')
    y_train = np.load(f'{train_dir}/nvidia_y_train.npy')
    train_dates = np.load(f'{train_dir}/nvidia_train_dates.npy', allow_pickle=True)
    train_prices = np.load(f'{train_dir}/nvidia_train_price_data.npy')
    
    # Load main test data
    X_test = np.load(f'{test_dir}/nvidia_X_test.npy')
    y_test = np.load(f'{test_dir}/nvidia_y_test.npy')
    test_dates = np.load(f'{test_dir}/nvidia_test_dates.npy', allow_pickle=True)
    test_prices = np.load(f'{test_dir}/nvidia_price_data.npy')
    
    # Try to load multiple test weeks if available
    test_weeks = []
    test_weeks_info = np.load(f'{base_dir}/npy/nvidia_test_weeks_info.npy', allow_pickle=True)
    
    # Load each test week
    for i in range(len(test_weeks_info)):
        try:
            week_data = {
                'X': np.load(f'{test_dir}/nvidia_X_test_week_{i}.npy'),
                'y': np.load(f'{test_dir}/nvidia_y_test_week_{i}.npy'),
                'dates': np.load(f'{test_dir}/nvidia_test_dates_week_{i}.npy', allow_pickle=True),
                'prices': np.load(f'{test_dir}/nvidia_price_data_week_{i}.npy'),
                'info': test_weeks_info[i]
            }
            test_weeks.append(week_data)
        except Exception as e:
            print(f"Warning: Could not load test week {i}: {e}")
    
    # Loads latest info
    with open(f'{csv_dir}/nvidia_info.txt', 'r') as f:
        latest_info = f.read().split(',')
        latest_date = latest_info[0]
        latest_close = float(latest_info[1])
        test_week_length = int(latest_info[2]) if len(latest_info) > 2 else 5
        num_test_weeks = int(latest_info[3]) if len(latest_info) > 3 else 0
    
    return {
        'technical_data': technical_data,
        'normalized_data': normalized_data,
        'ceflann_data': ceflann_data,
        'X_train': X_train,
        'y_train': y_train,
        'train_dates': train_dates,
        'train_prices': train_prices,
        'X_test': X_test,
        'y_test': y_test,
        'test_dates': test_dates,
        'test_prices': test_prices,
        'test_weeks': test_weeks,
        'test_weeks_info': test_weeks_info,
        'latest_date': latest_date,
        'latest_close': latest_close,
        'test_week_length': test_week_length,
        'num_test_weeks': num_test_weeks
    }

In [3]:
# This class implements the CEFLANN model with ELM learning
class CEFLANN:
    """
    Computational Efficient Functional Link Artificial Neural Network
    
    This implementation follows the paper "A hybrid stock trading framework integrating 
    technical analysis with machine learning techniques" by Dash & Dash (2016)
    """
    
    def __init__(self, expansion_order=5, regularization=0.01):
        """
        Parameters:
        -----------
        expansion_order(int) : The order of expansion for the functional expansion block
        regularization(float) : Regularization parameter for the ELM learning
        """
        self.expansion_order = expansion_order
        self.regularization = regularization
        self.output_weights = None
        self.expansion_params = None
        
    def _functional_expansion(self, X):
        """
        Parameters:
        -----------
        X : Input feature matrix with shape (n_samples, n_features)
            
        Returns:
        --------
        expanded_X : Expanded input pattern array
        """
        n_samples, n_features = X.shape
        
        # Number of expanded features = original features + expansion order
        n_expanded = n_features + self.expansion_order
        
        # Initializes the expanded feature matrix
        expanded_X = np.zeros((n_samples, n_expanded))
        expanded_X[:, :n_features] = X
        
        
        if self.expansion_params is None:
            # For each order i, initializes parameters a_i0 and a_ij (j is from 1 to n_features)
            self.expansion_params = []
            for i in range(self.expansion_order):
                # Initialize bias term a_i0
                a_i0 = np.random.uniform(-1, 1)
                
                # Initialize weights a_ij for each feature
                a_ij = np.random.uniform(-1, 1, size=n_features)
                
                self.expansion_params.append((a_i0, a_ij))
        
        # Applies functional expansion for each order
        for i in range(self.expansion_order):
            a_i0, a_ij = self.expansion_params[i]
            
            # Calculates weighted sum of input features
            weighted_sum = a_i0 + np.dot(X, a_ij)
            
            # Applies a tanh activation function
            expanded_X[:, n_features + i] = np.tanh(weighted_sum)
            
        return expanded_X
    
    def fit(self, X, y):
        """
        Parameters:
        -----------
        X : Training feature matrix of shape (n_samples, n_features)
        y : Target values array of shape (n_samples,)
            
        Returns:
        --------
        self : Returns self
        """
        # Applies functional expansion to transform input features
        expanded_X = self._functional_expansion(X)
        
        # Adds the bias to the expanded input
        n_samples = expanded_X.shape[0]
        bias_col = np.ones((n_samples, 1))
        M = np.hstack((bias_col, expanded_X))
        
        # Uses regularized least squares (Ridge Regression) for output weights        
        n_cols = M.shape[1]
        MtM = np.dot(M.T, M)
        reg_term = self.regularization * np.eye(n_cols)
        inverse_term = np.linalg.inv(MtM + reg_term)
        MP_inverse = np.dot(inverse_term, M.T)
        
        # Calculates output weights
        self.output_weights = np.dot(MP_inverse, y)
        
        return self
    
    def predict(self, X):
        """
        Parameters:
        -----------
        X : Test feature matrix of shape (n_samples, n_features)
            
        Returns:
        --------
        y_pred : Array of predicted values
        """
        # Applies functional expansion to transform input features
        expanded_X = self._functional_expansion(X)
        
        # Adds bias to expanded input
        n_samples = expanded_X.shape[0]
        bias_col = np.ones((n_samples, 1))
        M = np.hstack((bias_col, expanded_X))
        
        y_pred = np.dot(M, self.output_weights)
        
        return y_pred
    
    def evaluate(self, X, y):
        """
        Parameters:
        -----------
        X : Test feature matrix array
        y : Target values array
            
        Returns:
        --------
        mse : Mean squared error
        """
        y_pred = self.predict(X)
        mse = mean_squared_error(y, y_pred)
        return mse

In [4]:
# Trains the model on the training dataset
def train_model(X_train, y_train, expansion_order=5, regularization=0.01):
    """
    Parameters:
    -----------
    X_train : Training features
    y_train : Training targets
    expansion_order : Order of expansion for functional expansion block
    regularization : Regularization parameter for ELM learning
    
    Returns:
    --------
    trained_model : Trained CEFLANN model
    training_time : Time taken to train the model
    """
    model = CEFLANN(expansion_order=expansion_order, regularization=regularization)
    
    start_time = time.time()
    model.fit(X_train, y_train)
    training_time = time.time() - start_time
    
    return model, training_time

In [5]:
def get_simulation_week(data, start_date='2025-03-10', end_date='2025-03-14'):
    """
    Extract data for the simulation week (March 24-28, 2025)
    
    Parameters:
    -----------
    data : Dictionary containing loaded data
    start_date : Start date of simulation week (default: March 24, 2025)
    end_date : End date of simulation week (default: March 28, 2025)
    
    Returns:
    --------
    Dictionary containing data for the simulation week
    """
    # Convert string dates to datetime objects
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)
    
    # Try to find the simulation week in the test weeks
    simulation_week = None
    
    for week in data['test_weeks']:
        week_start = pd.to_datetime(week['dates'][0])
        week_end = pd.to_datetime(week['dates'][-1])
        
        # Check if there's overlap with our simulation period
        if (week_start <= end and week_end >= start):
            simulation_week = week
            break
    
    # If we couldn't find the exact week, use the most recent test week
    if simulation_week is None:
        print(f"Warning: Could not find exact simulation week. Using most recent test week as a proxy.")
        simulation_week = data['test_weeks'][-1]
    
    # Extract the technical indicators for this week from the full dataset
    week_dates = pd.to_datetime(simulation_week['dates'])
    technical_data_week = data['technical_data'].loc[week_dates[0]:week_dates[-1]]
    
    return {
        'X': simulation_week['X'],
        'y': simulation_week['y'],
        'dates': simulation_week['dates'],
        'prices': simulation_week['prices'],
        'technical_data': technical_data_week,
        'info': simulation_week['info'] if 'info' in simulation_week else None
    }

In [6]:
def prepare_data_for_ceflann(data, target_week=None):
    """
    Prepare data in the format required by the CEFLANN model
    
    Parameters:
    -----------
    data : Dictionary containing loaded data
    target_week : Optional specific week to prepare (if None, uses main test data)
    
    Returns:
    --------
    Dictionary containing prepared data for CEFLANN model
    """
    if target_week:
        X = target_week['X']
        y = target_week['y']
        dates = target_week['dates']
        prices = target_week['prices']
    else:
        X = data['X_test']
        y = data['y_test']
        dates = data['test_dates']
        prices = data['test_prices']
    
    # Ensure X and y are properly formatted as numpy arrays
    X = np.array(X).astype(float)
    y = np.array(y).astype(float)
    
    # Check for any NaN values and handle them
    if np.isnan(X).any():
        print("Warning: Input features contain NaN values. Replacing with zeros.")
        X = np.nan_to_num(X, nan=0.0)
    
    if np.isnan(y).any():
        print("Warning: Target values contain NaN values. Replacing with mean.")
        mean_y = np.nanmean(y)
        y = np.nan_to_num(y, nan=mean_y)
    
    return {
        'X': X,
        'y': y,
        'dates': dates,
        'prices': prices
    }

In [7]:
def convert_model_output_to_trading_signal(predictions, threshold=0.5):
    """
    Convert model predictions to trading signals (0 = downtrend, 1 = uptrend)
    
    Parameters:
    -----------
    predictions : Model output predictions (values in range 0-1)
    threshold : Threshold value for classification (default: 0.5)
    
    Returns:
    --------
    Numpy array of trend signals (0 or 1)
    """
    return (predictions > threshold).astype(int)

In [8]:
# Load all NVIDIA data
nvidia_data = load_nvidia_data()

# Print summary of loaded data
print(f"Data loaded successfully. Latest date: {nvidia_data['latest_date']}")
print(f"Latest NVIDIA close price: ${nvidia_data['latest_close']:.2f}")
print(f"Training data shape: X={nvidia_data['X_train'].shape}, y={nvidia_data['y_train'].shape}")
print(f"Testing data shape: X={nvidia_data['X_test'].shape}, y={nvidia_data['y_test'].shape}")
print(f"Number of test weeks available: {len(nvidia_data['test_weeks'])}")

Data loaded successfully. Latest date: 2025-03-14 00:00:00
Latest NVIDIA close price: $121.67
Training data shape: X=(1031, 6), y=(1031,)
Testing data shape: X=(5, 6), y=(5,)
Number of test weeks available: 54


In [9]:
# Get simulation week data (March 24-28, 2025)
simulation_week = get_simulation_week(nvidia_data)
print("\nSimulation Week Data:")
print(f"- Dates: {simulation_week['dates'][0]} to {simulation_week['dates'][-1]}")
print(f"- Number of trading days: {len(simulation_week['dates'])}")
print(f"- Features shape: X={simulation_week['X'].shape}")

TypeError: '<' not supported between instances of 'str' and 'Timestamp'

In [None]:
# Prepare data for CEFLANN model
prepared_data = prepare_data_for_ceflann(nvidia_data, simulation_week)
print("\nData prepared for CEFLANN model:")
print(f"- Input features shape: {prepared_data['X'].shape}")
print(f"- Target values shape: {prepared_data['y'].shape}")

In [None]:
# Visualize the simulation week data
plt.figure(figsize=(12, 6))
plt.plot(pd.to_datetime(simulation_week['dates']), simulation_week['prices'], marker='o')
plt.title('NVIDIA Stock Prices - Simulation Week (March 24-28, 2025)')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()