In [1]:
import pandas as pd
# load data from log-returns

df_log_returns = pd.read_csv('squashed_log_returns.csv')

print(df_log_returns.head())

       AAPL      AMZN      GOOG      MSFT
0 -0.000076  0.005513  0.007070  0.002016
1  0.002013  0.001940  0.001570  0.003806
2  0.004917  0.006963  0.006282  0.005351
3 -0.001616  0.006220  0.001852  0.000443
4 -0.000050  0.002026 -0.000267 -0.000295


In [2]:
import numpy as np

#split into windows
WINDOW_SIZE = 4 #time windows of 4

df = pd.DataFrame(columns=['context','features','raw_label'])
    
for context, ticker in enumerate(df_log_returns.columns):
        
    stock_series = df_log_returns[ticker]
        
    # Slide a window across this one stock's time series
    # We stop (window_size - 1) from the end
    for i in range(len(stock_series) - WINDOW_SIZE + 1):
            
        # The full window (e.g., 4 log-returns)
        window = stock_series.iloc[i : i + WINDOW_SIZE]
            
        # Features are the first N-1 (e.g., 3)
        features = window[:-1].values
            
        # Label is the last one (e.g., the 4th)
        label = window.iloc[-1]

        df.loc[len(df)] = [context, features, label]

print(df.head())

   context                                           features  raw_label
0        0  [-7.570098556833607e-05, 0.002012513590532, 0....  -0.001616
1        0  [0.002012513590532, 0.0049166646207804, -0.001...  -0.000050
2        0  [0.0049166646207804, -0.0016160763885611, -4.9...  -0.000100
3        0  [-0.0016160763885611, -4.977979782626501e-05, ...   0.002460
4        0  [-4.977979782626501e-05, -9.973891197457896e-0...   0.004462


In [3]:
# Convert labels to bins
N_BINS = 2

context_min = df.groupby('context')['raw_label'].transform('min')
context_max = df.groupby('context')['raw_label'].transform('max')

midpoint = (context_min + context_max) / 2

df['label'] = (df['raw_label'] > midpoint).astype(int)

print(df.head(20))

    

    context                                           features  raw_label  \
0         0  [-7.570098556833607e-05, 0.002012513590532, 0....  -0.001616   
1         0  [0.002012513590532, 0.0049166646207804, -0.001...  -0.000050   
2         0  [0.0049166646207804, -0.0016160763885611, -4.9...  -0.000100   
3         0  [-0.0016160763885611, -4.977979782626501e-05, ...   0.002460   
4         0  [-4.977979782626501e-05, -9.973891197457896e-0...   0.004462   
5         0  [-9.973891197457896e-05, 0.0024599308278872, 0...  -0.002213   
6         0  [0.0024599308278872, 0.0044615553560915, -0.00...   0.007114   
7         0  [0.0044615553560915, -0.0022127250514393, 0.00...   0.000388   
8         0  [-0.0022127250514393, 0.007114255558871, 0.000...  -0.001942   
9         0  [0.007114255558871, 0.0003877178948868, -0.001...  -0.003568   
10        0  [0.0003877178948868, -0.001942374603717, -0.00...   0.000098   
11        0  [-0.001942374603717, -0.0035676390655656, 9.81...  -0.006973   

In [4]:
# split into train and test sets

TRAIN_RATIO = 0.8  # take the first 80% of df as requested

# Compute split index using the row order (first 80%)
n_rows = len(df)
split_index = int(n_rows * TRAIN_RATIO)

# First 80% (preserve original order); create df_test for remainder
df_train = df.iloc[:split_index].reset_index(drop=True)
df_test = df.iloc[split_index:].reset_index(drop=True)

print(df_train.shape)
print(df_test.shape)

(4816, 4)
(1204, 4)


In [5]:
import pennylane as qml
#Build the circuit

n_stocks = df_log_returns.shape[1]

# 0-1: context qubits
# 2-4: input qubits
# 5: output qubit
N_CONTEXT_WIRES = int(np.log2(n_stocks))  # Number of qubits needed to represent stocks
N_INPUT_WIRES = df_train['features'].loc[0].shape[0]  # Should be 3
N_OUTPUT_WIRES = int(np.log2(N_BINS))
N_TOTAL_WIRES = N_CONTEXT_WIRES + N_INPUT_WIRES + N_OUTPUT_WIRES

N_LAYERS = 2

LEARNING_RATE = 0.01
N_EPOCHS = 10

CONTEXT_WIRES = list(range(N_CONTEXT_WIRES))
INPUT_WIRES = list(range(N_CONTEXT_WIRES, N_CONTEXT_WIRES + N_INPUT_WIRES))
OUTPUT_WIRES = list(range(N_CONTEXT_WIRES + N_INPUT_WIRES, N_TOTAL_WIRES))
COMP_WIRES = list(range(N_CONTEXT_WIRES, N_CONTEXT_WIRES + N_OUTPUT_WIRES + N_INPUT_WIRES))

dev = qml.device("default.qubit", wires=6)

#input layer
def U_in(features):
    """The data encoding block (feature map)."""
    # We use arctan to squash features, as discussed
    for i, wire in enumerate(INPUT_WIRES):
        qml.RY(features[i], wires=wire)

# layer for both either the shared or specify ansatz (input is just trainable parameters)
def U_ss(params):
    """A single (L=1) ansatz layer (Rotations + CNOTs)."""
    # 1. Trainable Rotations
    for i, wire in enumerate(COMP_WIRES):
        qml.RY(params[i], wires=wire)
    
    # 2. Entangling "Ring"
    for i in range(len(COMP_WIRES)):
        qml.CNOT(wires=[COMP_WIRES[i], COMP_WIRES[(i + 1) % len(COMP_WIRES)]])

@qml.qnode(dev, interface='torch', diff_method='parameter-shift')
def qmtl_circuit(params, features, context):
    # Encode input features into qubits 2, 3, 4
    U_in(features)

    #Implement shared variational layers
    for l in range(N_LAYERS):
        U_ss(params['shared'][l])

    #TODO: Implement actual control gates for specify layers (allows for superposition of contexts later)
    for l in range(N_LAYERS):
        U_ss(params['spec'][context][l])

    # Measure output qubit (wire 5)
    return qml.probs(wires=N_CONTEXT_WIRES + N_INPUT_WIRES)


In [None]:
if False: #Test the circuit with dummy parameters

    for index, row in df_train[:1].iterrows(): #only first element for testing
        print(type(row))
        params = dict()
        params['shared'] = [[0.1, 0.2, 0.3, 0.4],[0.5, 0.6, 0.7, 0.8]]
        params['spec'] = [
        [[-0.1, -0.1, -0.1, -0.1], [-0.2, -0.2, -0.2, -0.2]],
            [[-0.3, -0.3, -0.3, -0.3], [-0.4, -0.4, -0.4, -0.4]],
            [[-0.5, -0.5, -0.5, -0.5], [-0.6, -0.6, -0.6, -0.6]],
            [[-0.7, -0.7, -0.7, -0.7], [-0.8, -0.8, -0.8, -0.8]]]
        # Execute the circuit
        result = qmtl_circuit(params, row['features'], row['context'])
        print("Features:", features, "Output expectation:", result)

        drawing = qml.draw(qmtl_circuit)(params, row['features'], row['context'])
        print(drawing)


<class 'pandas.core.series.Series'>
Features: [ 9.27878004e-05 -6.84386080e-04  1.40243475e-03] Output expectation: tensor([0.9179, 0.0821], dtype=torch.float64)
2: ──RY(-0.00)──RY(0.10)─╭●───────╭X──RY(0.50)─╭●───────╭X──RY(-0.10)─╭●───────╭X──RY(-0.20)─╭● ···
3: ──RY(0.00)───RY(0.20)─╰X─╭●────│───RY(0.60)─╰X─╭●────│───RY(-0.10)─╰X─╭●────│───RY(-0.20)─╰X ···
4: ──RY(0.00)───RY(0.30)────╰X─╭●─│───RY(0.70)────╰X─╭●─│───RY(-0.10)────╰X─╭●─│───RY(-0.20)─── ···
5: ──RY(0.40)──────────────────╰X─╰●──RY(0.80)───────╰X─╰●──RY(-0.10)───────╰X─╰●──RY(-0.20)─── ···

2: ··· ───────╭X─┤       
3: ··· ─╭●────│──┤       
4: ··· ─╰X─╭●─│──┤       
5: ··· ────╰X─╰●─┤  Probs


In [7]:
import torch
import torch.nn as nn

LEARNING_RATE = 0.01
N_EPOCHS = 10

def init_params():
    """
    Initialize the trainable parameters using torch.nn.Parameter
    """
    n_params_per_layer = len(COMP_WIRES) # 3 input + 1 output = 4
    
    # --- Shared Parameters ---
    # We need L=2 layers of 4 params each
    shared_params = torch.randn(N_LAYERS, n_params_per_layer, requires_grad=True)
    
    # --- Specify Parameters ---
    # We need K=4 sets of (L=2 layers * 4 params each)
    n_stocks = 2**N_CONTEXT_WIRES
    spec_params = torch.randn(n_stocks, N_LAYERS, n_params_per_layer, requires_grad=True)
    
    # We use torch.nn.ParameterDict to keep them organized
    return nn.ParameterDict({
        "shared": nn.Parameter(shared_params),
        "spec": nn.Parameter(spec_params)
    })

#Train the circuit
params = init_params()

optimizer = torch.optim.Adam(params.values(), lr=LEARNING_RATE)

#KL-Divergence used
loss_fn = nn.KLDivLoss()

print("\n--- Starting Training ---")
    
    # --- Run Epochs ---
for epoch in range(N_EPOCHS):
    total_loss = 0
    
    # We loop through the training set one sample at a time
    # (This is Stochastic Gradient Descent, Batch Size = 1)
    # TODO: Increase batch size for more efficiency
    for index, row in df_train.iloc[:50].iterrows(): #Only first 50 for testing purposes
        
        # Create the one-hot true label vector [P(0), P(1)]
        y_true_onehot = torch.tensor([0.0, 0.0])
        y_true_onehot[row['label']] = 1.0
        
        # 2. Run circuit, y_pred = [P(0), P(1)]
        y_pred = qmtl_circuit(params, row['features'], row['context'])
        
        # Calculate Loss
        # **CRITICAL**: KLDivLoss expects log-probabilities
        loss = loss_fn(torch.log(y_pred), y_true_onehot)
        
        # Calculate Gradients
        # PyTorch and PennyLane work together to run the parameter-shift rule for ALL parameters.
        loss.backward()
        
        # Update parameters
        optimizer.step()
        
        # Clear gradients
        optimizer.zero_grad()
        
        total_loss += loss.item()
        
    avg_loss = total_loss / len(df_train)
    print(f"Epoch {epoch+1}/{N_EPOCHS} - Avg. Loss: {avg_loss:.4f}")

    print("--- Training Complete ---")
    print("Final Parameters (Shared):")
    print(params['shared'])
    print("Final Parameters (Specify):")
    print(params['spec'])


--- Starting Training ---




Epoch 1/10 - Avg. Loss: 0.0026
--- Training Complete ---
Final Parameters (Shared):
Parameter containing:
tensor([[ 0.0663,  1.0921, -1.1503,  0.7243],
        [-0.7098,  0.2456, -0.2202,  1.4512]], requires_grad=True)
Final Parameters (Specify):
Parameter containing:
tensor([[[-1.7667, -0.4806, -1.2539,  0.7395],
         [ 1.5891,  0.6025, -1.5270,  0.8520]],

        [[ 0.6428, -0.2284,  1.8288,  1.2051],
         [ 1.1416, -1.2565, -0.3173, -3.1662]],

        [[ 0.3604, -1.3728,  0.2914, -0.3286],
         [-0.8405, -0.2691, -1.3748,  0.3163]],

        [[ 1.5240, -1.0766,  1.3405,  1.3979],
         [-1.7964,  0.8130,  1.4642, -0.9760]]], requires_grad=True)
Epoch 2/10 - Avg. Loss: 0.0025
--- Training Complete ---
Final Parameters (Shared):
Parameter containing:
tensor([[ 0.0611,  1.1070, -1.1559,  0.7240],
        [-0.7098,  0.2484, -0.2511,  1.4537]], requires_grad=True)
Final Parameters (Specify):
Parameter containing:
tensor([[[-1.7423, -0.4791, -1.2786,  0.7147],
         [ 