<a href="https://colab.research.google.com/github/Ekliipce/Representation-and-Generative-Learning/blob/main/Simple_attention_model_to_predict_a_financial_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simple attention model to predict a financial time series

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from torch import nn
from typing import Tuple



In [3]:
def prepare_dataset(filename: str, n: int = 5) -> Tuple[np.ndarray, np.ndarray]:
    """
    Reads the file and prepares the dataset for training.
    The dataset consists of vectors of 5 elements: open, high, low, close, volume.
    An x is made n vectors of ohlcv values in the past and the y is the ohlcv value of the next day.

    xs: (batch, n, 5)
    ys: (batch, 5)

    :param filename: the data file to read
    :param n: the number of days to look back
    :return: tuple of xs and ys
    """
    import pandas as pd
    import numpy as np
    from typing import Tuple

    # Read the file using pandas
    columns = ["name", "date", "open", "high", "low", "close", "volume"]
    df = pd.read_csv(filename, names=columns)

    # Filter for a specific stock
    stock_orange = df[df['name'] == 'Orange']

    # Drop unnecessary columns
    stock_orange = stock_orange.drop(columns=["name", "date"])

    # Correct the column names and replace commas in 'volume'
    stock_orange['volume'] = stock_orange['volume'].str.replace(",", ".").astype('float')

    # Check if all required columns are present
    required_columns = ["open", "high", "low", "close", "volume"]
    if not all(col in stock_orange.columns for col in required_columns):
        raise ValueError(f"Missing columns in the DataFrame. Required columns: {required_columns}")

    # Drop rows with NaN values
    stock_orange.dropna(inplace=True)

    # Convert all columns to float32
    stock_orange = stock_orange.astype("float32")

    # Normalize each column
    for col in required_columns:
        mean = stock_orange[col].mean()
        std = stock_orange[col].std()
        stock_orange[col] = (stock_orange[col] - mean) / std

    # Prepare the dataset
    xs, ys = [], []
    for i in range(len(stock_orange) - n):
        xs.append(stock_orange.iloc[i:i+n][required_columns].values)
        ys.append(stock_orange.iloc[i+n][required_columns].values)

    return np.array(xs), np.array(ys)


In [4]:
xs, ys = prepare_dataset("data.csv", 5)
xs.shape, ys.shape

FileNotFoundError: ignored

In [1]:
class SelfAttention(nn.Module):
    def __init__(self, input_dim: int, output_dim: int):
        super().__init__()
        wq = nn.Linear(input_dim)
        wk = nn.Linear(input_dim)
        wv = nn.Linear(input_dim)


    def forward(self, x: torch.Tensor):
      query_representations = wq(x)
      key_representations = wk(x)
      value_representations = wv(x)

      attention_score = softmax(
        query_representations @ key_representations.T / query_representations_dim,
        axis=1
      )

NameError: ignored