# Notebook cell In [22] (exported as plain text)
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

# Load a single column from the CSV. `usecols` takes zero-based positions,
# so index 9 selects the tenth column of the file.
df = pd.read_csv('test.csv', usecols=[9])
# Drop rows whose value is missing (NaN).
df = df.dropna()
# Bare expressions such as the `df` below are notebook display leftovers;
# outside an interactive cell they have no effect.
df


df
# Plot the series (creates a matplotlib figure as a side effect).
df.plot()
df.head()
# Convert the frame to a float NumPy array; DataFrame.values is 2-D, so with
# one selected column this is expected to have shape (n_rows, 1).
y = df.values.astype(float)
y
# Chronological split point: the first 93% of rows are used for training.
train_size = int(len(df)*0.93)
# First `train_size` rows.
train_data = y[:train_size]
# Remaining rows (about 7%); get_batch serves these for any split other
# than 'train'.
val_data = y[train_size:]
train_data
# Window length: number of consecutive samples in each input sequence that
# get_batch slices out. NOTE(review): the original comment said "days" --
# confirm the sampling interval of the CSV.
block_size = 128
# Number of windows sampled per batch by get_batch.
batch_size = 32
train_data[:block_size]
from sklearn.preprocessing import MinMaxScaler
# Scale values into [-1, 1]. The scaler is fitted on the training rows only,
# and get_batch applies the same fitted scaler to both splits.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(train_data.reshape(-1, 1))

def get_batch(split):
    """Sample a random batch of next-step prediction windows.

    Reads the module-level ``train_data`` / ``val_data`` arrays together with
    ``block_size``, ``batch_size`` and the fitted ``scaler``.

    Args:
        split: ``'train'`` selects ``train_data``; any other value selects
            ``val_data``.

    Returns:
        A tuple ``(x, y)`` of ``torch.float32`` tensors with one row per
        sampled window. ``y`` is ``x`` shifted one step into the future, so
        ``y[b, t]`` is the value that follows ``x[b, t]``. With the
        single-column data prepared above both are expected to have shape
        ``(batch_size, block_size)``.

    Raises:
        ValueError: If the selected split has fewer than ``block_size + 1``
            samples, i.e. not even one input/target window fits.
    """
    # Select the appropriate dataset based on the split
    data = train_data if split == 'train' else val_data

    # A window starting at i needs data[i : i + block_size + 1], so the last
    # valid start is len(data) - block_size - 1. np.random.randint excludes
    # its upper bound, hence the bound below is len(data) - block_size.
    n_starts = len(data) - block_size
    if n_starts <= 0:
        raise ValueError(
            f"split {split!r} has {len(data)} samples; at least "
            f"{block_size + 1} are needed for block_size={block_size}"
        )
    ix = np.random.randint(0, n_starts, batch_size)

    # Stack the windows into one array so scaling and tensor conversion each
    # happen once instead of once per window.
    x_blocks = np.stack([data[i:i + block_size] for i in ix])
    y_blocks = np.stack([data[i + 1:i + block_size + 1] for i in ix])

    # Normalize with the scaler fitted on the training data; the scaler
    # expects a single feature column, so flatten and restore the row layout.
    x_norm = scaler.transform(x_blocks.reshape(-1, 1)).reshape(len(ix), -1)
    y_norm = scaler.transform(y_blocks.reshape(-1, 1)).reshape(len(ix), -1)

    # Convert x and y to tensors
    x = torch.tensor(x_norm, dtype=torch.float32)
    y = torch.tensor(y_norm, dtype=torch.float32)

    return x, y


# Draw one training batch as a sanity check.
# NOTE: this rebinds the module-level name `y`, which previously held the raw
# data array; from here on `y` is the target tensor of this batch.
x, y = get_batch('train')




# Show the input windows: shape first, then the values.
print('inputs')
print(x.shape)
print(x)
# Show the matching targets (the inputs shifted one step ahead).
print('targets')
print(y.shape)
print(y)

def positionalEncoding(x):
    """Add sinusoidal positional encodings to ``x``.

    Even feature columns receive ``sin(pos / 10000 ** (2k / n_features))`` and
    odd columns the matching ``cos``, where ``k = column // 2``.

    Args:
        x: Tensor whose last two dimensions are ``(seq_len, n_features)``.
            Any leading dimensions (e.g. a batch dimension) are allowed; the
            same encoding is broadcast over them.

    Returns:
        ``x + encoding``, with the same shape as ``x``.

    Raises:
        ValueError: If ``x`` has fewer than two dimensions.
    """
    seq_len, n_features = x.shape[-2:]

    # Build the table on x's device so the final addition also works for
    # inputs that do not live on the CPU.
    device = x.device
    pos = torch.arange(seq_len, dtype=torch.float, device=device).reshape(-1, 1)
    i = torch.arange(n_features, dtype=torch.float, device=device).reshape(1, -1)

    # Columns 2k and 2k + 1 share one frequency.
    angle = pos / (10000 ** (2 * (i // 2) / n_features))

    # Sine on even columns, cosine on odd columns.
    encoding = torch.where(i % 2 == 0, torch.sin(angle), torch.cos(angle))

    return x + encoding

class SelfAttention(nn.Module):
    """Causal multi-head self-attention for batch-first inputs.

    Every position attends only to itself and to earlier positions; the
    causal mask is always applied.

    Args:
        hidden_size: Size of the feature dimension of inputs and outputs.
        num_heads: Number of attention heads; must divide ``hidden_size``.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide
            ``hidden_size``.
    """

    def __init__(self, hidden_size, num_heads):
        super(SelfAttention, self).__init__()
        # Fail early with a clear message instead of an opaque view() error
        # on the first forward pass.
        if num_heads <= 0 or hidden_size % num_heads != 0:
            raise ValueError(
                f"hidden_size ({hidden_size}) must be divisible by "
                f"num_heads ({num_heads})"
            )
        self.hidden_size = hidden_size
        self.num_heads = num_heads

        # Define the query, key, and value linear projections
        self.query_proj = nn.Linear(hidden_size, hidden_size)
        self.key_proj = nn.Linear(hidden_size, hidden_size)
        self.value_proj = nn.Linear(hidden_size, hidden_size)

        # Define the output linear projection
        self.output_proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, inputs):
        """Apply causal self-attention.

        Args:
            inputs: Tensor of shape ``(batch, seq_len, hidden_size)``.

        Returns:
            A tuple ``(output, weights)`` where ``output`` has shape
            ``(batch, seq_len, hidden_size)`` and ``weights`` has shape
            ``(batch, num_heads, seq_len, seq_len)``.
        """
        batch_size, seq_len, hidden_size = inputs.size()
        head_dim = hidden_size // self.num_heads

        def split_heads(tensor):
            # (batch, seq, hidden) -> (batch, heads, seq, head_dim)
            return tensor.view(batch_size, seq_len, self.num_heads, head_dim).transpose(1, 2)

        # Project the inputs and split them into heads
        queries = split_heads(self.query_proj(inputs))
        keys = split_heads(self.key_proj(inputs))
        values = split_heads(self.value_proj(inputs))

        # Scaled dot-product attention scores
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / math.sqrt(head_dim)

        # Causal mask: True strictly above the diagonal marks future positions.
        # It is built directly on the input's device. -inf is used rather than
        # a large finite constant so the fill is valid for half precision too;
        # the diagonal is never masked, so every row keeps a finite score.
        future = torch.triu(
            torch.ones(seq_len, seq_len, device=inputs.device), diagonal=1
        ).bool()
        scores = scores.masked_fill(future, float('-inf'))

        # Apply the softmax function to get the attention weights
        weights = torch.softmax(scores, dim=-1)

        # Apply the attention weights to the values
        attn_output = torch.matmul(weights, values)

        # Merge the heads back into one feature dimension
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, hidden_size)

        # Project the attention output to get the final output
        output = self.output_proj(attn_output)

        return output, weights


class TransformerBlock(nn.Module):
    """One transformer layer: self-attention followed by a GELU feed-forward net.

    Each sub-layer is wrapped as ``LayerNorm(residual + Dropout(sublayer))``.

    Args:
        hidden_size: Feature size of the layer's input and output.
        num_heads: Number of heads handed to the attention sub-layer.
        dropout_prob: Dropout probability applied to each sub-layer output.
    """

    def __init__(self, hidden_size, num_heads, dropout_prob):
        super().__init__()
        expanded_size = hidden_size * 4  # width of the feed-forward layer

        self.attention = SelfAttention(hidden_size, num_heads)
        self.norm1 = nn.LayerNorm(hidden_size)
        self.norm2 = nn.LayerNorm(hidden_size)
        self.fc = nn.Linear(hidden_size, expanded_size)
        self.dropout1 = nn.Dropout(dropout_prob)
        self.dropout2 = nn.Dropout(dropout_prob)
        self.fc2 = nn.Linear(expanded_size, hidden_size)

    def forward(self, inputs):
        """Run the layer on ``inputs`` and return a tensor of the same shape."""
        # Attention sub-layer; the attention weights are not needed here.
        attended = self.attention(inputs)[0]
        attended = self.dropout1(attended)
        normed = self.norm1(inputs + attended)

        # Position-wise feed-forward sub-layer: expand, GELU, project back.
        expanded = nn.functional.gelu(self.fc(normed))
        projected = self.dropout2(self.fc2(expanded))

        return self.norm2(normed + projected)
class Encoder(nn.Module):
    """Stack of ``TransformerBlock`` layers with input and output projections.

    Args:
        input_size: Number of features per time step in the input.
        hidden_size: Feature size used inside the transformer blocks.
        num_layers: Number of transformer blocks in the stack.
        num_heads: Number of attention heads per block.
        dropout: Dropout probability passed to every block.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout):
        super().__init__()

        # Keep the configuration around for inspection.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.dropout = dropout

        # The transformer layers, applied in order.
        layers = [
            TransformerBlock(hidden_size, num_heads, dropout)
            for _ in range(num_layers)
        ]
        self.transformer_blocks = nn.ModuleList(layers)

        # input_size -> hidden_size on the way in, hidden_size -> 1 on the way out.
        self.input_proj = nn.Linear(input_size, hidden_size)
        self.output_proj = nn.Linear(hidden_size, 1)

    def forward(self, inputs):
        """Encode ``inputs`` and return one value per position.

        The last dimension of ``inputs`` must be ``input_size``; the result
        keeps the leading dimensions and has a last dimension of 1.
        """
        hidden = self.input_proj(inputs)

        # Feed the hidden states through every block in sequence.
        for block in self.transformer_blocks:
            hidden = block(hidden)

        # Collapse the hidden features to a single output value.
        return self.output_proj(hidden)

class DecoderBlock(nn.Module):
    """Transformer decoder layer: self-attention, cross-attention, feed-forward.

    Tensors are batch-first, ``(batch, seq_len, hidden_size)``, matching the
    layout used by the other modules in this file. Each sub-layer is wrapped
    as ``LayerNorm(residual + Dropout(sublayer))``.

    Args:
        hidden_size: Feature size of target and memory tensors.
        num_heads: Number of attention heads; must divide ``hidden_size``.
        feedforward_size: Width of the feed-forward sub-layer.
        dropout: Dropout probability used inside attention, inside the
            feed-forward net and on each sub-layer output.
    """

    def __init__(self, hidden_size, num_heads, feedforward_size, dropout):
        super(DecoderBlock, self).__init__()
        # nn.MultiheadAttention is sequence-first by default. The rest of the
        # model passes (batch, seq, hidden), so without batch_first=True the
        # attention would run across the batch axis instead of across time.
        self.self_attention = nn.MultiheadAttention(
            hidden_size, num_heads, dropout=dropout, batch_first=True
        )
        self.enc_dec_attention = nn.MultiheadAttention(
            hidden_size, num_heads, dropout=dropout, batch_first=True
        )
        self.feedforward = nn.Sequential(
            nn.Linear(hidden_size, feedforward_size),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(feedforward_size, hidden_size),
        )
        self.norm1 = nn.LayerNorm(hidden_size)
        self.norm2 = nn.LayerNorm(hidden_size)
        self.norm3 = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, target, memory, target_mask=None, memory_mask=None):
        """Run the decoder layer.

        Args:
            target: Decoder input, ``(batch, target_len, hidden_size)``.
            memory: Encoder states, ``(batch, memory_len, hidden_size)``.
            target_mask: Optional ``attn_mask`` for self-attention, shape
                ``(target_len, target_len)``.
            memory_mask: Optional ``attn_mask`` for cross-attention, shape
                ``(target_len, memory_len)``.

        Returns:
            Tensor with the same shape as ``target``.
        """
        # Self-attention layer
        out, _ = self.self_attention(target, target, target, attn_mask=target_mask)
        target = self.norm1(target + self.dropout(out))

        # Encoder-decoder attention layer: queries come from the target,
        # keys and values from the encoder memory.
        out, _ = self.enc_dec_attention(target, memory, memory, attn_mask=memory_mask)
        target = self.norm2(target + self.dropout(out))

        # Feedforward neural network layer
        out = self.feedforward(target)
        target = self.norm3(target + self.dropout(out))

        return target

class Decoder(nn.Module):
    """Stack of ``DecoderBlock`` layers with input and output projections.

    Args:
        input_size: Number of features per time step in the decoder input;
            also the feature size of the returned tensor.
        hidden_size: Feature size used inside the decoder blocks.
        num_layers: Number of decoder blocks in the stack.
        num_heads: Number of attention heads per block.
        feedforward_size: Width of each block's feed-forward sub-layer.
        dropout: Dropout probability passed to every block.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, feedforward_size, dropout):
        super().__init__()

        # Keep the configuration around for inspection.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.feedforward_size = feedforward_size
        self.dropout = dropout

        # The decoder layers, applied in order.
        layers = [
            DecoderBlock(hidden_size, num_heads, feedforward_size, dropout)
            for _ in range(num_layers)
        ]
        self.decoder_blocks = nn.ModuleList(layers)

        # input_size -> hidden_size on the way in, and back again on the way out.
        self.input_proj = nn.Linear(input_size, hidden_size)
        self.output_proj = nn.Linear(hidden_size, input_size)

    def forward(self, inputs, memory, input_mask=None, memory_mask=None):
        """Decode ``inputs`` while attending to ``memory``.

        ``input_mask`` and ``memory_mask`` are forwarded unchanged to every
        block as its self-attention and cross-attention masks. The result has
        a last dimension of ``input_size``.
        """
        hidden = self.input_proj(inputs)

        # Every block attends to the same encoder memory.
        for block in self.decoder_blocks:
            hidden = block(hidden, memory, input_mask, memory_mask)

        # Map the hidden states back to the input feature size.
        return self.output_proj(hidden)

class Transformer(nn.Module):
    """Encoder-decoder model producing one value per input position.

    Args:
        input_size: Number of features per time step.
        hidden_size: Feature size used inside encoder and decoder.
        num_layers: Number of layers in both the encoder and the decoder.
        num_heads: Number of attention heads per layer.
        feedforward_size: Width of the decoder's feed-forward sub-layers.
        dropout: Dropout probability used throughout.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_heads, feedforward_size, dropout):
        super(Transformer, self).__init__()
        self.encoder = Encoder(input_size, hidden_size, num_layers, num_heads, dropout=dropout)
        self.decoder = Decoder(input_size, hidden_size, num_layers, num_heads, feedforward_size, dropout)
        # Decoder.forward already maps its hidden states back to input_size
        # features, so the final head must accept input_size (not hidden_size)
        # features to be shape-compatible.
        self.output_proj = nn.Linear(input_size, 1)

    def _encode(self, inputs):
        """Return the encoder's hidden states, shape (..., hidden_size).

        Encoder.forward finishes with a projection to a single feature, which
        cannot serve as memory for the decoder's cross-attention (that needs
        hidden_size features). The encoder's input projection and block stack
        are therefore applied here without that final projection.
        NOTE: as a consequence ``self.encoder.output_proj`` takes no part in
        this model's forward pass.
        """
        hidden = self.encoder.input_proj(inputs)
        for block in self.encoder.transformer_blocks:
            hidden = block(hidden)
        return hidden

    def forward(self, inputs):
        """Map ``inputs`` of shape (batch, seq_len, input_size) to (batch, seq_len, 1).

        NOTE(review): no attention masks are passed to the decoder, so its
        self- and cross-attention are not causal -- confirm this is intended
        for next-step prediction.
        """
        # Encode the inputs into hidden-size memory
        memory = self._encode(inputs)
        # Decode the inputs while attending to the encoder memory
        dec_outputs = self.decoder(inputs, memory)
        # Project the outputs to a single value
        outputs = self.output_proj(dec_outputs)
        return outputs

# Notebook output captured when this cell was run:
#   NameError: name '_C' is not defined