In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from matplotlib import pyplot
import pandas as pd
import numpy as np
from matplotlib import pyplot
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns 
file_path = 'data\BB_Rates_15min_from_10-2023.xlsx'
data = pd.read_excel(file_path)


In [20]:
start_index = 3
EURUSD = data.iloc[start_index:, :2]

# Rename the columns
EURUSD.columns = ['Dates', 'Close']

In [21]:
print(EURUSD.tail())

                     Dates   Close
13539  2024-04-17 09:30:00  1.0632
13540  2024-04-17 09:45:00  1.0625
13541  2024-04-17 10:00:00   1.063
13542  2024-04-17 10:15:00  1.0642
13543  2024-04-17 10:30:00  1.0647


In [22]:
time_interval = pd.Timedelta(minutes=15)
# Convert the 'Dates' column to datetime format
EURUSD['Dates'] = pd.to_datetime(EURUSD['Dates'])

# Calculate the difference between consecutive dates
EURUSD['TimeDiff'] = EURUSD['Dates'].diff()

# Identify the sections with consecutive 15-minute intervals
EURUSD['IsConsecutive'] = EURUSD['TimeDiff'] == time_interval
EURUSD['Block'] = (EURUSD['IsConsecutive'] != EURUSD['IsConsecutive'].shift()).cumsum()

In [23]:
# Filter out sections with fewer than 2 days of consecutive 15-minute observations
min_consecutive_minutes = 2 * 24 * 60  # 2 days in minutes
valid_blocks = EURUSD.groupby('Block').filter(lambda x: len(x) >= min_consecutive_minutes / 15).Block.unique()
EURUSD_filtered = EURUSD[EURUSD['Block'].isin(valid_blocks)]

# Add the observation timeframe column
EURUSD_filtered['TimeFrame'] = EURUSD_filtered.groupby('Block').cumcount() + 1

# Drop the 'TimeDiff' and 'IsConsecutive' columns as they are no longer needed
EURUSD_filtered = EURUSD_filtered.drop(columns=['TimeDiff', 'IsConsecutive'])

# Display the first few rows of the filtered dataset with the new 'Block' and 'TimeFrame' columns
print(EURUSD_filtered.head())

                Dates   Close  Block  TimeFrame
4 2023-10-03 23:15:00  1.0466      2          1
5 2023-10-03 23:30:00  1.0465      2          2
6 2023-10-03 23:45:00  1.0465      2          3
7 2023-10-04 00:00:00  1.0468      2          4
8 2023-10-04 00:15:00  1.0467      2          5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  EURUSD_filtered['TimeFrame'] = EURUSD_filtered.groupby('Block').cumcount() + 1


In [24]:
block_mapping = {old_block: new_block for new_block, old_block in enumerate(EURUSD_filtered['Block'].unique(), 1)}

In [25]:
EURUSD_filtered['Block'] = EURUSD_filtered['Block'].map(block_mapping)

In [26]:
EURUSD_filtered = EURUSD_filtered.reset_index(drop=True)
EURUSD_filtered = EURUSD_filtered.drop(columns=['TimeFrame'])
EURUSD_filtered.head()

Unnamed: 0,Dates,Close,Block
0,2023-10-03 23:15:00,1.0466,1
1,2023-10-03 23:30:00,1.0465,1
2,2023-10-03 23:45:00,1.0465,1
3,2023-10-04 00:00:00,1.0468,1
4,2023-10-04 00:15:00,1.0467,1


In [27]:
from sklearn.preprocessing import MinMaxScaler

def create_dataset(lookback, forecast_horizon, data):
        X, Y = [], []
        scaler = MinMaxScaler()
        for i in range(len(data) - lookback - forecast_horizon + 1):
            if data.iloc[i]['Block'] == data.iloc[i + lookback + forecast_horizon - 1]['Block']:
                X_values = data.iloc[i:i+lookback]['Close'].values
                Y_values = data.iloc[i+lookback:i+lookback+forecast_horizon]['Close'].values
                X_scaled = scaler.fit_transform(X_values.reshape(-1, 1))
                Y_scaled = scaler.transform(Y_values.reshape(-1, 1))
                X.append(X_scaled)
                Y.append(Y_scaled)
        return np.array(X), np.array(Y)

In [28]:
X, Y = create_dataset(lookback=96, forecast_horizon=96, data=EURUSD_filtered)

In [29]:
X = torch.tensor(X, dtype=torch.float32)
Y = torch.tensor(Y, dtype=torch.float32)

dataset = TensorDataset(X, Y)
#dataloader = DataLoader(dataset, batch_size=96, shuffle=True)

In [30]:
train_size = int(0.8*len(dataset))
test_size = len(dataset)-train_size
print(train_size)
print(test_size)



6315
1579


In [31]:
train_data, test_data = train_test_split(dataset, test_size=test_size, random_state=42)
train_data = torch.tensor(np.array(train_data), dtype=torch.float32)
test_data = torch.tensor(np.array(test_data), dtype=torch.float32)

test_data = test_data.view(-1,2, 96)
train_data = train_data.view(-1, 2, 96)
train_data.shape
batch_size = 64
train_loader  = torch.utils.data.DataLoader(train_data, batch_size= batch_size, shuffle=True)
test_loader   = torch.utils.data.DataLoader(test_data, batch_size= batch_size, shuffle=True)

In [90]:
class Encoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(Encoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.bi_lstm = nn.LSTM(input_dim, hidden_dim, num_layers, bidirectional=True, batch_first=True)
                 
    def forward(self, x):
        # x: (batch_size, seq_len, input_size)
        output, (h_n, c_n) = self.bi_lstm(x)
        # h_n: (num_layers, batch_size, hidden_size)
        # c_n: (num_layers, batch_size, hidden_size)
        return output, h_n, c_n
        

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.hidden_dim = hidden_dim
        self.attn = nn.Linear(self.hidden_dim*2, hidden_dim)  # *2 für bidirektional
        self.v = nn.Linear(hidden_dim, 1, bias=False)
        
    def forward(self, encoder_outputs, hidden):
        timestep = encoder_outputs.size(1)
        h = hidden[-1].unsqueeze(1).repeat(1, timestep, 1)
        attn_energies = self.score(h, encoder_outputs)
        return torch.softmax(attn_energies, dim=1)
    
    def score(self, hidden, encoder_outputs):
        energy = torch.tanh(self.attn(torch.cat([hidden, encoder_outputs], dim=2)))
        energy = self.v(energy).squeeze(2)
        return energy

class ProbabilisticDecoder(nn.Module):
    def __init__(self, output_dim, hidden_dim, num_layers):
        super(ProbabilisticDecoder, self).__init__()
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.attention = Attention(hidden_dim)
        self.lstm = nn.LSTM(hidden_dim*2, hidden_dim, num_layers, batch_first=True)  # *2 für bidirektionale Ausgabe
        self.fc_mean = nn.Linear(hidden_dim, output_dim)
        self.fc_std = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x, encoder_outputs, hidden, cell):
        attn_weights = self.attention(encoder_outputs, hidden)
        context = attn_weights.unsqueeze(1).bmm(encoder_outputs)
        
        lstm_input = torch.cat([x, context], dim=2)
        output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
        
        mean = self.fc_mean(output.squeeze(1))
        std = torch.exp(self.fc_std(output.squeeze(1)))  # Ensure std is positive
        return mean, std, hidden, cell

class ProbabilisticSeq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(ProbabilisticSeq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        
    def forward(self, src, trg, trg_len):
        encoder_outputs, hidden, cell = self.encoder(src)
        means = torch.zeros(trg.size(0), trg.size(1), self.decoder.output_dim).to(src.device)
        stds = torch.zeros(trg.size(0), trg.size(1), self.decoder.output_dim).to(src.device)
        

        ##hier ist der fehler irgendwo
        # Initialisierung des ersten Decodereingangs
        print(trg.shape)
        input = trg[ 0, :].unsqueeze(1)  # Hier stellen wir sicher, dass die Form (batch_size, 1, 1) ist
        
        for t in range(1, trg_len):
            mean, std, hidden, cell = self.decoder(input, encoder_outputs, hidden, cell)
            means[:, t, :] = mean
            stds[:, t, :] = std
            input = mean.unsqueeze(1)  # Der nächste Decodereingang hat ebenfalls die Form (batch_size, 1, 1)
        
        return means, stds

def negative_log_likelihood(y_true, mean, std):
    # Log-Likelihood of a Gaussian
    return torch.mean(0.5 * torch.log(2 * np.pi * std**2) + (y_true - mean)**2 / (2 * std**2))


In [87]:
# Modell initialisieren
input_dim = X.size(1)
hidden_dim = 100
num_layers = 2
output_dim = Y.size(1)

encoder = Encoder(input_dim, hidden_dim, num_layers)
decoder = ProbabilisticDecoder(output_dim, hidden_dim, num_layers)
model = ProbabilisticSeq2Seq(encoder, decoder)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [88]:
first_item = next(iter(train_loader))
#print(first_item)
first_item[0].shape

torch.Size([2, 96])

In [89]:
for batch in train_loader:
    src = batch[:, 0, :, ]
    trg = batch[:, 1, :, ]
    trg_len = trg.size(1)
    src = src.to(device)
    trg = trg.to(device)
    means, stds = model(src, trg, trg_len)
    loss = negative_log_likelihood(trg, means, stds)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss.item())

torch.Size([64, 96])


IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)