In [559]:
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

import psycopg2
import numpy as np
from tqdm.auto import tqdm
from timeit import default_timer as timer

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

No GPU available, using the CPU instead.


In [560]:
def pgDataLoader():
    DATABASE_URL = "postgresql://overcat:overmind@localhost:5432/stocks"
    query = """
    SELECT * from data.reversals;
    """
    
    dataset = []
    labels = []
    
    try:
        conn = psycopg2.connect(DATABASE_URL)
        with conn.cursor() as cur:
            cur.execute(query)
            results = cur.fetchall()  # Fetch all rows from the query result
            
            for row in results:
                matrix1 = np.array([
                    row[1]["Open"],
                    row[1]["High"],
                    row[1]["Low"],
                    row[1]["Close"],
                    row[1]["Volume"]
                    # row[1]["Time"]
                ])
    
                matrix2 = np.array([
                    row[2]["Open"],
                    row[2]["High"],
                    row[2]["Low"],
                    row[2]["Close"],
                    row[2]["Volume"]
                    # row[2]["Time"]
                ])
    
                # print(row[3], row[4])
                # fig, axes = plt.subplots(1, 1)
                # candle(np.concatenate((matrix1, matrix2), axis=1), ax=axes, t0=row[4])
    
                matrix1 = np.moveaxis(matrix1, 1, 0)
                matrix2 = np.moveaxis(matrix2, 1, 0)
    
                # fig, axes = plt.subplots(1, 1)
                # candle(np.moveaxis(np.concatenate((matrix1, matrix2)), 1, 0), ax=axes, t0=temp[0])
                dataset.append(matrix1)
                labels.append(row[5])
    finally:
        conn.close()
        
    return dataset, labels

In [561]:
data, labels = pgDataLoader()
len(data), len(labels)

(1355, 1355)

In [562]:
alt_data = []
for i, sample in enumerate(data):
    open_prices = sample[:, 0]
    high_prices = sample[:, 1]
    low_prices = sample[:, 2]
    close_prices = sample[:, 3]
    # volume = sample[:, 4]

    body_length = np.abs(close_prices - open_prices)
    upper_shadow_length = high_prices - np.maximum(open_prices, close_prices)
    lower_shadow_length = np.minimum(open_prices, close_prices) - low_prices

    alt_sample = np.moveaxis(np.vstack((body_length, upper_shadow_length, lower_shadow_length, close_prices, volume)), 1, 0)
    # alt_sample = np.moveaxis(np.vstack((body_length, upper_shadow_length, lower_shadow_length, close_prices)), 1, 0)
    # alt_sample = np.moveaxis(np.vstack((body_length, upper_shadow_length, lower_shadow_length)), 1, 0)

    alt_data.append(alt_sample)

In [563]:
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.sequences = data
        self.labels = labels
        
    def __len__(self):
        return len(self.sequences)
        
    def __getitem__(self, idx):
        return torch.tensor(self.sequences[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)

In [564]:
# train_size = int(0.8*len(data))
# test_size = len(data) - train_size
# dataset = CustomDataset(data, labels)

train_size = int(0.999*len(alt_data))
test_size = len(alt_data) - train_size
dataset = CustomDataset(alt_data, labels)

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [591]:
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(hidden_size*14*2, num_classes)
        self.hidden_size = hidden_size
    def forward(self, x):
        h0 = torch.zeros(num_layers*2, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(num_layers*2, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.flatten(out)
        out = self.fc(out)
        # out = self.fc(out[:, -1, :])
        return out

In [599]:
input_size = 5
hidden_size = 64
num_layers = 2
num_classes = 2

model = LSTMClassifier(input_size, hidden_size, num_layers, num_classes)
model.to(device)

LSTMClassifier(
  (lstm): LSTM(5, 64, num_layers=2, batch_first=True, bidirectional=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Linear(in_features=1792, out_features=2, bias=True)
)

In [600]:
class BinaryAccuracy:
    def __init__(self, threshold=0.5):
        self.threshold = threshold

    def __call__(self, logits, targets):
        # Apply sigmoid to logits to get probabilities
        probabilities = torch.sigmoid(logits).squeeze(dim=1)
        # Convert probabilities to binary predictions
        predictions = (probabilities >= self.threshold).float()
        # Compare predictions with targets and calculate accuracy
        correct = (predictions == targets).float().sum()
        accuracy = correct / targets.numel()
        return accuracy.item()

class MultiClassAccuracy:
    def __init__(self):
        pass

    def __call__(self, logits, targets):
        # Apply softmax to logits to get class probabilities (optional, for insight)
        # probabilities = torch.softmax(logits, dim=1)

        # Get the predicted class indices by applying argmax to logits
        predictions = torch.argmax(logits, dim=1)

        # Compare predictions with targets and calculate accuracy
        correct = (predictions == targets).float().sum()
        accuracy = correct / targets.numel()  # Total number of samples
        return accuracy.item()

In [601]:
# loss_fn = nn.BCEWithLogitsLoss()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
accuracy_fn = MultiClassAccuracy()
# accuracy_fn = BinaryAccuracy()

In [602]:
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer):
    # Put model in train mode
    model.train()

    # Setup train loss and train accuracy values
    train_loss, train_acc = 0, 0

    # Loop through data loader data batches
    for batch, (X, y) in enumerate(dataloader):
        # 1. Forward pass
        X = X.to(device)
        y = y.to(device)
        y_pred = model(X)

        # print(y_pred)
        # print()
        # print(y)

        # 2. Calculate  and accumulate loss
        loss = loss_fn(y_pred.squeeze(1), y)
        train_loss += loss.item()

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Calculate and accumulate accuracy metric across all batches
        train_acc += accuracy_fn(y_pred, y)

    # Adjust metrics to get average loss and accuracy per batch
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

In [603]:
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module):
    # Put model in eval mode
    model.eval()

    # Setup test loss and test accuracy values
    test_loss, test_acc = 0, 0

    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):
            X = X.to(device)
            y = y.to(device)
            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits.squeeze(1), y)
            test_loss += loss.item()

            # Calculate and accumulate accuracy
            test_acc += accuracy_fn(test_pred_logits, y)

    # Adjust metrics to get average loss and accuracy per batch
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

In [604]:
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int = 5):

    # 2. Create empty results dictionary
    results = {"train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }

    # 3. Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer)
        test_loss, test_acc = test_step(model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn)

        # 4. Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # 5. Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    # 6. Return the filled results at the end of the epochs
    return results

In [605]:
# Set random seeds
torch.manual_seed(42)

# Set number of epochs
NUM_EPOCHS = 200

# Start the timer
from timeit import default_timer as timer
start_time = timer()

# Train model_0
model_0_results = train(model=model,
                        train_dataloader=train_dataloader,
                        test_dataloader=test_dataloader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=NUM_EPOCHS)

# End the timer and print out how long it took
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.6956 | train_acc: 0.5374 | test_loss: 0.7630 | test_acc: 0.0000
Epoch: 2 | train_loss: 0.6923 | train_acc: 0.5251 | test_loss: 0.7356 | test_acc: 0.0000
Epoch: 3 | train_loss: 0.6881 | train_acc: 0.5477 | test_loss: 0.7355 | test_acc: 0.5000
Epoch: 4 | train_loss: 0.6885 | train_acc: 0.5486 | test_loss: 0.8194 | test_acc: 0.0000
Epoch: 5 | train_loss: 0.6895 | train_acc: 0.5431 | test_loss: 0.7413 | test_acc: 0.0000
Epoch: 6 | train_loss: 0.6884 | train_acc: 0.5536 | test_loss: 0.7155 | test_acc: 0.5000
Epoch: 7 | train_loss: 0.6861 | train_acc: 0.5520 | test_loss: 0.7511 | test_acc: 0.0000
Epoch: 8 | train_loss: 0.6846 | train_acc: 0.5393 | test_loss: 0.7855 | test_acc: 0.0000
Epoch: 9 | train_loss: 0.6845 | train_acc: 0.5699 | test_loss: 0.7747 | test_acc: 0.0000
Epoch: 10 | train_loss: 0.6799 | train_acc: 0.5685 | test_loss: 0.6663 | test_acc: 1.0000
Epoch: 11 | train_loss: 0.6840 | train_acc: 0.5751 | test_loss: 0.7917 | test_acc: 0.0000
Epoch: 12 | train_l

In [606]:
torch.save(model.state_dict(), "lstm_model.pth")
print("Model saved!")

Model saved!
