# Test 2.2 - News Linear

### Import

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from backtesting import Backtest
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os
from datetime import datetime
from lumibot.brokers import Alpaca
from lumibot.backtesting import YahooDataBacktesting
import matplotlib.pyplot as plt
import numpy as np

In [None]:
file_path = "../Models/best_model.pt"

# Delete the current model checkpoint, if there is one.
# The removal is attempted directly instead of testing os.path.exists() first:
# that avoids the window in which the file could disappear between the check
# and the delete, and a missing file (or missing directory) is simply ignored.
try:
    os.remove(file_path)
except FileNotFoundError:
    pass

### Device

In [None]:
# Choose the compute backend: CUDA if available, otherwise Apple MPS,
# otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"

print(f"Using device: {device}")

### Hyperparameter

In [None]:
# Name under which the backtest run is registered.
backtest_name = "Test2.2"

# Optimisation settings.
learning_rate = 1e-5
num_epochs = 500  # upper bound on epochs; early stopping may end training sooner

# Network dimensions.
input_size, output_size = 17, 1  # in_features of the first layer / out_features of the last
hidden_size = 1_000              # width of every hidden layer
num_layers = 2                   # number of hidden (hidden_size x hidden_size) layers
dropout = 0.3                    # dropout probability

### Feed-Forward Model

In [None]:
class Net(nn.Module):
    """Feed-forward network built from linear layers with arctan activations.

    Layout: an input projection (``input_size -> hidden_size``), followed by
    ``num_layers`` square hidden layers (``hidden_size -> hidden_size``) and a
    linear output head (``hidden_size -> output_size``).  The same dropout
    module is applied after every activation; the output head has neither
    activation nor dropout.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers, dropout_rate=0.5):
        super().__init__()

        # The attribute names below become the state_dict keys, and the
        # creation order determines the random initialisation for a fixed seed.
        self.layer_1 = nn.Linear(input_size, hidden_size)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.hidden_layers = nn.ModuleList(
            nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)
        )
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through the network and return the output head's result."""
        hidden = self.dropout(torch.arctan(self.layer_1(x)))
        for dense in self.hidden_layers:
            hidden = self.dropout(torch.arctan(dense(hidden)))
        return self.output_layer(hidden)

### Data Preparation

In [None]:
# Train
df = pd.read_pickle("../../Data/train_spy_data.pkl")
display(df)

# Every column except the last one (not used further in this cell).
train_inputs = df.iloc[:, :-1]

# Features are columns 1..-2; column 0 is left out of the model inputs
# (presumably a date/identifier column - confirm). The last column is the label.
feature_block = df.iloc[:, 1:-1].values
target_column = df.iloc[:, -1].values.reshape(-1, 1)

# Min-max scale the features and wrap them in a tensor.
scaler = MinMaxScaler()
scaled_train_inputs = scaler.fit_transform(feature_block)
inputs_scaled = torch.tensor(scaled_train_inputs)

# The label gets its own scaler so it can be inverse-transformed separately.
scaler_y = MinMaxScaler()
scaled_train_labels = scaler_y.fit_transform(target_column)
labels_scaled = torch.tensor(scaled_train_labels)

In [None]:
# Test
test_df = pd.read_pickle("../../Data/test_spy_data.pkl")
display(test_df)

# Apply the already-fitted scalers (transform only, no refit) to the same
# column layout as the training data: features are columns 1..-2, the label
# is the last column.
scaled_test_inputs = torch.tensor(
    scaler.transform(test_df.iloc[:, 1:-1].values)
)
scaled_test_labels = torch.tensor(
    scaler_y.transform(test_df.iloc[:, -1].values.reshape(-1, 1))
)

### Init

In [None]:
# Initialize model, loss function, optimizer.
# The configured `dropout` hyperparameter is passed explicitly; without it Net
# silently falls back to its own default dropout_rate of 0.5 and the value set
# in the hyperparameter cell has no effect.
net = Net(input_size, output_size, hidden_size, num_layers, dropout_rate=dropout)
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

### Training

In [None]:
os.makedirs("../Models", exist_ok=True)
best_model_path = "../Models/best_model.pt"

patience = 8
loss_vals = []
test_loss_vals = []
best_test_loss = float("inf")
best_epoch = None
# Initialised up front so the loop cannot hit an undefined name when the very
# first test loss is not an improvement (e.g. it is NaN).
early_stopping_counter = 0

# Convert to float32 once instead of on every epoch. Labels are flattened from
# (N, 1) to (N,) so they match the squeezed network output; otherwise MSELoss
# would broadcast (N,) against (N, 1) into an (N, N) error matrix.
train_features = inputs_scaled.float()
train_targets = labels_scaled.float().squeeze(-1)
test_features = scaled_test_inputs.float()
test_targets = scaled_test_labels.float().squeeze(-1)

for epoch in range(num_epochs):
    # --- one full-batch optimisation step on the training data ---
    net.train()
    optimizer.zero_grad()

    outputs = net(train_features).squeeze(-1)
    loss = criterion(outputs, train_targets)

    loss.backward()
    optimizer.step()

    loss_vals.append(loss.item())

    # --- evaluation on the held-out test data (not the training inputs) ---
    net.eval()
    with torch.no_grad():
        test_outputs = net(test_features).squeeze(-1)
        test_loss = criterion(test_outputs, test_targets)
    test_loss_value = test_loss.item()
    test_loss_vals.append(test_loss_value)

    if test_loss_value < best_test_loss:
        # New best: checkpoint the weights and reset the patience counter.
        best_epoch = epoch
        best_test_loss = test_loss_value  # plain float, not a tensor
        torch.save(net.state_dict(), best_model_path)
        print(f"Bestes Modell gespeichert mit Test Loss: {best_test_loss:.4f}")
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {loss.item():.4f}, Test Loss: {test_loss_value:.4f}")

    # Check the early-stopping criterion.
    if early_stopping_counter >= patience:
        print(f"Early stopping nach {epoch + 1} Epochen. Test loss verbesserte sich nicht in den letzten {patience} Epochen.")
        break

# Plot the training and test loss curves on one figure.
plt.figure(figsize=(10, 5))
for curve, curve_label in ((loss_vals, 'Training Loss'), (test_loss_vals, 'Test Loss')):
    plt.plot(curve, label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Training und Test Loss')
plt.grid(True)
plt.show()

# Summary: checkpoint location, then spread and minimum of both loss histories.
print(f"Training abgeschlossen. Bestes Modell gespeichert unter: {best_model_path}")
print(f"Std Training Loss: {np.std(loss_vals)}")
print(f"Std Test Loss: {np.std(test_loss_vals)}")
print(f"Min Training Loss: {np.min(loss_vals)}")
print(f"Min Test Loss: {np.min(test_loss_vals)}")

### Backtesting

In [None]:

# Make sure the output directories exist. Only "results" is written to by this
# cell itself; "logs" is presumably used by the backtesting framework - confirm.
os.makedirs("logs", exist_ok=True)
os.makedirs("results", exist_ok=True)

# Reload both datasets so this cell does not depend on the data-preparation cells.
test_data = pd.read_pickle("../../Data/test_spy_data.pkl")
train_data = pd.read_pickle("../../Data/train_spy_data.pkl")


# Refit the feature scaler on the training features (columns 1..-2).
scaler = MinMaxScaler()
scaler.fit(train_data.iloc[:, 1:-1].values)

# Refit the label scaler on the last training column.
scaler_y = MinMaxScaler()
scaler_y.fit(train_data.iloc[:, -1].values.reshape(-1, 1))

# Rebuild the network with the configured dimensions and load the saved
# weights. dropout_rate is left at the class default; the model is switched to
# eval mode right after loading.
model = Net(input_size, output_size, hidden_size, num_layers)
model_path = "../Models/best_model.pt"
model.load_state_dict(torch.load(model_path))
model.eval()

# Alpaca credentials are read from the environment; os.getenv returns None for
# unset variables. PAPER is set to True.
ALPACA_CREDS = {
    "API_KEY": os.getenv("ALPACA_API_KEY"),
    "API_SECRET": os.getenv("ALPACA_API_SECRET"),
    "PAPER": True,
}

# Strategy setup
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)
broker = Alpaca(ALPACA_CREDS)

# NOTE(review): `Backtest` is imported from `backtesting` but is used here like
# a lumibot strategy (name/broker/parameters constructor plus a .backtest
# method) - confirm which module actually provides it.
strategy = Backtest(
    name=backtest_name,
    broker=broker,
    parameters={
        "symbol": "spy",
        "cash_at_risk": 0.5,
        "model": model,
        "num_prior_days": 30,
        "dataset": test_data,
        "scaler": scaler,
        "scaler_y": scaler_y,
    },
)

# Run backtest
# The parameters dict holds the same key/value pairs as the one given to the
# constructor above; the two must be kept in sync by hand.
backtest_results = strategy.backtest(
    YahooDataBacktesting,
    start_date,
    end_date,
    name=backtest_name,
    parameters={
        "symbol": "spy",
        "cash_at_risk": 0.5,
        "model": model,
        "dataset": test_data,
        "num_prior_days": 30,
        "scaler": scaler,
        "scaler_y": scaler_y,
    },
    benchmark_asset="SPY",
    show_plot=True,
    show_tearsheet=True,
)

# Save results
# Written as a gzip-compressed CSV inside the "results" directory.
pd.DataFrame(backtest_results).to_csv("results/backtest_results.csv.gz", index=False, compression="gzip")

print("Backtesting complete. Results saved to backtest_results.csv.gz.")

In [None]:

# Backtesting function
def backtest_model(model, x_data, y_data, scaler_y):
    """Predict on ``x_data`` and return predictions and targets in original units.

    The model is put into eval mode and run once on the whole of ``x_data``
    with gradient tracking disabled.  Both the model output and ``y_data`` are
    reshaped to a single column and mapped back through
    ``scaler_y.inverse_transform``.

    Returns:
        A ``(predictions, actuals)`` tuple of flattened arrays.
    """
    model.eval()

    with torch.no_grad():
        # np.array() copies, so the scaler never operates on tensor memory.
        scaled_predictions = np.array(model(x_data).numpy())
        scaled_actuals = np.array(y_data.numpy())

    # Undo the label scaling for both the predictions and the true values.
    restored_predictions = scaler_y.inverse_transform(scaled_predictions.reshape(-1, 1))
    restored_actuals = scaler_y.inverse_transform(scaled_actuals.reshape(-1, 1))

    return restored_predictions.flatten(), restored_actuals.flatten()

# Run the backtest
def run_backtest(test_df, model_path, seq_size):
    """Evaluate the saved model on ``test_df`` and plot predicted vs. actual values.

    Args:
        test_df: DataFrame whose columns 1..-2 are the features and whose last
            column is the label.
        model_path: Path of the state_dict checkpoint to load into ``Net``.
        seq_size: Accepted for interface compatibility; this function does not
            use it.

    Returns:
        DataFrame with the columns "Actual" and "Predicted", both in the
        label's original (unscaled) units.
    """
    # Preprocess the data: both scalers are fitted on the training set only.
    train_data = pd.read_pickle("../../Data/train_spy_data.pkl")

    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_X.fit(train_data.iloc[:, 1:-1].values)
    scaler_y.fit(train_data.iloc[:, -1].values.reshape(-1, 1))

    # Transform plain arrays, matching how the scalers were fitted. Passing the
    # DataFrame itself to a scaler fitted on an array is inconsistent with the
    # rest of the notebook and makes scikit-learn warn about feature names.
    X_test = test_df.iloc[:, 1:-1].values
    y_test = test_df.iloc[:, -1].values.reshape(-1, 1)

    X_test_scaled = torch.tensor(scaler_X.transform(X_test)).float()
    y_test_scaled = torch.tensor(scaler_y.transform(y_test)).float()

    # Load the model.
    model = Net(input_size=input_size, output_size=output_size, hidden_size=hidden_size, num_layers=num_layers)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    # Run the backtest.
    predictions, actuals = backtest_model(model, X_test_scaled, y_test_scaled, scaler_y)

    # Visualise the results.
    plt.figure(figsize=(14, 7))
    plt.plot(predictions, label="Predicted", color="blue")
    plt.plot(actuals, label="Actual", color="orange")
    plt.title("Backtesting Results")
    plt.legend()
    plt.show()

    # Compute summary statistics.
    df_results = pd.DataFrame({"Actual": actuals, "Predicted": predictions})
    errors = df_results["Actual"] - df_results["Predicted"]
    mse = (errors ** 2).mean()
    mae = errors.abs().mean()
    print(f"Mean Squared Error (MSE): {mse:.2f}")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")

    return df_results

# Apply the backtesting function.
model_path = "../Models/best_model.pt"  # path to the saved model
seq_size = 30  # sequence size (run_backtest accepts it but does not use it)
test_data = pd.read_pickle("../../Data/test_spy_data.pkl")  # load the test set

results = run_backtest(test_df=test_data, model_path=model_path, seq_size=seq_size)
