In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

# 1️⃣ Load data
# The first line of the file is skipped; the next line is consumed as the
# header and then overwritten with the explicit column names below.
raw_columns = [
    'unix', 'date', 'symbol', 'open', 'high', 'low', 'close',
    'Volume ETH', 'Volume USD',
]
unused_columns = ['unix', 'symbol', 'Volume USD']

df = pd.read_csv('CEX_ETHUSD_1h.csv', skiprows=1)
df.columns = raw_columns
df = df.drop(columns=unused_columns)

# Timestamps that do not match the expected format become NaT and are dropped;
# the remaining rows are put in chronological order with a fresh 0..n-1 index.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
df = (
    df.dropna(subset=['date'])
    .sort_values('date')
    .reset_index(drop=True)
)

# 2️⃣ Feature engineering
# Every indicator below is derived from the 'close' column.
close = df['close']

# Simple (rolling) and exponential moving averages over 7 and 21 periods.
df['ma_7'] = close.rolling(window=7).mean()
df['ma_21'] = close.rolling(window=21).mean()
df['ema_7'] = close.ewm(span=7, adjust=False).mean()
df['ema_21'] = close.ewm(span=21, adjust=False).mean()

# MACD line: 12-period EMA minus 26-period EMA.
ema_12 = close.ewm(span=12, adjust=False).mean()
ema_26 = close.ewm(span=26, adjust=False).mean()
df['macd'] = ema_12 - ema_26

# Bollinger bands: 20-period mean +/- 2 rolling standard deviations.
df['bollinger_mid'] = close.rolling(window=20).mean()
df['bollinger_std'] = close.rolling(window=20).std()
df['bollinger_upper'] = df['bollinger_mid'] + (df['bollinger_std'] * 2)
df['bollinger_lower'] = df['bollinger_mid'] - (df['bollinger_std'] * 2)

# RSI from 14-period rolling means of gains and losses; the 1e-10 term
# prevents division by zero when there were no losses in the window.
delta = close.diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / (loss + 1e-10)
df['rsi'] = 100 - (100 / (1 + rs))

# Rolling windows yield NaN until they are full (the 21-period average needs
# 21 rows). Zero-filling those rows would invent zero-valued prices/bands that
# then define the scaler minimum, so the warm-up rows are dropped instead.
indicator_cols = [
    'ma_7', 'ma_21', 'ema_7', 'ema_21', 'macd',
    'bollinger_mid', 'bollinger_std', 'bollinger_upper', 'bollinger_lower',
    'rsi',
]
df = df.dropna(subset=indicator_cols).reset_index(drop=True)

# Any NaN still present in the raw columns is zero-filled, as before.
df = df.fillna(0)

# 3️⃣ Normalize
# Fit the scaler on the chronologically first 80% of rows only, then apply it
# to every row. Those rows all fall inside the training windows produced by
# the 80/20 sequence split below, so no statistics from the test period leak
# into the scaling. Values in the held-out part may therefore fall outside
# [0, 1]; the denormalization further down uses this scaler's own
# data_min_/data_max_, so it stays consistent.
feature_frame = df.drop(columns=['date'])
scaler_fit_rows = max(1, int(len(feature_frame) * 0.8))  # keep in sync with the split fraction
scaler = MinMaxScaler()
scaler.fit(feature_frame.iloc[:scaler_fit_rows])
scaled_data = scaler.transform(feature_frame)

# 4️⃣ Create sequences
def create_sequences(data, seq_length=30, target_col=3):
    """Slice a 2-D feature matrix into sliding windows and next-step targets.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` of *data*; its target
    is column *target_col* of row ``i + seq_length``.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_features)``.
        seq_length: Number of consecutive rows in each input window.
        target_col: Column index used as the prediction target. Defaults to
            3, the position of the 'close' column in the calling script.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with shapes
        ``(n, seq_length, n_features)`` and ``(n,)``, where
        ``n = max(n_rows - seq_length, 0)``.
    """
    data = np.asarray(data)
    n_windows = len(data) - seq_length
    if n_windows <= 0:
        # Not enough rows for a single window: return empty arrays that keep
        # the expected rank so callers can still read xs.shape[2].
        return (
            np.empty((0, seq_length, data.shape[1]), dtype=data.dtype),
            np.empty((0,), dtype=data.dtype),
        )
    xs = np.array([data[start:start + seq_length, :] for start in range(n_windows)])
    # Targets are the rows immediately following each window; copy so the
    # result does not alias the input matrix.
    ys = data[seq_length:, target_col].copy()
    return xs, ys

# Build 30-step windows over the scaled feature matrix.
seq_length = 30
X, y = create_sequences(scaled_data, seq_length)

# 5️⃣ Train/test split
# Chronological split: the first 80% of windows train, the remainder test.
train_size = int(len(X) * 0.8)
train_part = slice(None, train_size)
test_part = slice(train_size, None)
X_train, y_train = X[train_part], y[train_part]
X_test, y_test = X[test_part], y[test_part]

# 6️⃣ Convert to tensors
# Inputs become float32 tensors; targets additionally get a trailing
# dimension of size 1 so they line up with the model's (batch, 1) output.
X_train = torch.from_numpy(X_train).to(torch.float32)
X_test = torch.from_numpy(X_test).to(torch.float32)
y_train = torch.from_numpy(y_train).to(torch.float32).reshape(-1, 1)
y_test = torch.from_numpy(y_test).to(torch.float32).reshape(-1, 1)

# 7️⃣ DataLoader
# Only the training windows are batched and shuffled.
batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# 8️⃣ Define LSTM model (no dropout/early stopping)
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, features).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear projection of the LSTM output at the final step."""
        sequence_out, _state = self.lstm(x)
        final_step = sequence_out[:, -1, :]
        return self.fc(final_step)

# 9️⃣ Instantiate model
input_size = X_train.shape[2]  # last axis of the window tensor = features per step
model = LSTMModel(input_size=input_size, hidden_size=50, num_layers=2, output_size=1)

# 🔟 Training loop
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 20

# Training mode is set once; nothing inside the loop switches it off.
model.train()
for epoch in range(num_epochs):
    epoch_loss_sum = 0.0
    samples_seen = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        # MSELoss returns the batch mean, so weight it by the batch size to
        # get a correct per-sample average even when the last batch is short.
        batch_len = X_batch.size(0)
        epoch_loss_sum += loss.item() * batch_len
        samples_seen += batch_len

    # Report the mean loss over the whole epoch rather than whichever
    # shuffled mini-batch happened to come last.
    epoch_loss = epoch_loss_sum / samples_seen if samples_seen else float('nan')
    print(f"Epoch {epoch}, Loss: {epoch_loss}")

# 1️⃣1️⃣ Prediction
# Switch to inference mode and run the whole test set in one forward pass
# without tracking gradients.
model.eval()
y_true = y_test.squeeze().numpy()
with torch.no_grad():
    y_pred = model(X_test).squeeze().numpy()

# 1️⃣2️⃣ Denormalize
# Undo the min-max scaling for column 3 (the 'close' target):
# value = scaled * (max - min) + min.
close_min = scaler.data_min_[3]
close_range = scaler.data_max_[3] - scaler.data_min_[3]
y_pred_rescaled, y_true_rescaled = (
    scaled * close_range + close_min for scaled in (y_pred, y_true)
)

# 1️⃣3️⃣ Plot
plt.figure(figsize=(12, 6))
for series, label in ((y_true_rescaled, 'True'), (y_pred_rescaled, 'Predicted')):
    plt.plot(series, label=label, alpha=0.6)
plt.legend()
plt.title('ETH LSTM Prediction (Hourly)')
plt.show()

# 1️⃣4️⃣ Residuals in real dollars
# Signed error (true minus predicted) after rescaling back to price units.
residuals_dollars = y_true_rescaled - y_pred_rescaled
print(f"Min Error: {residuals_dollars.min()}")
print(f"Max Error: {residuals_dollars.max()}")
print(f"Mean Absolute Error in dollars: {np.mean(np.abs(residuals_dollars))}")

plt.figure(figsize=(12, 6))
plt.plot(residuals_dollars)
plt.title("Residuals in Real ETH Price (USD)")
plt.ylabel("USD error")
plt.show()