# LSTM Question 1 #

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import zipfile

# Load the daily price table, drop rows with missing values and keep the
# 'Price' column as a 1-D NumPy array.
df = pd.read_csv("/Users/arnavkarnik/Desktop/Test/daily.csv")
df = df.dropna()
price_data = df['Price'].values
print(f"Dataset length: {len(price_data)}")

# Normalize the data.
# The scaler is fitted on the leading 80% of the series only. Further down the
# script the final 20% of windows are held out chronologically
# (test_size=0.2, shuffle=False); fitting on the full series would let the
# min/max of that held-out period leak into the training inputs.
# Held-out values may therefore fall slightly outside [-1, 1].
train_fraction = 0.8
n_fit = max(1, int(len(price_data) * train_fraction))  # never fit on zero rows
price_column = price_data.reshape(-1, 1)  # scaler expects a 2-D (n_samples, 1) array
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(price_column[:n_fit])
price_data_normalized = scaler.transform(price_column)

# Create sequences for time series prediction
def create_sequences(data, seq_length):
    """Cut ``data`` into overlapping windows paired with the value that follows.

    Args:
        data: Indexable sequence (e.g. a NumPy array) ordered in time.
        seq_length: Number of consecutive entries in each window.

    Returns:
        A tuple ``(windows, targets)`` of NumPy arrays. ``windows[k]`` is
        ``data[k:k + seq_length]`` and ``targets[k]`` is
        ``data[k + seq_length]``. Both are empty when ``data`` has no more
        than ``seq_length`` entries.
    """
    # One window per start index that still leaves a following element as target
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    targets = [data[start + seq_length] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Lookback period: 30 days of history are used to predict the next day
seq_length = 30

# Slice the normalized series into (window, next value) pairs
X, y = create_sequences(price_data_normalized, seq_length)
print(f"X shape: {X.shape}, y shape: {y.shape}")

# Chronological hold-out: with shuffle=False the last 20% of windows form the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# Wrap every NumPy split in a torch.FloatTensor
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.FloatTensor(split) for split in (X_train, y_train, X_test, y_test)
)

# Pair inputs with targets and batch them; only the training loader shuffles
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Define the LSTM model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_layer_size: Size of the hidden state of each LSTM layer.
        output_size: Number of values produced by the linear layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1, num_layers=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.num_layers = num_layers

        # batch_first=True: tensors are laid out as (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_layer_size, num_layers, batch_first=True)

        # Maps the final hidden output to the prediction
        self.fc = nn.Linear(hidden_layer_size, output_size)

    def forward(self, x):
        """Run the network on a batch of sequences.

        Args:
            x: Tensor of shape ``(batch_size, seq_length, input_size)``.

        Returns:
            Tensor of shape ``(batch_size, output_size)`` computed from the
            LSTM output at the last time step.
        """
        # nn.LSTM starts from all-zero hidden and cell states when none are
        # passed. Relying on that default avoids allocating float32 zeros on
        # the CPU and copying them to the device on every call, and keeps the
        # module usable after model.double() / model.half().
        out, _ = self.lstm(x)  # out shape: (batch_size, seq_length, hidden_size)

        # Only the last time step feeds the linear layer
        return self.fc(out[:, -1, :])

# Choose the compute device: CUDA when PyTorch reports it available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Network hyperparameters
input_size, output_size = 1, 1  # one feature in (Price), one value out (next price)
hidden_layer_size = 100
num_layers = 2

# Build the network and place it on the chosen device
model = LSTMModel(
    input_size,
    hidden_layer_size,
    output_size,
    num_layers,
).to(device)
print(model)

# Mean-squared-error objective minimized with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on any module-level ``device`` variable.

    Args:
        model: Network to optimize; called on inputs of shape
            ``(batch, seq_length, 1)``.
        train_loader: Iterable yielding ``(inputs, targets)`` tensor batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer bound to the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        List with one average loss per epoch.

    Raises:
        ValueError: If an epoch sees no samples.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    train_losses = []

    for epoch in range(num_epochs):
        loss_sum = 0.0
        n_samples = 0

        for inputs, targets in train_loader:
            n_batch = inputs.shape[0]

            # Reshape inputs to [batch_size, seq_length, input_size]
            inputs = inputs.reshape(n_batch, inputs.shape[1], 1).to(run_device)
            targets = targets.to(run_device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            # Weight each batch by its size so a short final batch does not
            # skew the epoch average (assumes the criterion returns a batch
            # mean, as nn.MSELoss() does by default).
            loss_sum += loss.item() * n_batch
            n_samples += n_batch

        if n_samples == 0:
            raise ValueError("train_loader yielded no samples")

        epoch_loss = loss_sum / n_samples
        train_losses.append(epoch_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.6f}')

    return train_losses

# Evaluation function
def evaluate_model(model, test_loader, criterion):
    """Run ``model`` over ``test_loader`` without gradients and collect outputs.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on any module-level ``device`` variable. The
    average loss is printed, not returned.

    Args:
        model: Network to evaluate; called on inputs of shape
            ``(batch, seq_length, 1)``.
        test_loader: Iterable yielding ``(inputs, targets)`` tensor batches.
        criterion: Loss callable taking ``(outputs, targets)``.

    Returns:
        Tuple ``(predictions, actuals)`` of NumPy arrays, one row per sample
        in loader order.

    Raises:
        ValueError: If the loader yields no samples.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    loss_sum = 0.0
    n_samples = 0
    predictions = []
    actuals = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            n_batch = inputs.shape[0]
            inputs = inputs.reshape(n_batch, inputs.shape[1], 1).to(run_device)
            targets = targets.to(run_device)

            outputs = model(inputs)

            # Weight by batch size so a short final batch does not skew the
            # average (assumes the criterion returns a batch mean, as
            # nn.MSELoss() does by default).
            loss_sum += criterion(outputs, targets).item() * n_batch
            n_samples += n_batch

            predictions.extend(outputs.cpu().numpy())
            actuals.extend(targets.cpu().numpy())

    if n_samples == 0:
        raise ValueError("test_loader yielded no samples")

    avg_test_loss = loss_sum / n_samples
    print(f'Test Loss: {avg_test_loss:.6f}')

    return np.array(predictions), np.array(actuals)

# Fit the network
num_epochs = 100
train_losses = train_model(model, train_loader, criterion, optimizer, num_epochs)

# Loss curve: one point per epoch
loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
loss_ax.plot(train_losses, label='Training Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss over Epochs')
loss_ax.legend()
loss_ax.grid(True)
plt.show()

# Score the held-out windows
predictions, actuals = evaluate_model(model, test_loader, criterion)

# Undo the scaling so both series are expressed in price units again;
# the scaler works on 2-D column arrays, hence the reshape.
predictions_reshaped, actuals_reshaped = (
    values.reshape(-1, 1) for values in (predictions, actuals)
)
predictions_actual = scaler.inverse_transform(predictions_reshaped)
actuals_actual = scaler.inverse_transform(actuals_reshaped)

# Overlay predicted and observed prices for the test period
price_fig, price_ax = plt.subplots(figsize=(12, 6))
price_ax.plot(actuals_actual, label='Actual Prices')
price_ax.plot(predictions_actual, label='Predicted Prices')
price_ax.set_xlabel('Time Steps')
price_ax.set_ylabel('Price')
price_ax.set_title('Predicted vs Actual Prices')
price_ax.legend()
price_ax.grid(True)
plt.show()

# Calculate metrics
def calculate_metrics(actual, pred):
    """Compute MSE, RMSE, MAE and MAPE between two equally sized series.

    Both inputs are flattened first, so an ``(N, 1)`` column and an ``(N,)``
    vector are compared element by element instead of being broadcast into an
    ``N x N`` matrix.

    Args:
        actual: Observed values (array-like).
        pred: Predicted values (array-like) with the same number of elements.

    Returns:
        Dict with keys ``'MSE'``, ``'RMSE'``, ``'MAE'`` and ``'MAPE'``
        (percent). MAPE is averaged over entries whose actual value is
        non-zero and is NaN when there are none.

    Raises:
        ValueError: If the inputs hold a different number of elements.
    """
    actual = np.asarray(actual, dtype=float).ravel()
    pred = np.asarray(pred, dtype=float).ravel()
    if actual.size != pred.size:
        raise ValueError(
            f"actual and pred must have the same number of elements, "
            f"got {actual.size} and {pred.size}"
        )

    errors = actual - pred
    abs_errors = np.abs(errors)

    mse = np.mean(errors ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(abs_errors)

    # Percentage error is undefined where the actual value is zero; skip those
    # entries rather than letting a division by zero turn the mean into inf/NaN.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(abs_errors[nonzero] / np.abs(actual[nonzero])) * 100
    else:
        mape = float('nan')

    return {'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}

# Report every error metric, in price units, to four decimals
metrics = calculate_metrics(actuals_actual, predictions_actual)
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")

# Persist the trained weights (state dict only) to the working directory
trained_weights = model.state_dict()
torch.save(trained_weights, 'lstm_price_prediction_model.pth')
print("Model saved successfully!")

# Making future predictions
def predict_future(model, last_sequence, n_steps, scaler):
    """Forecast ``n_steps`` values by feeding each prediction back as input.

    The window length is taken from ``last_sequence`` and the device from the
    model's parameters, so the function does not depend on module-level
    ``seq_length`` or ``device`` variables.

    Args:
        model: Network called on a float32 tensor of shape
            ``(1, window_length, 1)``; element ``[0, 0]`` of its output is
            used as the next value.
        last_sequence: Array-like holding the most recent scaled values, in
            time order. It is flattened and never modified.
        n_steps: Number of future steps to predict.
        scaler: Object whose ``inverse_transform`` maps an ``(n, 1)`` array of
            scaled values back to the original units.

    Returns:
        Array of shape ``(n_steps, 1)`` in original units; an empty
        ``(0, 1)`` array when ``n_steps`` is not positive.
    """
    if n_steps <= 0:
        # Nothing to forecast; skip the scaler, which may reject empty input
        return np.empty((0, 1), dtype=np.float32)

    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # np.array always copies, so the caller's sequence stays untouched
    window = np.array(last_sequence, dtype=np.float32).reshape(-1)
    window_length = window.shape[0]

    model.eval()
    future_predictions = []

    with torch.no_grad():
        for _ in range(n_steps):
            # Prepare the input sequence as (batch=1, window_length, features=1)
            seq_tensor = torch.from_numpy(window).reshape(1, window_length, 1).to(run_device)

            next_value = model(seq_tensor).cpu().numpy()[0, 0]
            future_predictions.append(next_value)

            # Slide the window: drop the oldest value, append the new prediction
            window = np.append(window[1:], next_value)

    # Inverse transform the predictions back to original units
    future_predictions = np.array(future_predictions).reshape(-1, 1)
    return scaler.inverse_transform(future_predictions)

# Seed the forecast with the most recent `seq_length` scaled observations.
# X_test[-1] is the window whose target is the final known price, so starting
# from it would re-predict a day that has already been observed and every
# plotted forecast would sit one step too late.
last_sequence = price_data_normalized[-seq_length:]

# Predict next 30 days
n_future_days = 30
future_predictions = predict_future(model, last_sequence, n_future_days, scaler)

# Plot future predictions after the observed test-period prices
plt.figure(figsize=(12, 6))
plt.plot(range(len(actuals_actual)), actuals_actual, label='Historical Actual')
plt.plot(range(len(actuals_actual), len(actuals_actual) + n_future_days), future_predictions, label='Future Predictions', color='red')
# Dashed line marks the last observed time step
plt.axvline(x=len(actuals_actual)-1, color='k', linestyle='--')
plt.xlabel('Time Steps')
plt.ylabel('Price')
plt.title('Future Price Predictions')
plt.legend()
plt.grid(True)
plt.show()

# Header follows n_future_days so it cannot drift from the actual horizon
print(f"Future predictions for the next {n_future_days} days:")
for i, pred in enumerate(future_predictions):
    print(f"Day {i+1}: {pred[0]:.2f}")