In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler,QuantileTransformer
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset


In [None]:
class Time2Vec(nn.Module):
    """
    Fixed (non-learned) sinusoidal encoding of scalar time values.

    Every time value ``t`` is expanded to
    ``[t, sin(2*pi*k*t/period) for k = 1..K, cos(2*pi*k*t/period) for k = 1..K]``
    with ``K = num_frequency``. The module has no trainable parameters.

    Args:
        num_frequency (int): Number of frequency components ``K`` (integer multiples 1..K).
        period (int): Base period that the time values are divided by.
    """

    def __init__(self, num_frequency=16, period=256):
        super(Time2Vec, self).__init__()
        self.num_frequency = num_frequency
        self.period = period
        # Registered as a buffer so ``.to(device)`` / ``.cuda()`` move it with the
        # module; ``persistent=False`` keeps ``state_dict()`` empty, exactly as it
        # was when ``freqs`` was a plain attribute.
        self.register_buffer(
            "freqs",
            torch.linspace(1, num_frequency, num_frequency).float(),
            persistent=False,
        )

    def forward(self, t):
        """
        Encodes a batch of time values.

        Args:
            t (torch.Tensor): Time values; any shape, flattened to ``(N, 1)`` internally.

        Returns:
            torch.Tensor: Tensor of shape ``(N, 1 + 2 * num_frequency)`` laid out as
                ``[t, sin features..., cos features...]``.
        """
        t = t.reshape(-1, 1)  # column vector so it broadcasts against the frequency row
        if not torch.is_floating_point(t):
            # Integer inputs would otherwise be concatenated with float features.
            t = t.to(self.freqs.dtype)

        # Follow the input's device even if the module itself was never moved.
        freqs = self.freqs.to(device=t.device).reshape(1, -1)
        phase = 2 * torch.pi * freqs * t / self.period

        return torch.cat([t, torch.sin(phase), torch.cos(phase)], dim=1)



In [None]:

def create_sequences(input_features, close_prices, in_seq_length=100, out_seq_len=5):
    """
    Slices a time series into sliding (input window, target window) pairs.

    Sample ``i`` uses rows ``[i, i + in_seq_length)`` of ``input_features`` as input
    and the next ``out_seq_len`` entries of ``close_prices`` as target.

    Args:
        input_features (pd.DataFrame or np.ndarray): Feature rows, one per time step.
        close_prices (pd.Series or np.ndarray): Target values aligned row-by-row with
            ``input_features``. Multi-dimensional target windows (e.g. shape
            ``(out_seq_len, 1)``) are flattened.
        in_seq_length (int): Length of the input sequence.
        out_seq_len (int): Length of the output sequence.

    Returns:
        np.ndarray: Input sequences of shape (num_samples, in_seq_length, num_features).
        np.ndarray: Output sequences of shape (num_samples, out_seq_len).
        Both arrays are empty when the series is too short for a single sample.
    """
    # Convert once instead of going through ``.iloc[...].values`` for every window.
    feature_values = np.asarray(input_features)
    target_values = np.asarray(close_prices)

    # ``+ 1``: the window starting at ``len - in_seq_length - out_seq_len`` still fits
    # completely inside the data, so it is a valid (and the last) sample.
    num_samples = len(feature_values) - in_seq_length - out_seq_len + 1

    x, y = [], []
    for start in range(max(num_samples, 0)):
        split = start + in_seq_length
        x.append(feature_values[start:split])

        y_seq = target_values[split:split + out_seq_len]
        if y_seq.ndim > 1:
            y_seq = y_seq.flatten()
        y.append(y_seq)

    return np.array(x), np.array(y)


In [None]:

def create_time_features(company):
    """
    Builds Time2Vec features from the index of a price DataFrame.

    Args:
        company (pd.DataFrame): Frame whose index entries expose ``toordinal()``
            (e.g. a DatetimeIndex).

    Returns:
        torch.Tensor: Output of a default-constructed ``Time2Vec`` layer applied to the
            ordinal day numbers, one row per index entry. Column 0 is the raw ordinal.
    """
    # One integer per row: the calendar-day ordinal of that row's index entry.
    ordinal_days = pd.Series(company.index).map(lambda stamp: stamp.toordinal())

    # Time2Vec works on float tensors.
    day_tensor = torch.tensor(ordinal_days).float()

    # A fresh layer with default settings is enough: it holds no trainable state.
    return Time2Vec()(day_tensor)





In [None]:
class StockTransformer(nn.Module):
    """
    Encoder-decoder Transformer that regresses one scalar per decoder position.

    Encoder and decoder inputs share a single linear projection into ``d_model``.
    The decoder output goes through ``fc_hidden -> dropout -> fc_out``. No attention
    masks and no positional encoding are applied inside this module.

    Args:
        input_dim (int): Number of features per time step (encoder and decoder inputs).
        d_model (int): Transformer embedding size.
        nhead (int): Number of attention heads.
        num_encoder_layers (int): Number of stacked encoder layers.
        num_decoder_layers (int): Number of stacked decoder layers.
        seq_length (int): Accepted for API compatibility; not used by the module.
        hidden_dim (int, optional): Width of the hidden layer of the head. Defaults to 128.
        dropout_rate (float, optional): Dropout probability in the head. Defaults to 0.3.
    """

    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, num_decoder_layers, seq_length, hidden_dim=128, dropout_rate=0.3):
        super().__init__()

        # Shared input projection: features -> d_model
        self.fc_in = nn.Linear(input_dim, d_model)

        # Encoder stack
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

        # Decoder stack
        decoder_layer = nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)

        # Regression head: d_model -> hidden_dim -> (dropout) -> 1
        self.fc_hidden = nn.Linear(d_model, hidden_dim)
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.fc_out = nn.Linear(hidden_dim, 1)

    def forward(self, src, tgt):
        """
        Runs the encoder on ``src`` and the decoder on ``tgt``.

        Args:
            src (torch.Tensor): Encoder input of shape (batch_size, src_len, input_dim).
            tgt (torch.Tensor): Decoder input of shape (batch_size, tgt_len, input_dim).

        Returns:
            torch.Tensor: One value per decoder position, shape (batch_size, tgt_len).
        """
        # The Transformer stacks are sequence-first here: (seq_len, batch_size, d_model).
        encoder_in = self.fc_in(src).transpose(0, 1)
        decoder_in = self.fc_in(tgt).transpose(0, 1)

        memory = self.transformer_encoder(encoder_in)
        decoded = self.transformer_decoder(decoder_in, memory).transpose(0, 1)

        # Collapse (batch, step) into one axis for the head, then restore it.
        batch_size, steps, width = decoded.shape
        flat = decoded.reshape(batch_size * steps, width)
        hidden = self.dropout2(self.fc_hidden(flat))

        return self.fc_out(hidden).view(batch_size, steps)

In [None]:
def pad_batch(batch, batch_size):
    """
    Pads a batch along dimension 0 with zero rows up to ``batch_size``.

    The padding is created with the same dtype and device as ``batch`` so that
    concatenation never changes the batch's dtype.

    Args:
        batch (torch.Tensor): The input batch; dimension 0 is the batch dimension.
        batch_size (int): The desired size of dimension 0 after padding.

    Returns:
        torch.Tensor: ``batch`` itself if it already has at least ``batch_size`` rows,
            otherwise a new tensor with zero rows appended.
    """
    missing = batch_size - batch.shape[0]
    if missing <= 0:
        return batch

    # new_zeros inherits dtype and device from ``batch``.
    padding = batch.new_zeros((missing, *batch.shape[1:]))
    return torch.cat([batch, padding], dim=0)

In [None]:
def create_input(stock_name):
    """
    Downloads a ticker's full price history and builds the scaled model inputs.

    Adds 12/24-day EMAs and their difference (MACD) of the close price, generates
    Time2Vec features from the date index, min-max scales the features, and
    log1p + quantile-transforms the close price as the prediction target.

    Args:
        stock_name (str): Ticker symbol passed to ``yf.Ticker`` (e.g. ``'AMZN'``).

    Returns:
        tuple: ``(input_features, feature_scaler, time_scaler, close_scaler, scaled_close)``
            where ``input_features`` is a DataFrame of Time2Vec columns followed by the
            scaled price/indicator columns, the three scalers are already fitted, and
            ``scaled_close`` is an array of shape ``(num_rows, 1)``.

    Raises:
        ValueError: If no price history is returned for ``stock_name``.
    """
    # Fetch historical data for the given stock
    stock_data = yf.Ticker(stock_name).history(period='max')
    if stock_data.empty:
        raise ValueError(f"No price history returned for ticker {stock_name!r}")

    # 12-day and 24-day EMAs of the close price, and their difference (MACD)
    stock_data['12day_EMA'] = stock_data['Close'].ewm(span=12, adjust=False).mean()
    stock_data['24day_EMA'] = stock_data['Close'].ewm(span=24, adjust=False).mean()
    stock_data['MACD'] = stock_data['12day_EMA'] - stock_data['24day_EMA']

    # The time features below read the dates from the index, so a 'Date' column is
    # normally absent; errors='ignore' avoids a KeyError in that case while still
    # dropping the column if it does exist.
    features = stock_data.drop(columns=["Date"], errors="ignore")

    # Time2Vec features (column 0 is the raw ordinal day)
    time_features = create_time_features(stock_data)
    time_features_df = pd.DataFrame(time_features.detach().cpu().numpy())

    # Target: log1p of the close price (undone with expm1 after inverse scaling)
    close_price = np.log1p(features['Close'].to_numpy().reshape(-1, 1))

    # NOTE(review): all three scalers are fitted on the full history, i.e. before any
    # train/test split is made downstream.
    feature_scaler = MinMaxScaler()
    time_scaler = MinMaxScaler()
    close_scaler = QuantileTransformer(output_distribution='normal')

    # Only the raw ordinal column needs scaling; sin/cos columns are already in [-1, 1].
    raw_time = time_features_df.iloc[:, [0]].to_numpy()
    time_features_df.iloc[:, 0] = time_scaler.fit_transform(raw_time).astype(np.float32).ravel()

    # Scale all price/indicator features
    scaled_features = pd.DataFrame(feature_scaler.fit_transform(features))

    # Scale close price separately
    scaled_close = close_scaler.fit_transform(close_price)

    # Both frames carry positional integer column labels, so labels repeat after the
    # concat; downstream code only uses positional access and ``.shape``.
    input_features = pd.concat([time_features_df, scaled_features], axis=1)

    return input_features, feature_scaler, time_scaler, close_scaler, scaled_close

In [None]:
def _prepare_batch(batch, device):
    """Moves one (src, tgt) batch to ``device`` as float32 and builds the decoder input."""
    src, tgt = batch
    src = src.to(device=device, dtype=torch.float32)
    tgt = tgt.to(device=device, dtype=torch.float32)  # (batch, out_len)

    # The decoder input has the same feature width as ``src``; only channel 0 carries
    # the target values, every other channel stays zero.
    decoder_input = torch.zeros((tgt.shape[0], tgt.shape[1], src.shape[-1]), device=device)
    decoder_input[:, :, 0] = tgt

    # Drop the final step: it is the value the last decoder position has to predict.
    return src, tgt, decoder_input[:, :-1]


def _to_price_scale(chunks, close_scaler):
    """Concatenates per-batch arrays and undoes the scaler and the log1p transform."""
    values = np.concatenate(chunks) if chunks else np.empty(0, dtype=np.float32)
    unscaled = close_scaler.inverse_transform(values.reshape(-1, 1))
    return np.expm1(unscaled).flatten()


def train_and_evaluate(model, train_loader, test_loader, close_scaler, num_epochs=10, learning_rate=1e-4):
    """
    Trains ``model``, keeps the weights with the lowest validation loss, and evaluates them.

    Only the last decoder position is trained and scored: the model receives all but
    the last target step as decoder input and its final output is compared with the
    last target step (Huber loss, delta 4.0).

    NOTE(review): validation and final evaluation also feed the true previous target
    values to the decoder, so the reported metrics are not a pure multi-step forecast.

    Args:
        model (torch.nn.Module): Model called as ``model(src, decoder_input)`` that returns
            a tensor of shape (batch, decoder_len).
        train_loader (torch.utils.data.DataLoader): Yields ``(src, tgt)`` training batches.
        test_loader (torch.utils.data.DataLoader): Yields ``(src, tgt)`` batches used for
            both per-epoch validation and the final evaluation.
        close_scaler: Fitted scaler exposing ``inverse_transform``; its output is passed
            through ``expm1``, i.e. targets are assumed to be log1p-transformed before scaling.
        num_epochs (int, optional): Number of epochs to train. Defaults to 10.
        learning_rate (float, optional): Learning rate for Adam. Defaults to 1e-4.

    Returns:
        tuple: ``(predictions, targets, test_indices, train_losses, val_losses)`` with
            predictions/targets as 1-D arrays on the original price scale, ``test_indices``
            the running sample positions of the evaluated batches, and one average loss
            per epoch in each loss list.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    criterion = nn.HuberLoss(delta=4.0)

    best_val_loss = float('inf')
    # In-memory copy of the best weights. Restoring from this instead of re-reading
    # 'best_model.pth' means a stale checkpoint from an earlier run can never be loaded
    # when no epoch of *this* run improved (num_epochs == 0, NaN losses, ...).
    best_state = None
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        # ---- training ----
        model.train()
        epoch_loss = 0.0
        for batch in train_loader:
            src, tgt, decoder_input = _prepare_batch(batch, device)

            optimizer.zero_grad()
            output = model(src, decoder_input)
            # Use only the last prediction for each sequence
            loss = criterion(output[:, -1], tgt[:, -1])
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # max(..., 1) avoids a ZeroDivisionError on an empty loader
        avg_epoch_loss = epoch_loss / max(len(train_loader), 1)
        train_losses.append(avg_epoch_loss)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_epoch_loss:.6f}')

        # ---- validation ----
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in test_loader:
                src, tgt, decoder_input = _prepare_batch(batch, device)
                output = model(src, decoder_input)
                val_loss += criterion(output[:, -1], tgt[:, -1]).item()

        avg_val_loss = val_loss / max(len(test_loader), 1)
        val_losses.append(avg_val_loss)
        print(f'Validation Loss: {avg_val_loss:.6f}')

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_state = {name: value.detach().clone()
                          for name, value in model.state_dict().items()}
            # Still persisted to disk for later reuse outside this function.
            torch.save(model.state_dict(), 'best_model.pth')

    # Restore the best weights of this run (if any epoch produced them)
    if best_state is not None:
        model.load_state_dict(best_state)
    model.eval()

    # ---- final evaluation ----
    prediction_chunks = []
    target_chunks = []
    with torch.no_grad():
        for batch in test_loader:
            src, tgt, decoder_input = _prepare_batch(batch, device)
            output = model(src, decoder_input)
            prediction_chunks.append(output[:, -1].cpu().numpy())
            target_chunks.append(tgt[:, -1].cpu().numpy())

    # Running positions of the evaluated samples, in loader order
    test_indices = list(range(sum(len(chunk) for chunk in prediction_chunks)))

    # Back to the original price scale
    all_predictions = _to_price_scale(prediction_chunks, close_scaler)
    all_targets = _to_price_scale(target_chunks, close_scaler)

    # Calculate metrics
    mse = np.mean((all_predictions - all_targets)**2)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((all_targets - all_predictions) / all_targets)) * 100

    print(f'Test MSE: {mse:.4f}')
    print(f'Test RMSE: {rmse:.4f}')
    print(f'Test MAPE: {mape:.2f}%')

    return all_predictions, all_targets, test_indices, train_losses, val_losses

In [None]:
def duplicate_linear(input, copies=2):
    """
    Returns a copy of ``input`` with its first column repeated right after itself.

    The previous implementation discarded the result of ``np.insert`` (which returns a
    new array rather than modifying its argument) and therefore returned ``input``
    unchanged.

    Args:
        input (pd.DataFrame or np.ndarray): 2-D data whose first column is duplicated.
        copies (int, optional): Number of extra copies inserted at column positions
            ``1..copies``. Defaults to 2.

    Returns:
        pd.DataFrame or np.ndarray: Same kind as ``input`` with ``copies`` additional
            columns; for a DataFrame the copies reuse the first column's label. The
            argument itself is not modified.
    """
    if isinstance(input, pd.DataFrame):
        result = input.copy()
        first_label = result.columns[0]
        # Raw values avoid index alignment when the column is re-inserted.
        first_values = result.iloc[:, 0].to_numpy()
        for position in range(1, copies + 1):
            result.insert(position, first_label, first_values, allow_duplicates=True)
        return result

    array = np.asarray(input)
    for position in range(1, copies + 1):
        array = np.insert(array, position, array[:, 0], axis=1)
    return array

In [None]:
if __name__ == "__main__":
    # Seed the RNGs that drive weight initialisation, DataLoader shuffling and dropout
    # so that a fresh "Restart & Run All" reproduces the same training run.
    SEED = 42
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    # Prepare data
    input_features, feature_scaler, time_scaler, close_scaler, scaled_close = create_input('AMZN')

    # Create sequences; float32 is the dtype the training loop casts to anyway,
    # so creating the tensors in it halves their memory footprint.
    x, y = create_sequences(input_features, scaled_close)
    x_tensor = torch.tensor(x, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)

    # Chronological split (no shuffling): the last 20% of the samples form the test set
    X_train, X_test, y_train, y_test = train_test_split(
        x_tensor, y_tensor, test_size=0.2, shuffle=False)

    # Create DataLoaders
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Model hyper-parameters. input_dim is read from the data: the Time2Vec columns
    # (1 + 2 * 16 with the default layer) plus one column per scaled price/indicator feature.
    input_dim = input_features.shape[1]
    d_model = 64
    nhead = 4
    num_encoder_layers = 2
    num_decoder_layers = 2
    seq_length = 100  # matches the default in_seq_length of create_sequences

    model = StockTransformer(
        input_dim, d_model, nhead, num_encoder_layers, num_decoder_layers, seq_length)

    # Train and evaluate
    predictions, targets, test_indices, train_losses, val_losses = train_and_evaluate(
        model, train_loader, test_loader, close_scaler, num_epochs=100)


In [None]:
# Show the model architecture: the bare expression makes the module's repr the cell output
model

In [None]:
 # Visualization
# The dates come from the same ticker that was passed to create_input in the training
# cell ('AMZN'); the previously referenced `tesla` frame is not defined anywhere in
# this notebook. The last len(predictions) trading days are used as the x-axis.
price_index = yf.Ticker('AMZN').history(period='max').index
test_dates = price_index[-len(predictions):]

# Plot training curves
fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(train_losses, label='Training Loss')
ax.plot(val_losses, label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# Plot predictions vs actual
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(test_dates, targets, label='Actual')
ax.plot(test_dates, predictions, label='Predicted')
ax.set_title('Predicted vs Actual Closing Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
fig.tight_layout()
plt.show()