# In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

def generate_data(seq_length=1000, *, noise_std=0.1, x_max=100.0, seed=None):
    """Generate a noisy sine wave as a single-feature column vector.

    The signal is ``sin(x)`` sampled at ``seq_length`` evenly spaced points
    on ``[0, x_max]``, plus Gaussian noise scaled by ``noise_std``.

    Args:
        seq_length: Number of samples to generate.
        noise_std: Standard deviation of the additive Gaussian noise.
            ``0`` yields the clean sine wave.
        x_max: Upper end of the sampled interval (the lower end is 0).
        seed: Optional seed for a private random generator. When ``None``
            the noise is drawn from NumPy's global random state, exactly as
            before, so callers relying on ``np.random.seed`` are unaffected.

    Returns:
        ``numpy.ndarray`` of shape ``(seq_length, 1)``.
    """
    x = np.linspace(0, x_max, seq_length)
    if seed is None:
        # Legacy path: keep consuming the global RNG stream.
        noise = np.random.randn(seq_length)
    else:
        # A private generator makes the series reproducible without
        # touching global random state.
        noise = np.random.default_rng(seed).standard_normal(seq_length)
    y = np.sin(x) + noise_std * noise
    # Column vector: one feature per time step.
    return y.reshape(-1, 1)

def create_sequences(data, seq_length):
    """Slice a series into sliding input windows and next-step targets.

    Window ``i`` is ``data[i : i + seq_length]`` and its target is
    ``data[i + seq_length]``, for every ``i`` that leaves room for a target.

    Args:
        data: Array-like whose first axis is time.
        seq_length: Number of consecutive time steps per input window.

    Returns:
        Tuple ``(xs, ys)`` of ``numpy.ndarray``. ``xs`` has shape
        ``(n_windows, seq_length, *data.shape[1:])`` and ``ys`` has shape
        ``(n_windows, *data.shape[1:])``, where
        ``n_windows = max(len(data) - seq_length, 0)``. The trailing
        dimensions are preserved even when ``n_windows`` is 0.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    series = np.asarray(data)
    n_windows = max(len(series) - seq_length, 0)

    # Row i of the index grid holds the positions i, i+1, ..., i+seq_length-1,
    # so one fancy-indexing call gathers every window at once.
    window_index = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    xs = series[window_index]

    # Copy so the targets do not alias the caller's array.
    ys = series[seq_length:seq_length + n_windows].copy()
    return xs, ys

if __name__ == "__main__":
    # Synthetic series: 1100 noisy sine samples as a (1100, 1) column.
    raw_data = generate_data(1100)
    seq_len = 50
    # Number of trailing windows (and therefore trailing samples used as
    # targets) held out for evaluation.
    test_size = 100

    # Fit the scaler on the training portion only. The last `test_size`
    # samples are exactly the test targets, so fitting on the full series
    # would leak test-set statistics into training. Held-out values may
    # therefore fall slightly outside [0, 1] after scaling.
    scaler = MinMaxScaler()
    scaler.fit(raw_data[:-test_size])
    data_scaled = scaler.transform(raw_data)
    X, y = create_sequences(data_scaled, seq_len)

    # Chronological split: the final `test_size` windows form the test set.
    X_train = torch.tensor(X[:-test_size], dtype=torch.float32)
    y_train = torch.tensor(y[:-test_size], dtype=torch.float32)
    X_test = torch.tensor(X[-test_size:], dtype=torch.float32)
    y_test = torch.tensor(y[-test_size:], dtype=torch.float32)

    # Model hyperparameters.
    input_dim = 1    # one feature per time step
    d_model = 64     # embedding width inside the Transformer
    nhead = 4        # attention heads
    num_layers = 2   # stacked encoder layers
    # Model definition
    class TransformerModel(nn.Module):
        """Encoder-only Transformer that maps a window to one predicted value.

        Input of shape ``(batch, steps, input_dim)`` is linearly embedded to
        ``d_model`` features, summed with a fixed sinusoidal positional
        encoding, passed through a stack of Transformer encoder layers, and
        the representation of the last time step is projected to one output.

        Args:
            input_dim: Number of features per time step.
            d_model: Width of the embedding / encoder layers.
            nhead: Number of attention heads per encoder layer.
            num_layers: Number of stacked encoder layers.
            seq_len: Number of positions in the precomputed positional
                encoding; ``forward`` inputs must not have more time steps.
        """

        def __init__(self, input_dim, d_model, nhead, num_layers, seq_len):
            super().__init__()
            self.embedding = nn.Linear(input_dim, d_model)
            # Non-persistent buffer: moves with the module on .to()/.cuda()
            # but stays out of state_dict, so checkpoint keys are unchanged.
            self.register_buffer(
                "pe",
                self._generate_positional_encoding(seq_len, d_model),
                persistent=False,
            )
            # batch_first=True is required because forward() feeds
            # (batch, steps, d_model). With the default (False) the encoder
            # would treat the batch axis as time and attend across samples.
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=d_model, nhead=nhead, batch_first=True
            )
            self.transformer_encoder = nn.TransformerEncoder(
                encoder_layer, num_layers=num_layers
            )
            self.fc_out = nn.Linear(d_model, 1)
            self.d_model = d_model

        def forward(self, x):
            """Predict one value per sample.

            Args:
                x: Tensor of shape ``(batch, steps, input_dim)``.

            Returns:
                Tensor of shape ``(batch, 1)``.
            """
            x_embed = self.embedding(x)
            # Trim the encoding to the actual number of time steps; the
            # leading dimension of size 1 broadcasts over the batch.
            x_pos = x_embed + self.pe[:, :x.size(1)]
            x_trans = self.transformer_encoder(x_pos)
            # Only the final time step's representation feeds the head.
            return self.fc_out(x_trans[:, -1, :])

        def _generate_positional_encoding(self, seq_len, d_model):
            """Build the sinusoidal positional encoding table.

            Even feature indices hold sines and odd indices hold cosines of
            ``position * 10000 ** (-2k / d_model)``.

            Returns:
                Tensor of shape ``(1, seq_len, d_model)``.
            """
            pe = torch.zeros(seq_len, d_model)
            position = torch.arange(0, seq_len).unsqueeze(1)
            div_term = torch.exp(
                torch.arange(0, d_model, 2) * float(-np.log(10000.0) / d_model)
            )
            angles = position * div_term
            pe[:, 0::2] = torch.sin(angles)
            # For odd d_model there is one fewer odd column than even
            # column, so drop the surplus frequency for the cosine half.
            pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
            return pe.unsqueeze(0)  # add a broadcastable batch dimension

    model = TransformerModel(input_dim, d_model, nhead, num_layers, seq_len)

    # Optimisation setup: Adam minimising mean-squared error.
    epochs = 100
    lr = 0.0005
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Full-batch training: every epoch is a single gradient step on the
    # whole training set. Nothing inside the loop leaves training mode, so
    # it is enough to enable it once up front.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()

        # Report the loss on every tenth epoch (10, 20, ..., epochs).
        if epoch % 10 == 9:
            print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

    # Evaluation: switch off dropout and gradient tracking, then flatten
    # predictions and targets to 1-D arrays for plotting.
    model.eval()
    with torch.no_grad():
        test_output = model(X_test)
    preds = test_output.squeeze().cpu().numpy()
    actual = y_test.cpu().numpy().squeeze()

    # Overlay predictions on the held-out targets (both in scaled units).
    plt.plot(actual, label="Actual")
    plt.plot(preds, label="Predicted")
    plt.title("Time Series Prediction")
    plt.xlabel("Time Step")
    plt.ylabel("Value")
    plt.legend()
    plt.show()