In [4]:
import math
from sympy import symbols, sin, cos, exp, ln, log, tan, asin, atan,cot
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

# Fetch the Punkt tokenizer data; presumably this is what word_tokenize (imported above) loads.
# NOTE(review): newer NLTK releases may look for the 'punkt_tab' resource instead - confirm
# against the installed NLTK version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Dataset Preprocessing

In [2]:
class TaylorDataset:
    """Generates Taylor-expansion training pairs and encodes them as integer ids.

    ``generate`` expands every expression around x = 0 and returns the
    (function, expansion) strings as a shuffled DataFrame. ``tokenize`` builds
    a vocabulary from such a frame and converts both columns to lists of ids.
    """

    def __init__(self, order, functions=None):
        """
        order: highest power of x kept in each expansion.
        functions: optional list of sympy expressions in x; any falsy value
                   (None or an empty list) selects ``default_functions()``.
        """
        self.order = order
        self.functions = functions or self.default_functions()
        # Both lookup tables stay None until tokenize() has been called.
        self.vocab_to_int = None
        self.int_to_vocab = None

    @staticmethod
    def default_functions():
        """Return the built-in list of sympy expressions in x."""
        x = symbols('x')
        elementary = [sin(x), cos(x), exp(x), ln(1 + x), log(1 + x, 10)]
        mixed = [
            1 / (1 + x), x ** 2 + x + 1, tan(x), asin(x), atan(x),
            exp(sin(x)), exp(tan(x)), cot(x),
        ]
        products_and_powers = [
            1 / (1 + x ** 2), exp(x) * (1 + x), exp(x) * (1 - x),
            1 / (1 + x) ** 2, 1 / (1 - x) ** 2, 1 / (1 - x) ** 3,
            1 / (1 - x ** 2), log(3 + 4 * x),
        ]
        # NOTE(review): 1/(1+x) and 1/(1+x)**2 already appear above, so they
        # end up twice in the generated data - confirm this is intended.
        extras = [1 / (1 + x), 1 / (1 + x) ** 2, 1 / (1 - x), -ln(1 - x)]
        return elementary + mixed + products_and_powers + extras

    def generate(self):
        """Return a shuffled DataFrame with 'function' and 'expansion' string columns."""
        x = symbols('x')
        records = [
            {
                "function": str(func),
                # series(..., order + 1) keeps terms up to x**order; removeO drops the O() term.
                "expansion": str(func.series(x, 0, self.order + 1).removeO()),
            }
            for func in self.functions
        ]
        frame = pd.DataFrame(records)
        # Fixed random_state: the shuffle is the same on every call.
        return frame.sample(frac=1, random_state=42, ignore_index=True)

    def tokenize(self, df):
        """Build the vocabulary from ``df`` and encode both columns as id lists.

        Expansion sequences are wrapped in <SOS> ... <EOS>. Ids start at 1 and
        are assigned by descending token frequency (ties keep first-seen
        order); the special tokens are appended if they are missing. Returns a
        DataFrame with 'function_tokens' and 'expansion_tokens' columns and
        stores the lookup tables on the instance.
        """
        function_seqs, expansion_seqs = [], []
        for _, row in df.iterrows():
            function_seqs.append(word_tokenize(row['function']))
            expansion_seqs.append(['<SOS>'] + word_tokenize(row['expansion']) + ['<EOS>'])

        # Count in the same interleaved order (function, then expansion, per
        # row) so that frequency ties resolve by first appearance.
        frequencies = Counter()
        for func_seq, exp_seq in zip(function_seqs, expansion_seqs):
            frequencies.update(func_seq)
            frequencies.update(exp_seq)

        vocab = [token for token, _ in frequencies.most_common()]
        vocab.extend(
            special for special in ('<SOS>', '<EOS>', '<UNK>') if special not in vocab
        )

        self.int_to_vocab = dict(enumerate(vocab, 1))
        self.vocab_to_int = {token: idx for idx, token in self.int_to_vocab.items()}

        unknown_id = self.vocab_to_int["<UNK>"]

        def encode(sequence):
            return [self.vocab_to_int.get(token, unknown_id) for token in sequence]

        return pd.DataFrame({
            "function_tokens": [encode(seq) for seq in function_seqs],
            "expansion_tokens": [encode(seq) for seq in expansion_seqs],
        })

    def get_token_dicts(self):
        """Return (vocab_to_int, int_to_vocab); both are None before tokenize()."""
        return self.vocab_to_int, self.int_to_vocab

In [5]:
# Highest power of x kept in each expansion (generate() calls series with order + 1
# and then drops the O() term).
order = 4
taylor_dataset = TaylorDataset(order)
# (function, expansion) string pairs, shuffled with a fixed random_state.
df = taylor_dataset.generate()
df

Unnamed: 0,function,expansion
0,asin(x),x**3/6 + x
1,(x + 1)**(-2),5*x**4 - 4*x**3 + 3*x**2 - 2*x + 1
2,sin(x),-x**3/6 + x
3,1/(1 - x),x**4 + x**3 + x**2 + x + 1
4,exp(tan(x)),3*x**4/8 + x**3/2 + x**2/2 + x + 1
5,atan(x),-x**3/3 + x
6,1/(x**2 + 1),x**4 - x**2 + 1
7,cos(x),x**4/24 - x**2/2 + 1
8,(x + 1)**(-2),5*x**4 - 4*x**3 + 3*x**2 - 2*x + 1
9,1/(x + 1),x**4 - x**3 + x**2 - x + 1


In [6]:
# Builds the vocabulary from `df` and maps every token to its integer id.
# Ids start at 1; the expansion column is wrapped in <SOS> ... <EOS>.
tokenized_df = taylor_dataset.tokenize(df)
tokenized_df

Unnamed: 0,function_tokens,expansion_tokens
0,"[31, 2, 4, 3]","[6, 24, 1, 4, 7]"
1,"[2, 4, 1, 5, 3, 14, 2, 18, 3]","[6, 19, 8, 20, 1, 21, 8, 15, 1, 5, 7]"
2,"[25, 2, 4, 3]","[6, 32, 1, 4, 7]"
3,"[11, 2, 5, 8, 4, 3]","[6, 12, 1, 22, 1, 9, 1, 4, 1, 5, 7]"
4,"[23, 2, 26, 2, 4, 3, 3]","[6, 33, 1, 34, 1, 10, 1, 4, 1, 5, 7]"
5,"[35, 2, 4, 3]","[6, 36, 1, 4, 7]"
6,"[11, 2, 9, 1, 5, 3]","[6, 12, 8, 9, 1, 5, 7]"
7,"[37, 2, 4, 3]","[6, 27, 8, 10, 1, 5, 7]"
8,"[2, 4, 1, 5, 3, 14, 2, 18, 3]","[6, 19, 8, 20, 1, 21, 8, 15, 1, 5, 7]"
9,"[11, 2, 4, 1, 5, 3]","[6, 12, 8, 22, 1, 9, 8, 4, 1, 5, 7]"


# PyTorch Dataset and Collate Function

In [7]:
class TrainDataset(Dataset):
    """Map-style dataset over a DataFrame of tokenized (function, expansion) rows."""

    def __init__(self, dataset):
        # DataFrame with 'function_tokens' and 'expansion_tokens' columns.
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return (encoder input ids, decoder target ids) as long tensors."""
        row = self.dataset.iloc[idx]
        source_ids, target_ids = (
            torch.tensor(row[column], dtype=torch.long)
            for column in ('function_tokens', 'expansion_tokens')
        )
        return source_ids, target_ids

def collate_fn(batch):
    """Pad a list of (src, trg) tensor pairs into two [batch, max_len] tensors.

    Shorter sequences are right-padded with 0, the id reserved for padding.
    """
    def _pad(sequences):
        return nn.utils.rnn.pad_sequence(sequences, batch_first=True, padding_value=0)

    sources, targets = zip(*batch)
    return _pad(sources), _pad(targets)

# Positional Encoding

In [8]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a batch of embeddings.

    d_model: embedding dimension (even or odd).
    dropout: dropout probability applied after the encoding is added.
    max_len: longest sequence length the precomputed table supports.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)  # [max_len, d_model]
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # [max_len, 1]
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)  # even indices
        # For an odd d_model there is one fewer odd column than even columns,
        # so the last frequency is dropped for the cosine half.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])  # odd indices
        # Registered as a buffer: follows .to(device) and is saved in the
        # state dict, but is not a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(0))  # [1, max_len, d_model]

    def forward(self, x):
        """x: [batch, seq_len, d_model] -> same shape, with positions added.

        Raises RuntimeError if seq_len exceeds the max_len given at construction.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(1):
            raise RuntimeError(
                f"sequence length {seq_len} exceeds the positional table size {self.pe.size(1)}"
            )
        x = x + self.pe[:, :seq_len]
        return self.dropout(x)

# Transformer Seq2Seq Model

In [9]:
class TransformerModel(nn.Module):
    """Encoder-decoder transformer mapping source token ids to target log-probabilities."""

    def __init__(self, input_dim, output_dim, embed_size, nhead, num_layers, hidden_size, dropout=0.1):
        """
        input_dim: size of source vocabulary (including padding idx=0)
        output_dim: size of target vocabulary (including padding idx=0)
        embed_size: embedding dimension (d_model)
        nhead: number of attention heads
        num_layers: number of encoder and decoder layers
        hidden_size: dimension of the feedforward network
        dropout: dropout probability used by the positional encodings and the transformer
        """
        super().__init__()
        self.embed_size = embed_size

        self.src_embedding = nn.Embedding(input_dim, embed_size, padding_idx=0)
        self.tgt_embedding = nn.Embedding(output_dim, embed_size, padding_idx=0)

        self.pos_encoder = PositionalEncoding(embed_size, dropout)
        self.pos_decoder = PositionalEncoding(embed_size, dropout)

        self.transformer = nn.Transformer(d_model=embed_size, nhead=nhead,
                                          num_encoder_layers=num_layers,
                                          num_decoder_layers=num_layers,
                                          dim_feedforward=hidden_size,
                                          dropout=dropout)
        self.fc_out = nn.Linear(embed_size, output_dim)

    @staticmethod
    def _future_positions(length, device=None):
        """Boolean [length, length] matrix that is True strictly above the diagonal."""
        return torch.triu(torch.ones(length, length, dtype=torch.bool, device=device), diagonal=1)

    def make_tgt_mask(self, tgt):
        """Return the additive causal mask for ``tgt`` ([batch, tgt_len]).

        The result is a float [tgt_len, tgt_len] tensor on ``tgt``'s device:
        0 where attention is allowed, -inf for positions after the query.
        """
        tgt_seq_len = tgt.shape[1]
        blocked = self._future_positions(tgt_seq_len, tgt.device)
        return torch.zeros(tgt_seq_len, tgt_seq_len, device=tgt.device).masked_fill(blocked, float('-inf'))

    def forward(self, src, tgt):
        """
        src: [batch, src_len] source token ids (padding id = embedding padding_idx)
        tgt: [batch, tgt_len] target token ids fed to the decoder
        returns: [batch, tgt_len, output_dim] log-probabilities (for NLLLoss)
        """
        scale = math.sqrt(self.embed_size)
        src_emb = self.pos_encoder(self.src_embedding(src) * scale)  # [batch, src_len, embed_size]
        tgt_emb = self.pos_decoder(self.tgt_embedding(tgt) * scale)  # [batch, tgt_len, embed_size]

        # Transformer expects input shape: [seq_len, batch, embed_size]
        src_emb = src_emb.transpose(0, 1)
        tgt_emb = tgt_emb.transpose(0, 1)

        # All masks are boolean (True = "may not be attended to") so their
        # types match. Without the key-padding masks, padded positions in a
        # batch would take part in attention and change the real tokens' output.
        causal_mask = self._future_positions(tgt.shape[1], tgt.device)
        src_pad_mask = src == self.src_embedding.padding_idx  # [batch, src_len]
        tgt_pad_mask = tgt == self.tgt_embedding.padding_idx  # [batch, tgt_len]

        output = self.transformer(
            src_emb,
            tgt_emb,
            tgt_mask=causal_mask,
            src_key_padding_mask=src_pad_mask,
            tgt_key_padding_mask=tgt_pad_mask,
            memory_key_padding_mask=src_pad_mask,
        )
        # output: [tgt_len, batch, embed_size]
        output = output.transpose(0, 1)  # [batch, tgt_len, embed_size]
        output = self.fc_out(output)     # [batch, tgt_len, output_dim]
        # Applying log softmax for NLLLoss
        return nn.functional.log_softmax(output, dim=-1)

# Training Class for Transformer

In [10]:
class Train:
    """Owns a TransformerModel together with its optimizer and loss, and runs the training loop."""

    def __init__(self, epoch, batch_size, input_dim, output_dim, embed_size, nhead, num_layers, hidden_size, dropout=0.1):
        """
        epoch: number of passes over the dataloader performed by run()
        batch_size: stored for reference only; batching is done by the dataloader given to run()
        input_dim, output_dim, embed_size, nhead, num_layers, hidden_size, dropout:
            forwarded unchanged to TransformerModel
        """
        self.epoch = epoch
        self.batch_size = batch_size
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = TransformerModel(input_dim, output_dim, embed_size, nhead, num_layers, hidden_size, dropout).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        # Index 0 is the padding id, so padded target positions add no loss.
        self.criterion = nn.NLLLoss(ignore_index=0)

    def run(self, dataloader):
        """Train for ``self.epoch`` epochs over ``dataloader`` of (src, tgt) id batches.

        Prints the summed batch loss for the first epoch and every 100th epoch.
        """
        # get_model() moves the module to the CPU in place; bring it back so a
        # later run() does not mix CPU weights with batches on self.device.
        self.model.to(self.device)
        for epoch in range(self.epoch):
            self.model.train()
            epoch_loss = 0
            for src, tgt in dataloader:
                src, tgt = src.to(self.device), tgt.to(self.device)
                self.optimizer.zero_grad()
                # Teacher forcing: the decoder sees the target without its last token ...
                output = self.model(src, tgt[:, :-1])
                # output: [batch, tgt_len-1, output_dim]
                # ... and is scored against the target shifted by one (position 1 onward).
                output = output.reshape(-1, output.shape[-1])
                target = tgt[:, 1:].reshape(-1)
                loss = self.criterion(output, target)
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            if (epoch + 1) % 100 == 0 or epoch == 0:
                print(f'Epoch {epoch+1} - Loss: {epoch_loss:.4f}')

    def get_model(self):
        """Move the trained model to the CPU (in place) and return it."""
        return self.model.to("cpu")

# Initialize Dataset and Training Setup

In [11]:
# Seed every RNG in play so weight initialisation, dropout and DataLoader
# shuffling are repeatable under Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Hyperparameters for Transformer
epoch = 500
batch_size = 1
embed_size = 32
nhead = 4
num_layers = 2
hidden_size = 64

vocab_to_int, int_to_vocab = taylor_dataset.get_token_dicts()
input_dim = len(vocab_to_int) + 1  # +1 for padding index 0
output_dim = len(vocab_to_int) + 1

dataset = TrainDataset(tokenized_df)
# The loader takes the batch_size hyperparameter instead of a second
# hard-coded literal, so the two cannot drift apart.
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

trainer = Train(epoch, batch_size, input_dim, output_dim, embed_size, nhead, num_layers, hidden_size, dropout=0.1)
trainer.run(train_loader)



Epoch 1 - Loss: 94.9696
Epoch 100 - Loss: 4.1046
Epoch 200 - Loss: 2.1854
Epoch 300 - Loss: 3.1796
Epoch 400 - Loss: 2.9792
Epoch 500 - Loss: 1.7498


# Prediction Function for Transformer

In [12]:
def predict_sample(model, src_tensor, vocab_to_int, int_to_vocab, max_len=30):
    """
    Predicts the output sequence for a given input sequence (src_tensor) using the trained transformer model.
    Uses greedy decoding.

    model: trained model exposing the embeddings, positional encoders,
           ``transformer.encoder`` / ``transformer.decoder``, ``fc_out`` and ``make_tgt_mask``
    src_tensor: 1-D tensor of source token ids (no batch dimension)
    vocab_to_int: token -> id mapping; must contain "<SOS>" and "<EOS>"
    int_to_vocab: id -> token mapping used to decode the prediction
    max_len: maximum number of tokens generated after <SOS>

    Returns the predicted tokens as a list of strings, without <SOS>/<EOS>.
    The model is switched to eval mode as a side effect.
    """
    model.eval()
    device = next(model.parameters()).device
    scale = math.sqrt(model.embed_size)

    sos_token = vocab_to_int["<SOS>"]
    eos_token = vocab_to_int["<EOS>"]
    tgt_indices = [sos_token]
    undecodable = None  # built on the first step, once the output width is known

    # The whole decode runs without autograd; only the argmax is needed.
    with torch.no_grad():
        # Encode the source once and reuse the memory at every decoding step.
        src_batch = src_tensor.unsqueeze(0).to(device)  # [1, src_len]
        src_emb = model.pos_encoder(model.src_embedding(src_batch) * scale).transpose(0, 1)  # [src_len, 1, embed_size]
        memory = model.transformer.encoder(src_emb)

        for _ in range(max_len):
            tgt_tensor = torch.tensor(tgt_indices, dtype=torch.long, device=device).unsqueeze(0)  # [1, t]
            tgt_emb = model.pos_decoder(model.tgt_embedding(tgt_tensor) * scale).transpose(0, 1)  # [t, 1, embed_size]
            tgt_mask = model.make_tgt_mask(tgt_tensor).to(device)

            decoded = model.transformer.decoder(tgt_emb, memory, tgt_mask=tgt_mask)  # [t, 1, embed_size]
            # Raw scores are enough: log_softmax would not change the argmax.
            logits = model.fc_out(decoded.transpose(0, 1))[0, -1]  # [output_dim]

            if undecodable is None:
                # Ids missing from int_to_vocab (e.g. the padding id 0) cannot be
                # turned back into a token, so they are never selected.
                undecodable = torch.tensor(
                    [idx not in int_to_vocab for idx in range(logits.shape[-1])],
                    dtype=torch.bool,
                    device=logits.device,
                )
            next_token = logits.masked_fill(undecodable, float('-inf')).argmax().item()

            if next_token == eos_token:
                break
            tgt_indices.append(next_token)

    return [int_to_vocab[idx] for idx in tgt_indices[1:]]

In [13]:
# get_model() moves the trained module to the CPU and returns it.
trained_model = trainer.get_model()
# eval() switches layers such as dropout to inference behaviour; it is the last
# expression in the cell, so the module repr is displayed.
trained_model.eval()

TransformerModel(
  (src_embedding): Embedding(67, 32, padding_idx=0)
  (tgt_embedding): Embedding(67, 32, padding_idx=0)
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (pos_decoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
          )
          (linear1): Linear(in_features=32, out_features=64, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=64, out_features=32, bias=True)
          (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): D

# Prediction Example

In [14]:
# Pick a sample from the dataset (for example, first sample)
sample_input, sample_target = dataset[0]
predicted_expansion = predict_sample(trained_model, sample_input, vocab_to_int, int_to_vocab, max_len=30)
print("Function Tokens (input):", [int_to_vocab[token] for token in sample_input.tolist()])
print("Predicted Expansion:", " ".join(predicted_expansion))

Function Tokens (input): ['asin', '(', 'x', ')']
Predicted Expansion: x**3/6 + x
