Importing libraries

In [11]:
## Import required libraries
import pandas as pd
import os
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from transformers import BertTokenizer, BertModel, AdamW
import warnings
warnings.filterwarnings('ignore')

Storing model weights

In [12]:
## Make a directory for storing model weights.
## The location can be overridden through the FORSAE_WEIGHTS_DIR environment
## variable so the notebook is not tied to one machine's folder layout; when
## the variable is unset the original hard-coded location is used.
output_dir=os.environ.get(
    'FORSAE_WEIGHTS_DIR',
    r'C:\Users\DELL\Documents\FORSAE\transformer-model-weights',
)
os.makedirs(output_dir,exist_ok=True)## exist_ok: re-running the cell is not an error

Data Preprocessing

In [13]:
## Load target dataset
def load_data(df):
    """Read a CSV file and return its contents as a pandas DataFrame.

    Parameters
    ----------
    df : despite the name, this is the location of the CSV file; it is
        handed to ``pd.read_csv`` unchanged.

    Returns
    -------
    The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(df)
## Location of the dataset CSV. Set the FORSAE_DATA_PATH environment variable
## to point at a copy stored elsewhere; when it is unset the original
## hard-coded location is used.
data_path=os.environ.get(
    'FORSAE_DATA_PATH',
    r'C:\Users\DELL\Documents\FORSAE\taylor_data.csv',
)
data=load_data(data_path)
data.shape

(114, 2)

Loading Tokenizer and Transformer Model

In [14]:
## Use the GPU when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Pretrained BERT tokenizer and encoder, both loaded from the same checkpoint name
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

In [15]:
## Preview the first five rows of the loaded dataset
data.head(n=5)

Unnamed: 0,Functions,Taylor Series
0,-9*x**3 + 5*x**2 + 2*x - 5,-9*x**3 + 5*x**2 + 2*x - 5
1,-2*x**3 + x**2 - x + 8,-2*x**3 + x**2 - x + 8
2,9*x**3 - 6*x**2 - 6*x + 5,9*x**3 - 6*x**2 - 6*x + 5
3,3*x**3 - 4*x**2 - 7*x + 7,3*x**3 - 4*x**2 - 7*x + 7
4,-9*x**3 - x**2 - 8*x + 9,-9*x**3 - x**2 - 8*x + 9


Apply Tokenization

In [16]:
## Tokenization
def tokenize(target_data, max_length=128):
    """Encode text with the notebook-level ``tokenizer``.

    Parameters
    ----------
    target_data : the text to encode; passed to ``tokenizer`` unchanged.
    max_length : int, default 128
        Length every encoding is padded (``padding='max_length'``) or
        truncated (``truncation=True``) to. The default keeps the value
        that was previously hard-coded.

    Returns
    -------
    Whatever ``tokenizer`` returns when asked for PyTorch tensors
    (``return_tensors='pt'``).
    """
    return tokenizer(
        target_data,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )

## Apply tokenization and get data containing features and targets.
## The two columns are walked in lockstep by position, so the pairing does not
## rely on `data` carrying a default 0..n-1 index (label lookups such as
## data['Functions'][i] would).
feats=[]## Features list
targets=[]## Targets list
for function_text,series_text in zip(data['Functions'],data['Taylor Series']):
    feats.append(tokenize(function_text))## Tokenize features
    targets.append(tokenize(series_text))## Tokenize targets

## DataFrame for features and targets: one tokenizer output per cell
training_data=pd.DataFrame()
training_data['Functions']=feats
training_data['Taylor Expansion']=targets
print(training_data.shape)

(114, 2)


Splitting the data

In [17]:
## Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
train_data, val_data = train_test_split(
    training_data,
    test_size=0.2,
    random_state=42,
)

In [18]:
## Pull the feature and target columns of each split out into plain Python lists
train_inputs = train_data['Functions'].tolist()## Training data features
train_outputs = train_data['Taylor Expansion'].tolist()## Training data targets
val_inputs = val_data['Functions'].tolist()## Validation data features
val_outputs = val_data['Taylor Expansion'].tolist()## Validation data targets

## Sanity check: show the element type held by each list
for split_items in (train_inputs, val_inputs, train_outputs, val_outputs):
    print(type(split_items[0]))

<class 'transformers.tokenization_utils_base.BatchEncoding'>
<class 'transformers.tokenization_utils_base.BatchEncoding'>
<class 'transformers.tokenization_utils_base.BatchEncoding'>
<class 'transformers.tokenization_utils_base.BatchEncoding'>


Model Training and Evaluation

In [19]:
class TranformerModel(nn.Module):
    """Encoder followed by a per-position linear projection onto the vocabulary.

    Parameters
    ----------
    bert_model : module used as the encoder. It is called with ``input_ids``
        and ``attention_mask`` keyword arguments and must return an object
        exposing ``last_hidden_state``.
    vocab_size : int
        Number of output classes produced for every position.
    """
    def __init__(self,bert_model,vocab_size):
        super(TranformerModel,self).__init__()
        self.bert=bert_model
        ## Take the encoder width from its config instead of assuming 768, so
        ## encoders of other sizes work too; 768 stays the fallback for
        ## encoders that expose no `config.hidden_size`.
        hidden_size=getattr(getattr(bert_model,'config',None),'hidden_size',768)
        self.decoder=nn.Linear(hidden_size,vocab_size)

    def forward(self,input_ids,attention_mask):
        """Return the decoder's logits for every position of the encoder output."""
        outputs=self.bert(input_ids=input_ids,attention_mask=attention_mask)
        last_hidden_state=outputs.last_hidden_state
        ## nn.Linear acts on the last dimension, giving one score per vocabulary entry
        logits=self.decoder(last_hidden_state)
        return logits
    
## Hyperparameters
epochs=50
## NOTE(review): 7e-3 is well above the learning rates commonly used when
## fine-tuning BERT (order of 1e-5); consider lowering it if the loss plateaus.
learning_rate=7e-3
checkpoint_every=20## Save weights every this many completed epochs

model=TranformerModel(bert_model,tokenizer.vocab_size)
model.to(device)## Place the parameters on the target device before building the optimizer
## NOTE(review): this AdamW is the one imported from `transformers`; newer
## releases of that library may no longer ship it (torch.optim.AdamW is the
## usual replacement) - confirm against the installed version.
optimizer=AdamW(model.parameters(),lr=learning_rate)
## Positions whose target id is the pad token are ignored by the loss
criterion=nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)


def _sample_loss(encoded_input,encoded_target):
    """Run one tokenized (input, target) pair through the model and return its loss."""
    input_ids=encoded_input['input_ids'].to(device)
    attention_mask=encoded_input['attention_mask'].to(device)
    target_ids=encoded_target['input_ids'].to(device)
    logits=model(input_ids,attention_mask)
    ## Collapse all leading dimensions: one row of class scores per target id
    return criterion(logits.view(-1,logits.size(-1)),target_ids.view(-1))


for epoch in range(0,epochs):
    ## ---- Training pass: one optimizer step per sample ----
    model.train()
    total_loss=0
    for encoded_input,encoded_target in zip(train_inputs,train_outputs):
        optimizer.zero_grad()
        loss=_sample_loss(encoded_input,encoded_target)
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()

    avg_loss=total_loss/len(train_inputs)
    print(f'Epoch :{epoch}----> Train_loss :{avg_loss:.5f}')

    ## Checkpoint every `checkpoint_every` completed epochs and always after
    ## the last one, so the final weights are kept. The previous condition
    ## (epoch%20==0) saved after epochs 1, 21 and 41 and never at the end.
    completed_epochs=epoch+1
    if completed_epochs%checkpoint_every==0 or completed_epochs==epochs:
        weights_path=os.path.join(output_dir,f'weights_step_{completed_epochs}.pth')
        torch.save(model.state_dict(),weights_path)

    ## ---- Validation pass: no gradients, no parameter updates ----
    model.eval()
    val_loss=0
    with torch.no_grad():
        for encoded_input,encoded_target in zip(val_inputs,val_outputs):
            val_loss+=_sample_loss(encoded_input,encoded_target).item()

    avg_val_loss=val_loss/len(val_inputs)
    print(f'              Validation_loss :{avg_val_loss:.5f}')

Epoch :0----> Train_loss :3.33124
              Validation_loss :2.48206
Epoch :1----> Train_loss :2.55988
              Validation_loss :2.43808
Epoch :2----> Train_loss :2.51557
              Validation_loss :2.42804
Epoch :3----> Train_loss :2.49076
              Validation_loss :2.43271
Epoch :4----> Train_loss :2.48215
              Validation_loss :2.42501
Epoch :5----> Train_loss :2.47550
              Validation_loss :2.41710
Epoch :6----> Train_loss :2.47679
              Validation_loss :2.41105
Epoch :7----> Train_loss :2.47106
              Validation_loss :2.40776
Epoch :8----> Train_loss :2.46706
              Validation_loss :2.40274
Epoch :9----> Train_loss :2.47001
              Validation_loss :2.40652
Epoch :10----> Train_loss :2.46412
              Validation_loss :2.39916
Epoch :11----> Train_loss :2.45885
              Validation_loss :2.40131
Epoch :12----> Train_loss :2.45295
              Validation_loss :2.40006
Epoch :13----> Train_loss :2.46556
             