In [1]:
from transformers import T5Config, T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, TrainerCallback
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge
from Levenshtein import distance as levenshtein_distance
import json
import os
import torch
import pandas as pd

In [2]:
train_file_path = 'D:\\XJTLU\\YEAR4\\FYP\\Train.csv'
data = pd.read_csv(train_file_path)

train_val_data, test_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, val_data = train_test_split(train_val_data, test_size=0.1111, random_state=42)

device = torch.device("cuda")

In [3]:
class NLtoDSLDataSet(Dataset):
    def __init__(self, tokenizer, data, max_length=128):
        self.tokenizer = tokenizer
        self.data = data
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        input_text = item['input']
        target_text = item['output']
        
        input_encoding = self.tokenizer(input_text, max_length=self.max_length, padding='max_length', truncation=True, return_tensors='pt')
        target_encoding = self.tokenizer(target_text, max_length=self.max_length, padding='max_length', truncation=True, return_tensors='pt')
        
        inputs = {
            'input_ids': input_encoding['input_ids'].flatten(),
            'attention_mask': input_encoding['attention_mask'].flatten(),
            'labels': target_encoding['input_ids'].flatten()
        }
        
        return inputs

In [4]:
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')

train_dataset = NLtoDSLDataSet(tokenizer, train_data)
val_dataset = NLtoDSLDataSet(tokenizer, val_data)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [5]:
class EvalCallback(TrainerCallback):
    def on_train_end(self, args, state, control, **kwargs):
        print("Final evaluation on validation set.")
        metrics = trainer.evaluate()
        print(metrics)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    do_train=True,
    evaluation_strategy="no",
    load_best_model_at_end=True,
    logging_steps=10,
    save_strategy="no"
) 

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    callbacks=[EvalCallback()]
)

trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None)


  0%|          | 0/1500 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [6]:
evaluation_result = trainer.evaluate()
print(evaluation_result)

  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_loss': 0.0003921140159945935, 'eval_runtime': 8.4643, 'eval_samples_per_second': 118.144, 'eval_steps_per_second': 14.768, 'epoch': 3.0}


In [8]:
def generate_prediction(input_text, model, tokenizer, device):
    model.eval()
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    output_ids = model.generate(input_ids)[0]
    output_text = tokenizer.decode(output_ids, skip_special_tokens=True)
    return output_text

sample_input = "Could you allocate 26 proportion to debt instruments in ewqeqwqrqrqw?"
print(generate_prediction(sample_input, model, tokenizer, device))

SET ETF ewqeqwqrqrqw WITH B


In [9]:
def evaluate_model(test_data, model, tokenizer, device, generate_func):
    rouge = Rouge()
    total_bleu_score = 0
    total_rouge_score = {"rouge-1": {"f": 0, "p": 0, "r": 0}, "rouge-2": {"f": 0, "p": 0, "r": 0}, "rouge-l": {"f": 0, "p": 0, "r": 0}}
    total_levenshtein = 0
    correct = 0
    total = 0

    for index, row in test_data.iterrows():
        input_text = row['input']
        expected_output = row['output']
        predicted_output = generate_func(input_text, model, tokenizer, device)
        
        # BLEU Score
        reference = [expected_output.lower().split()]
        candidate = predicted_output.lower().split()
        bleu_score = sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25))
        total_bleu_score += bleu_score
        
        # ROUGE Score
        scores = rouge.get_scores(predicted_output, expected_output)
        for key in total_rouge_score:
            for metric in total_rouge_score[key]:
                total_rouge_score[key][metric] += scores[0][key][metric]
        
        # Levenshtein Distance
        lev_dist = levenshtein_distance(predicted_output.lower(), expected_output.lower())
        total_levenshtein += lev_dist
        
        # Accuracy
        if predicted_output.strip().lower() == expected_output.strip().lower():
            correct += 1

        total += 1

    average_bleu = total_bleu_score / total
    average_rouge = {key: {k: v / total for k, v in total_rouge_score[key].items()} for key in total_rouge_score}
    average_levenshtein = total_levenshtein / total
    accuracy = correct / total
    
    return {
        "average_bleu": average_bleu,
        "average_rouge": average_rouge,
        "average_levenshtein": average_levenshtein,
        "accuracy": accuracy
    }

def print_evaluation_results(title, results):
    print(title + ":")
    for key, value in results.items():
        if isinstance(value, dict):
            print(f"  {key}: {{")
            for subkey, subvalue in value.items():
                print(f"    {subkey}: {subvalue}")
            print("  }")
        else:
            print(f"  {key}: {value}")
    print()

In [10]:
results = evaluate_model(test_data, model, tokenizer, device, generate_prediction)
print_evaluation_results("Evaluation Results", results)

Evaluation Results:
  average_bleu: 0.7765085200527768
  average_rouge: {
    rouge-1: {'f': 0.8688809474382199, 'p': 0.9353142857142821, 'r': 0.8197142857142822}
    rouge-2: {'f': 0.8432848435671035, 'p': 0.9201666666666681, 'r': 0.7896666666666672}
    rouge-l: {'f': 0.8688809474382199, 'p': 0.9353142857142821, 'r': 0.8197142857142822}
  }
  average_levenshtein: 4.018
  accuracy: 0.379



In [33]:
def identify_and_print_errors(test_data, model, tokenizer, device, generate_func):
    errors = []
    for index, row in test_data.iterrows():
        input_text = row['input']
        expected_output = row['output']
        predicted_output = generate_func(input_text, model, tokenizer, device)
        
        if predicted_output.strip().lower() != expected_output.strip().lower():
            errors.append({
                "input": input_text,
                "expected": expected_output,
                "predicted": predicted_output
            })

    if errors:
        for error in errors:
            print(f"Input: {error['input']}")
            print(f"Expected: {error['expected']}")
            print(f"Predicted: {error['predicted']}")
            print("---------------------------------------------------")
    else:
        print("No errors found, all predictions match the expected outputs.")

In [34]:
identify_and_print_errors(test_data, model, tokenizer, device, generate_prediction)



Input: position TechGrowth by adding 23 proportion medium stocks, please.
Expected: SET ETF TechGrowth WITH MID_CAP_STOCKS = 23%
Predicted: SET ETF TechGrowth WITH MID_CAP_STOCKS = 2
---------------------------------------------------
Input: update 32% to mid-size stocks in GlobalEquityFund
Expected: UPDATE ETF GlobalEquityFund WITH MID_CAP_STOCKS = 32%
Predicted: UPDATE ETF GlobalEquityFund WITH MID_CAP_STOC
---------------------------------------------------
Input: designateing AlphaFund to include 2% more minor stocks.
Expected: SET ETF AlphaFund WITH SMALL_CAP_STOCKS = 2%
Predicted: SET ETF AlphaFund WITH SMALL_CAP_STOCKS =
---------------------------------------------------
Input: set 52 percentage to small-size stocks in GreenEnergyInvest
Expected: SET ETF GreenEnergyInvest WITH SMALL_CAP_STOCKS = 52%
Predicted: SET ETF GreenEnergyInvest WITH SMALL_CAP_STOC
---------------------------------------------------
Input: Please change mid-size stocks by 6 percentage in BetaPortfolio.
E

In [35]:
def generate_prediction_new(input_text, model, tokenizer, device='cpu'):
    model.to(device)
    model.eval() 
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    max_new_tokens = 50 
    output_ids = model.generate(input_ids, max_new_tokens=max_new_tokens)[0]
    output_text = tokenizer.decode(output_ids, skip_special_tokens=True)
    return output_text

sample_input = "Could you allocate 26 proportion to debt instruments in ewqeqwqrqrqw?"
print(generate_prediction_new(sample_input, model, tokenizer, device))

SET ETF ewqeqwqrqrqw WITH BONDS = 26%


In [36]:
results_new = evaluate_model(test_data, model, tokenizer, device, generate_prediction_new)
print_evaluation_results("Evaluation Results with new Function", results_new)

Evaluation Results with new Function:
  average_bleu: 1.0
  average_rouge: {
    rouge-1: {'f': 0.9999999950000241, 'p': 1.0, 'r': 1.0}
    rouge-2: {'f': 0.9999999950000241, 'p': 1.0, 'r': 1.0}
    rouge-l: {'f': 0.9999999950000241, 'p': 1.0, 'r': 1.0}
  }
  average_levenshtein: 0.0
  accuracy: 1.0



In [37]:
sample_input = "Divert 100% of assets in RiskyVentures to bonds immediately due to market crash."
print(generate_prediction_new(sample_input, model, tokenizer, device))

SET ETF RiskyVentures WITH BONDS = 100%


In [38]:
test_file_path = 'D:\\XJTLU\\YEAR4\\FYP\\Test.csv'
test_data_more = pd.read_csv(test_file_path)
test_data_more.head()

Unnamed: 0,input,output
0,reviseing GlobalEquityFund to include 62 perce...,UPDATE ETF GlobalEquityFund WITH MID_CAP_STOCK...
1,Please place mid-size stocks by 94% in GreenEn...,SET ETF GreenEnergyInvest WITH MID_CAP_STOCKS ...
2,I'm considering updateing PortfolioA with an a...,UPDATE ETF PortfolioA WITH MID_CAP_STOCKS = 82%
3,place 13 percentage to mid-cap stocks in Crypt...,SET ETF CryptoAssets WITH MID_CAP_STOCKS = 13%
4,position 38% to fixed-income securities in Rea...,SET ETF RealEstateHoldings WITH BONDS = 38%


In [39]:
results_more = evaluate_model(test_data_more, model, tokenizer, device, generate_prediction_new)
print_evaluation_results("Evaluation Results with new Function", results_more)

Evaluation Results with new Function:
  average_bleu: 0.9816945034422906
  average_rouge: {
    rouge-1: {'f': 0.9843749950390615, 'p': 0.984375, 'r': 0.984375}
    rouge-2: {'f': 0.9830729117057282, 'p': 0.9830729166666666, 'r': 0.9830729166666666}
    rouge-l: {'f': 0.9843749950390615, 'p': 0.984375, 'r': 0.984375}
  }
  average_levenshtein: 0.34375
  accuracy: 0.953125



In [40]:
identify_and_print_errors(test_data_more, model, tokenizer, device, generate_prediction_new)

Input: input
Expected: output
Predicted: SET ETF WITH BONDS =
---------------------------------------------------
Input: Divert 100% of assets in RiskyVentures to bonds immediately due to market crash.
Expected: UPDATE ETF RiskyVentures WITH BONDS = 100%
Predicted: SET ETF RiskyVentures WITH BONDS = 100%
---------------------------------------------------
Input: Zero out the small-cap stocks in TechInnovationFund as a strategic move to refocus.
Expected: UPDATE ETF TechInnovationFund WITH SMALL_CAP_STOCKS = 0%
Predicted: UPDATE ETF TechInnovationFund WITH SMALL_CAP_STOCKS = 10%
---------------------------------------------------
Input: Set the stakes in GlobalEquity to large-caps by 27%, especially focusing on stocks.
Expected: UPDATE ETF GlobalEquity WITH LARGE_CAP_STOCKS = 27%
Predicted: SET ETF GlobalEquity WITH LARGE_CAP_STOCKS = 27%
---------------------------------------------------
Input: Completely divest from bonds in RiskPortfolio given the new tax implications.
Expected: UPD