In [1]:
import torch
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments, TrainerCallback
import pandas as pd


train_file_path = 'D:\\XJTLU\\YEAR4\\FYP\\TrainDirect.csv'
data = pd.read_csv(train_file_path)

train_val_data, test_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, val_data = train_test_split(train_val_data, test_size=0.1111, random_state=42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class NLtoPythonDataSet(Dataset):
    def __init__(self, tokenizer, data, max_length=128):
        self.tokenizer = tokenizer
        self.data = data
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        input_text = item['input']
        target_text = item['output']
        
        input_encoding = self.tokenizer(input_text, max_length=self.max_length, padding='max_length', truncation=True, return_tensors='pt')
        target_encoding = self.tokenizer(target_text, max_length=self.max_length, padding='max_length', truncation=True, return_tensors='pt')
        
        inputs = {
            'input_ids': input_encoding['input_ids'].flatten(),
            'attention_mask': input_encoding['attention_mask'].flatten(),
            'labels': target_encoding['input_ids'].flatten()
        }
        
        return inputs

tokenizer = BartTokenizer.from_pretrained('D:\\XJTLU\\YEAR4\\FYP\\bart-base')
model = BartForConditionalGeneration.from_pretrained('D:\\XJTLU\\YEAR4\\FYP\\bart-base')

train_dataset = NLtoPythonDataSet(tokenizer, train_data)
val_dataset = NLtoPythonDataSet(tokenizer, val_data)

class EvalCallback(TrainerCallback):
    def on_train_end(self, args, state, control, **kwargs):
        print("Final evaluation on validation set.")
        metrics = trainer.evaluate()
        print(metrics)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    do_train=True,
    evaluation_strategy="epoch",
    logging_steps=10,
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    callbacks=[EvalCallback()]
)

trainer.train()


dataloader_config = DataLoaderConfiguration(dispatch_batches=None)


  0%|          | 0/250 [00:00<?, ?it/s]

{'loss': 10.4169, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}
{'loss': 9.5873, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.08}
{'loss': 8.7016, 'learning_rate': 3e-06, 'epoch': 0.12}
{'loss': 7.9028, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.16}
{'loss': 7.1087, 'learning_rate': 5e-06, 'epoch': 0.2}
{'loss': 6.353, 'learning_rate': 6e-06, 'epoch': 0.24}
{'loss': 5.5799, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.28}
{'loss': 4.7889, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.32}
{'loss': 3.8463, 'learning_rate': 9e-06, 'epoch': 0.36}
{'loss': 3.1017, 'learning_rate': 1e-05, 'epoch': 0.4}
{'loss': 2.6238, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.44}
{'loss': 2.2672, 'learning_rate': 1.2e-05, 'epoch': 0.48}
{'loss': 2.001, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.52}
{'loss': 1.7197, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.56}
{'loss': 1.482, 'learning_rate': 1.5e-05, 'epoch': 0.6}
{'loss': 1.2667, 'learning_r

  0%|          | 0/63 [00:00<?, ?it/s]

{'eval_loss': 0.012822696007788181, 'eval_runtime': 74.384, 'eval_samples_per_second': 6.722, 'eval_steps_per_second': 0.847, 'epoch': 1.0}
{'train_runtime': 3014.0882, 'train_samples_per_second': 1.327, 'train_steps_per_second': 0.083, 'train_loss': 3.3090200004577635, 'epoch': 1.0}
Final evaluation on validation set.


  0%|          | 0/63 [00:00<?, ?it/s]

{'eval_loss': 0.012822696007788181, 'eval_runtime': 74.4442, 'eval_samples_per_second': 6.716, 'eval_steps_per_second': 0.846, 'epoch': 1.0}
{'eval_loss': 0.012822696007788181, 'eval_runtime': 74.4442, 'eval_samples_per_second': 6.716, 'eval_steps_per_second': 0.846, 'epoch': 1.0}


TrainOutput(global_step=250, training_loss=3.3090200004577635, metrics={'train_runtime': 3014.0882, 'train_samples_per_second': 1.327, 'train_steps_per_second': 0.083, 'train_loss': 3.3090200004577635, 'epoch': 1.0})

In [9]:
evaluation_result = trainer.evaluate()
print(evaluation_result)

  0%|          | 0/63 [00:00<?, ?it/s]

{'eval_loss': 0.0013685429003089666, 'eval_runtime': 3.2448, 'eval_samples_per_second': 154.095, 'eval_steps_per_second': 19.416, 'epoch': 3.0}


In [2]:
def generate_prediction(input_text, model, tokenizer, device):
    model.eval()
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    output_ids = model.generate(input_ids)[0]
    output_text = tokenizer.decode(output_ids, skip_special_tokens=True)
    return output_text

sample_input = "How about we assign 22 percentage to fixed-income securities and 87% to small-cap stocks in ValueInvest?"
print(generate_prediction(sample_input, model, tokenizer, device))



Strategy for ValueInvest:
1. Set 22% of assets to fixed-


In [3]:
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge
from Levenshtein import distance as levenshtein_distance

def evaluate_model(test_data, model, tokenizer, device, generate_func):
    rouge = Rouge()
    total_bleu_score = 0
    total_rouge_score = {"rouge-1": {"f": 0, "p": 0, "r": 0}, "rouge-2": {"f": 0, "p": 0, "r": 0}, "rouge-l": {"f": 0, "p": 0, "r": 0}}
    total_levenshtein = 0
    correct = 0
    total = 0

    for index, row in test_data.iterrows():
        input_text = row['input']
        expected_output = row['output']
        predicted_output = generate_func(input_text, model, tokenizer, device)
        
        # BLEU Score
        reference = [expected_output.lower().split()]
        candidate = predicted_output.lower().split()
        bleu_score = sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25))
        total_bleu_score += bleu_score
        
        # ROUGE Score
        scores = rouge.get_scores(predicted_output, expected_output)
        for key in total_rouge_score:
            for metric in total_rouge_score[key]:
                total_rouge_score[key][metric] += scores[0][key][metric]
        
        # Levenshtein Distance
        lev_dist = levenshtein_distance(predicted_output.lower(), expected_output.lower())
        total_levenshtein += lev_dist
        
        # Accuracy
        if predicted_output.strip().lower() == expected_output.strip().lower():
            correct += 1

        total += 1

    average_bleu = total_bleu_score / total
    average_rouge = {key: {k: v / total for k, v in total_rouge_score[key].items()} for key in total_rouge_score}
    average_levenshtein = total_levenshtein / total
    accuracy = correct / total
    
    return {
        "average_bleu": average_bleu,
        "average_rouge": average_rouge,
        "average_levenshtein": average_levenshtein,
        "accuracy": accuracy
    }

def print_evaluation_results(title, results):
    print(title + ":")
    for key, value in results.items():
        if isinstance(value, dict):
            print(f"  {key}: {{")
            for subkey, subvalue in value.items():
                print(f"    {subkey}: {subvalue}")
            print("  }")
        else:
            print(f"  {key}: {value}")
    print()

In [4]:
results = evaluate_model(test_data, model, tokenizer, device, generate_prediction)
print_evaluation_results("Evaluation Results", results)

Evaluation Results:
  average_bleu: 0.016972044017312338
  average_rouge: {
    rouge-1: {'f': 0.4117853099360254, 'p': 0.9618102453102424, 'r': 0.2625917494270437}
    rouge-2: {'f': 0.3113691723868085, 'p': 0.9539571428571465, 'r': 0.18645909299624552}
    rouge-l: {'f': 0.4117853099360254, 'p': 0.9618102453102424, 'r': 0.2625917494270437}
  }
  average_levenshtein: 261.176
  accuracy: 0.0

