# Import libraries

In [1]:
# Step 1: Import necessary libraries
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, AutoConfig
from accelerate import Accelerator
from torch.utils.data import DataLoader, Dataset
from datasets import Dataset, DatasetDict
import json
import os
from IPython.display import display, Math
from tqdm import tqdm
import bitsandbytes as bnb
import torch.nn as nn
import torch
from peft import LoraConfig, get_peft_model
from sklearn.model_selection import train_test_split
import pandas as pd
import wandb

In [2]:
# Restrict the process to GPU 0 and point the Hugging Face cache root at /workspace.
# NOTE(review): these variables are assigned after the import cell has already
# imported torch/transformers. HF_HOME is presumably read when the Hugging Face
# libraries are imported, so this assignment may not take effect — confirm, and
# consider setting both variables before the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['HF_HOME'] = '/workspace'
print("HF_HOME is set to:", os.getenv('HF_HOME'))

HF_HOME is set to: /workspace


# Load dataset

In [3]:
# Step 1: Load the dataset from the updated path
# The JSON file is read as a list of records; each record is indexed below by
# the keys "latex_expression", "solution" and "equation_type".
with open('../combined_output.json', 'r') as f:
    data = json.load(f)

# Extract relevant columns into three parallel lists (same order as `data`)
latex_expressions = [item["latex_expression"] for item in data]
solutions = [item["solution"] for item in data]
equation_types = [item["equation_type"] for item in data]

# Display the first three examples to ensure everything is loaded correctly
# (indexing 0..2 requires the dataset to contain at least three records)
for i in range(3):
    print(f"Latex Expression: {latex_expressions[i]}")
    print(f"Solution: {solutions[i]}")
    print()

Latex Expression: 48.67023444263988 e^{x^{2}} + 385 + 37.652777306616414 e^{- 0.3961616721565986 t^{2}}
Solution: from math import exp
from sympy import Sum, symbols
k = symbols('k')

def expr_function(t, x):
    return 48.67023444263988*exp(x**2) + 385 + 37.652777306616414*exp(-0.3961616721565986*t**2)

Latex Expression: l w + 3 x + y + 3 z + \log{\left(\left|{x^{2} - 49.64541292281292}\right| \right)} + 15.436178799866738 + \frac{0.399043442233811 e^{- \frac{\left(- mean + x\right)^{2}}{2 std_{dev}^{2}}}}{std_{dev}}
Solution: from math import sqrt
from math import exp
from sympy import Abs
from math import log, e

def expr_function(std_dev, x, w, z, y, mean, l):
    return l*w + 3*x + y + 3*z + log(Abs(x**2 - 49.64541292281292)) + 15.436178799866738 + 0.399043442233811*exp(-(-mean + x)**2/(2*std_dev**2))/std_dev

Latex Expression: p q \left(1 - p\right) \left(1 - q\right) + 7.307771168091335 e^{0.5230255874730376 t^{2}} - 3.7964629252816167 e^{0.8152910177873205 t^{2}} + 10.560026191

In [4]:
# Step 2: Create a DataFrame for easier manipulation
# One row per example; columns mirror the three lists extracted from the JSON.
df = pd.DataFrame({
    "latex_expression": latex_expressions,
    "solution": solutions,
    "equation_type": equation_types
})

# Step 3: Stratified 80/20 split on equation_type so both splits keep the same
# class proportions; random_state=42 makes the split repeatable.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["equation_type"], random_state=42)


In [5]:
# Step 4: Load the tokenizer that matches the model checkpoint

# Initialize the tokenizer from the checkpoint named below.
# NOTE(review): the literal here is "Salesforce/codet5-large", but the data
# collator output recorded later in this notebook reports
# name_or_path='Salesforce/codet5p-770m' — confirm which checkpoint is intended.
model_checkpoint = "Salesforce/codet5-large"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Test the tokenizer on a sample LaTeX expression to ensure it's working
sample_expression = latex_expressions[0]
encoded_input = tokenizer(sample_expression)

# Print the LaTeX expression, its token ids, and the round-trip decode
print(f"Latex expression: {latex_expressions[0]}")
print(f"Latex to ID: {encoded_input}")
print(f"ID to Latex: {tokenizer.decode(encoded_input['input_ids'])}")

# Test the tokenizer on a sample solution to ensure it's working
sample_solution = solutions[0]
encoded_output = tokenizer(sample_solution)

# Print the solution, its token ids, and the round-trip decode
print(f"Solution: {solutions[0]}")
print(f"Solution to ID: {encoded_output}")
print(f"ID to Solution: {tokenizer.decode(encoded_output['input_ids'])}")

Latex expression: 48.67023444263988 e^{x^{2}} + 385 + 37.652777306616414 e^{- 0.3961616721565986 t^{2}}
Latex to ID: {'input_ids': [1, 8875, 18, 9599, 3103, 5026, 6334, 22, 21607, 5482, 425, 66, 95, 92, 66, 95, 22, 9090, 397, 890, 7140, 397, 18091, 18, 9222, 22, 14509, 5082, 6028, 23147, 3461, 425, 66, 95, 17, 374, 18, 5520, 26, 2313, 2313, 9060, 28946, 6162, 5292, 268, 66, 95, 22, 9090, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
ID to Latex: <s>48.67023444263988 e^{x^{2}} + 385 + 37.652777306616414 e^{- 0.3961616721565986 t^{2}}</s>
Solution: from math import exp
from sympy import Sum, symbols
k = symbols('k')

def expr_function(t, x):
    return 48.67023444263988*exp(x**2) + 385 + 37.652777306616414*exp(-0.3961616721565986*t**2)
Solution to ID: {'input_ids': [1, 2080, 4233, 1930, 1329, 203, 2080, 27978, 93, 1930, 9352, 16, 7963, 203, 79, 273, 7963, 266



In [6]:
# Step 5: Tokenize the entire dataset to determine the max length

# Tokenize every LaTeX expression and solution one at a time and keep only the
# token counts (the encodings themselves are discarded). This covers the full
# dataset, i.e. both the training and the validation rows.
latex_token_lengths = [len(tokenizer(expr)["input_ids"]) for expr in latex_expressions]
solution_token_lengths = [len(tokenizer(sol)["input_ids"]) for sol in solutions]

# Find the maximum number of tokens for each; these two globals are reused
# later as truncation / generation limits.
max_latex_length = max(latex_token_lengths)
max_solution_length = max(solution_token_lengths)

# Print the results
print(f"Maximum number of tokens in LaTeX expressions: {max_latex_length}")
print(f"Maximum number of tokens in solutions: {max_solution_length}")


Maximum number of tokens in LaTeX expressions: 341
Maximum number of tokens in solutions: 351


In [7]:
def preprocess_data(latex_expressions, solutions, latex_max_length=max_latex_length, solution_max_length=max_solution_length):
    """
    Tokenize LaTeX inputs and their target solutions with the notebook's tokenizer.

    Both sides are truncated to their respective maximum length. No padding
    argument is passed to the tokenizer here.

    Args:
    - latex_expressions (list): List of LaTeX expressions (model inputs).
    - solutions (list): List of corresponding solutions (Python code, targets).
    - latex_max_length (int): Truncation limit for the LaTeX side; defaults to
      the value of `max_latex_length` at the time this function is defined.
    - solution_max_length (int): Truncation limit for the solution side;
      defaults to the value of `max_solution_length` at definition time.

    Returns:
    - tokenized_latex: Tokenizer output for the LaTeX expressions.
    - tokenized_solutions: Tokenizer output for the solutions.
      (Callers index both results by "input_ids"; the LaTeX result is also
      indexed by "attention_mask".)
    """
    def _encode(texts, limit):
        # One batched tokenizer call, cut off at `limit` tokens.
        return tokenizer(texts, truncation=True, max_length=limit)

    # Inputs are encoded first, then targets, each with its own limit.
    encoded_inputs = _encode(latex_expressions, latex_max_length)
    encoded_targets = _encode(solutions, solution_max_length)
    return encoded_inputs, encoded_targets

# Tokenize the training and validation splits separately (default length limits).
train_tokenized_latex, train_tokenized_solutions = preprocess_data(train_df["latex_expression"].tolist(), train_df["solution"].tolist())
val_tokenized_latex, val_tokenized_solutions = preprocess_data(val_df["latex_expression"].tolist(), val_df["solution"].tolist())

# Prepare data for Hugging Face Dataset: the LaTeX token ids and attention mask
# are the encoder inputs, and the solution token ids are used as labels.
train_data_dict = {
    "input_ids": train_tokenized_latex["input_ids"],
    "attention_mask": train_tokenized_latex["attention_mask"],
    "labels": train_tokenized_solutions["input_ids"]
}

val_data_dict = {
    "input_ids": val_tokenized_latex["input_ids"],
    "attention_mask": val_tokenized_latex["attention_mask"],
    "labels": val_tokenized_solutions["input_ids"]
}

# Convert to Hugging Face Dataset
train_dataset = Dataset.from_dict(train_data_dict)
val_dataset = Dataset.from_dict(val_data_dict)

# Combine into a DatasetDict keyed by 'train' and 'validation'
split_dataset = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset
})

# Display the resulting splits as the cell output
split_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 34236
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 8559
    })
})

# Load model

In [8]:
# Load the pretrained seq2seq model. No dtype argument is passed, and the memory
# readout recorded in the next cell (~2.75 GB for ~738M parameters, i.e. 4 bytes
# per parameter) indicates float32 weights, not float16.
# NOTE(review): trust_remote_code=True permits code shipped with the checkpoint
# to run — confirm it is actually required for this checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint, trust_remote_code=True)
# Replace each parameter tensor with a contiguous copy of itself.
# NOTE(review): presumably needed so checkpoints can be saved without a
# non-contiguous-tensor error — confirm.
for param in model.parameters():
    param.data = param.data.contiguous()

In [9]:
def count_parameters_and_memory(model):
    """
    Count the trainable parameters of `model` and the bytes they occupy.

    Only parameters with `requires_grad` set are included; frozen parameters
    are skipped for both totals.

    Args:
    - model: Object exposing `parameters()`, whose items provide `numel()`,
      `element_size()` and `requires_grad`.

    Returns:
    - (total_params, total_memory): Number of trainable elements and their
      size in bytes (elements * element size, summed per parameter).
    """
    param_count = 0
    byte_count = 0
    for tensor in model.parameters():
        if not tensor.requires_grad:
            continue
        elements = tensor.numel()
        param_count += elements
        byte_count += elements * tensor.element_size()
    return param_count, byte_count

def format_memory_size(bytes_size):
    """
    Format a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached. Values of 1024 TB or more are reported in TB
    instead of falling off the end of the loop and returning None.

    Args:
    - bytes_size (int | float): Size in bytes.

    Returns:
    - str: e.g. "1023.00 B", "1.00 KB", "2.75 GB".
    """
    # Convert bytes to the largest unit that keeps the value below 1024
    units = ['B', 'KB', 'MB', 'GB', 'TB']
    for unit in units[:-1]:
        if bytes_size < 1024:
            return f"{bytes_size:.2f} {unit}"
        bytes_size /= 1024
    # Largest unit: always return a string, even for very large values.
    return f"{bytes_size:.2f} {units[-1]}"

# Assuming 'model' is the variable holding your LLM
# Only parameters with requires_grad set are counted by the helper above.
model_parameters, model_memory = count_parameters_and_memory(model)
formatted_memory = format_memory_size(model_memory)

# Report the parameter count (with thousands separators) and the formatted size.
print(f"The model has {model_parameters:,} parameters.")
print(f"The model is taking approximately {formatted_memory} of memory.")

The model has 737,639,424 parameters.
The model is taking approximately 2.75 GB of memory.


# Data collator

In [10]:
# Step 9: Create the data collator used to batch examples during training

# Initialize the data collator
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,           # Tokenizer used for padding
    model=model,                    # The model is passed in (NOTE(review): presumably so the collator can derive decoder inputs from the labels — confirm)
    padding=True,                  # Dynamically pad sequences within each batch
    return_tensors="pt"            # Return PyTorch tensors
)

# Display the collator configuration as the cell output
data_collator

DataCollatorForSeq2Seq(tokenizer=RobertaTokenizerFast(name_or_path='Salesforce/codet5p-770m', vocab_size=32100, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>', 'additional_special_tokens': ['<extra_id_99>', '<extra_id_98>', '<extra_id_97>', '<extra_id_96>', '<extra_id_95>', '<extra_id_94>', '<extra_id_93>', '<extra_id_92>', '<extra_id_91>', '<extra_id_90>', '<extra_id_89>', '<extra_id_88>', '<extra_id_87>', '<extra_id_86>', '<extra_id_85>', '<extra_id_84>', '<extra_id_83>', '<extra_id_82>', '<extra_id_81>', '<extra_id_80>', '<extra_id_79>', '<extra_id_78>', '<extra_id_77>', '<extra_id_76>', '<extra_id_75>', '<extra_id_74>', '<extra_id_73>', '<extra_id_72>', '<extra_id_71>', '<extra_id_70>', '<extra_id_69>', '<extra_id_68>', '<extra_id_67>', '<extra_id_66>', '<extra_id_65>', '<extra_id_64>

# Training

In [11]:
# Authenticate with Weights & Biases before starting a run.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mubaidullahjaved7[0m ([33mubaidullahjaved7-nust[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [12]:
# Step 1: Initialize W&B project
# The values under `config` are read back through wandb.config when the
# training arguments are built in the next cell.
# NOTE(review): the free-text notes mention "train.json", while this notebook
# loads its data from ../combined_output.json — confirm the notes are current.
wandb.init(
    project="codeT5_P_last",
    name="T5_P_run_last",
    notes="No custom loss function, codet5p on train.json. Equal classes split.",
    config={
        "learning_rate": 5e-5,
        "batch_size": 8,
        "epochs": 6
    }
)

In [13]:
# Step 10: Setup training arguments
# Setup training arguments for epoch-based evaluation and saving.
# Learning rate, batch size and epoch count come from the W&B run config.
training_args = Seq2SeqTrainingArguments(
    output_dir="./lastmodel",              # Directory to save the model and results
    eval_strategy="epoch",               # Evaluate at the end of each epoch
    save_strategy="epoch",               # Save a checkpoint at the end of each epoch
    learning_rate=wandb.config.learning_rate,  # Learning rate
    per_device_train_batch_size=wandb.config.batch_size,  # Batch size for training
    per_device_eval_batch_size=wandb.config.batch_size,   # Batch size for evaluation
    weight_decay=0.01,                   # Weight decay to prevent overfitting
    save_total_limit=3,                  # Limit the number of checkpoints to save
    num_train_epochs=wandb.config.epochs,  # Number of epochs
    predict_with_generate=True,          # Enable generation during evaluation
    fp16=False,                          # Mixed-precision (fp16) training is disabled
    logging_dir="./finallogs",                # Directory for storing logs
    logging_steps=500,                   # Log every 500 steps
    load_best_model_at_end=True,         # Load the best model at the end of training
    report_to="wandb",                   # Enable W&B integration
    run_name="T5_P_run_last",                # Set a distinct run name
)


# Step 11: Initialize the trainer
trainer = Seq2SeqTrainer(
    model=model,                         # The model to be trained
    args=training_args,                  # Training arguments
    train_dataset=split_dataset["train"],# Training dataset
    eval_dataset=split_dataset["validation"],# Evaluation dataset
    tokenizer=tokenizer,                 # The tokenizer
    data_collator=data_collator          # Data collator
)

# Start training, then close the W&B run once training returns
trainer.train()
wandb.finish()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss,Validation Loss
1000,0.1144,0.099964
2000,0.0983,0.089592
3000,0.0878,0.082333
4000,0.0897,0.075313
5000,0.0772,0.072667
6000,0.0749,0.067422
7000,0.073,0.064463
8000,0.0724,0.063762
9000,0.0629,0.062035
10000,0.0501,0.060063


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


VBox(children=(Label(value='0.020 MB of 0.031 MB uploaded\r'), FloatProgress(value=0.6423220973782772, max=1.0…

0,1
eval/loss,█▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁
eval/runtime,▂▄▂▂▃▅█▄▆▃▄▅▃▂▄▄▁
eval/samples_per_second,▇▅▇▇▆▄▁▅▃▆▅▄▆▇▅▅█
eval/steps_per_second,▇▅▇▇▆▄▁▅▃▆▅▄▆▇▅▅█
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/grad_norm,▅▂▃▆█▃▂▄▇▂▄█▃▂▁▃▂▂▂▃▁▄▄▂▂▁▃▂▃▂▂▂▄▂
train/learning_rate,███▇▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▄▄▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁

0,1
eval/loss,0.05179
eval/runtime,180.573
eval/samples_per_second,47.399
eval/steps_per_second,11.851
total_flos,2.7472454283264e+16
train/epoch,2.0
train/global_step,17118.0
train/grad_norm,0.14968
train/learning_rate,0.0
train/loss,0.0525


# Inference model

In [15]:
# Ensure the model is in evaluation mode
model.eval()

# Select one sample from the validation split (index 5)
sample = split_dataset["validation"][5]

# Decode the LaTeX expression from input_ids
input_ids = sample['input_ids']
latex_expression = tokenizer.decode(input_ids, skip_special_tokens=True)
print(f"LaTeX Expression: {latex_expression}")

# Tokenize the LaTeX expression for model input.
# Use the dataset-wide limit measured earlier (max_latex_length) instead of a
# hard-coded 167, so long expressions are not truncated more aggressively here
# than during training or in the public-test generation loop.
encoded_input = tokenizer(latex_expression, return_tensors="pt", max_length=max_latex_length, truncation=True).input_ids.to(model.device)

# Generate the Python solution with beam search; the output budget is the
# longest solution seen in the dataset (max_solution_length) rather than 256,
# so long solutions are not cut off mid-function.
generated_ids = model.generate(encoded_input, max_length=max_solution_length, num_beams=5, early_stopping=True)

# Decode the generated output to text
generated_solution = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(f"Generated Python Solution:\n{generated_solution}")

LaTeX Expression: y^{3} - 8649 + \sum_{x=1}^{4} 2^{x}
Generated Python Solution:


def summation_function(x, y):
    return y**3 - 8649 + (sum(2**x for x in range(1, 4+1)))



In [16]:
# Select one sample from the validation split (index 1)
sample = split_dataset["validation"][1]

# Decode the LaTeX expression from input_ids
input_ids = sample['input_ids']
latex_expression = tokenizer.decode(input_ids, skip_special_tokens=True)
print(f"LaTeX Expression: {latex_expression}")

# Tokenize the LaTeX expression for model input.
# Use the dataset-wide limit measured earlier (max_latex_length) instead of a
# hard-coded 167, so long expressions are not truncated more aggressively here
# than during training or in the public-test generation loop.
encoded_input = tokenizer(latex_expression, return_tensors="pt", max_length=max_latex_length, truncation=True).input_ids.to(model.device)

# Generate the Python solution with beam search; the output budget is the
# longest solution seen in the dataset (max_solution_length) rather than 256,
# so long solutions are not cut off mid-function.
generated_ids = model.generate(encoded_input, max_length=max_solution_length, num_beams=5, early_stopping=True)

# Decode the generated output to text
generated_solution = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(f"Generated Python Solution:\n{generated_solution}")

LaTeX Expression: \log_e{\left(\log_e{\left(9.10219523396356 c^{3} + 12.645535131863 c^{2} - 9.55238311519424 x \right)} \right)}
Generated Python Solution:
import math
import cmath

def log_function(c, x):
    value = 9.10219523396356*c**3 + 12.645535131863*c**2 - 9.55238311519424*x
    if value < 0:
        return cmath.log(value)
    else:
        return math.log(value)


# Load saved model from checkpoint

In [17]:
import torch
# Reload tokenizer and model from a saved training checkpoint. This rebinds the
# notebook-level names model_checkpoint, tokenizer and model.
# NOTE(review): './results' is the output_dir of the "Train model further"
# cell, whereas the first training run writes to './lastmodel' — confirm that
# this checkpoint directory exists when running the notebook top to bottom.
model_checkpoint = './results/checkpoint-17000'

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Replace each parameter tensor with a contiguous copy of itself (same
# treatment as when the base model was first loaded).
for param in model.parameters():
    param.data = param.data.contiguous()

# If you have a GPU, move the model to the GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32100, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32100, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=1024, out_features=4096, bias=False)
              (wo): Linear(in_features=4096, out_features=1024, bias=False)
              (d

# Try model on public test set

In [18]:
from tqdm import tqdm
import json

# Path to the test JSON file
test_json_path = '../public_test_new_no_sol_no_out.json'

# Load the test data (a list of records, each read below via 'latex_expression')
with open(test_json_path, 'r') as f:
    test_data = json.load(f)

# Loop through the test data with tqdm progress bar.
# Samples are processed one at a time (no batching); each record in test_data
# is modified in place by adding a 'solution' key.
for item in tqdm(test_data, desc="Generating solutions", unit="sample"):
    latex_expression = item['latex_expression']
    
    # Tokenize the LaTeX expression, truncated to the dataset-wide maximum
    encoded_input = tokenizer(latex_expression, return_tensors="pt", max_length=max_latex_length, truncation=True).input_ids.to(model.device)
    
    # Generate the Python solution with 5-beam search, capped at the longest
    # solution length measured on the dataset
    generated_ids = model.generate(encoded_input, max_length=max_solution_length, num_beams=5, early_stopping=True)
    
    # Decode the generated output to text
    generated_solution = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    
    # Add the generated solution to the item
    item['solution'] = generated_solution

# Output file, written to the current working directory
output_json_path = 'public_test_with_solutions_4.json'

# Save the updated test data with solutions
with open(output_json_path, 'w') as f:
    json.dump(test_data, f, indent=4)
    

Generating solutions: 100%|██████████| 1004/1004 [22:11<00:00,  1.33s/sample]


In [19]:
import inspect

def compile_code(code):
    """
    Compile a generated solution and find the function it defines.

    The source is parsed with `ast` and the first top-level function
    definition supplies the name. This replaces a plain split on 'def ',
    which picked up the wrong text whenever "def " appeared earlier in the
    source (for example inside a comment).

    Args:
    - code (str): Python source expected to define at least one top-level function.

    Returns:
    - (compiled_code, func_name): Code object compiled in "exec" mode and the
      name of the first top-level function, or (None, None) if the source
      does not parse/compile or defines no top-level function. The failure
      reason is printed.
    """
    import ast  # local import so this block does not depend on the notebook's import cell

    try:
        # Parse once; the same tree is used to find the name and to compile.
        tree = ast.parse(code, "string", "exec")
        func_name = next(
            (
                node.name
                for node in tree.body
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            ),
            None,
        )
        if func_name is None:
            raise ValueError("no top-level function definition found")
        # Compile the code into an executable module
        compiled_code = compile(tree, "string", "exec")
        return compiled_code, func_name
    except Exception as e:
        print(f"Compilation Error: {e}")
        return None, None

def run_code(compiled_code, func_name, test_input):
    """
    Execute a compiled solution and call its function on one test case.

    Args:
    - compiled_code: Code object produced by `compile(..., "exec")`.
    - func_name (str): Name of the function defined by `compiled_code`.
    - test_input (dict): Maps argument names to values for this test case.

    Returns:
    - str: a reformatted string when the textual result contains "I"
      ("I" -> "j", with "*" and spaces removed); "inf" for +/- infinity or
      ZeroDivisionError; "complex" for any other exception.
    - float: the numeric result otherwise.
    - None: when a TypeError is raised.

    Notes:
    - Arguments are bound by parameter name whenever every parameter of the
      function appears in `test_input`, so a function whose parameters are
      declared in a different order than the test-case keys still receives
      the right values. Otherwise the values are passed positionally in dict
      order, cut to the number of parameters.
    - The code runs in a copy of this module's globals, so names defined or
      imported by one generated solution cannot overwrite notebook variables
      or leak into the evaluation of later solutions.
    - WARNING: this executes model-generated code without sandboxing.
    """
    try:
        # Execute the compiled code in an isolated copy of the current globals
        namespace = dict(globals())
        exec(compiled_code, namespace)

        # Get the function object from that namespace
        func = eval(func_name, namespace)

        # Names of the parameters the function expects, in declaration order
        param_names = list(inspect.signature(func).parameters)

        if all(name in test_input for name in param_names):
            # Bind by name: pick each parameter's value regardless of key order
            input_values = [test_input[name] for name in param_names]
        else:
            # Names do not line up: fall back to positional order
            input_values = list(test_input.values())[:len(param_names)]

        # Run the function with the provided input
        output = func(*input_values)

        # A result whose text contains "I" is rewritten into a compact string
        # using "j" in place of "I".
        # NOTE(review): a built-in Python complex result contains no "I" and
        # reaches float() below, which raises TypeError and yields None —
        # confirm that is the intended outcome.
        output_text = str(output)
        if "I" in output_text:
            return output_text.replace("I", "j").replace("*", "").replace(" ", "")
        elif output == float('inf') or output == float('-inf'):
            return "inf"
        else:
            return float(output)  # Return the output as a float

    except ZeroDivisionError as e:
        print(f"ZeroDivisionError: {e}")
        return "inf"
    except TypeError as e:
        print(f"TypeError: {e}")
        return None
    except Exception as e:
        print(f"Exception: {e}")
        return 'complex'

In [20]:
# Function to load dataset from JSON file
def load_dataset(file_path):
    """
    Read a JSON file and return its parsed content.

    Args:
    - file_path (str): Path of the JSON file to read.

    Returns:
    - The parsed JSON value, or None if the file does not exist or does not
      contain valid JSON (a message is printed in either case).
    """
    try:
        with open(file_path, 'r') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
    except json.JSONDecodeError:
        print(f"Error: The file {file_path} is not a valid JSON.")
    # Reached only after one of the handled errors above.
    return None

# Function to extract solution expressions
def extract_solutions(data):
    """
    Collect the 'solution' value of every record, preserving order.

    Args:
    - data (iterable): Records that can be indexed by the key 'solution'.

    Returns:
    - list: One solution per record.
    """
    collected = []
    for record in data:
        collected.append(record['solution'])
    return collected

# Function to save solutions to a new JSON file
def save_solutions(solutions, output_path):
    """
    Write `solutions` to `output_path` as JSON indented by four spaces.

    Any exception raised while writing is caught and reported with a printed
    message instead of propagating.

    Args:
    - solutions: JSON-serializable value to store.
    - output_path (str): Destination file path (overwritten if it exists).
    """
    try:
        with open(output_path, 'w') as destination:
            json.dump(solutions, destination, indent=4)
        print(f"Solutions saved successfully to {output_path}.")
    except Exception as error:
        print(f"Error saving solutions: {error}")

# Full process
def save_solution_expressions(input_path, output_path):
    """
    Load records from `input_path`, pull out their solutions, and save them.

    Nothing is written when loading returns a falsy value (None after a load
    error, or an empty container).

    Args:
    - input_path (str): JSON file containing records with a 'solution' key.
    - output_path (str): JSON file that receives the list of solutions.
    """
    records = load_dataset(input_path)
    if not records:
        return
    save_solutions(extract_solutions(records), output_path)

In [21]:
# Save extracted solutions to file (Run 1 time)
# input_path is the file written by the public-test generation cell;
# output_path receives only the list of generated solution strings.
# Both names are reused by the evaluation cell further down.
input_path = 'public_test_with_solutions_4.json'
output_path = 'test_solutions3_7.json'

save_solution_expressions(input_path, output_path)

Solutions saved successfully to test_solutions3_7.json.


In [22]:
# Function to evaluate the generated code
def evaluate_test_cases(generated_code_list, test_cases, task_id=None):
    """
    Compile one generated solution and run it on every test case.

    Args:
    - generated_code_list (str): Source code of the generated solution
      (despite the name, it is handed to compile_code as a single string).
    - test_cases (list): Test inputs, each a dict of argument name -> value.
    - task_id: Unused; kept so existing callers keep working.

    Returns:
    - list: One run_code result per test case, in order. If the solution does
      not compile, an empty list is returned so the return type is always a
      list (previously this path returned the tuple (0.0, []), which the
      caller stored as if it were the list of results).
    """
    compiled_code, func_name = compile_code(generated_code_list)

    if compiled_code is None:
        # Code did not compile: there are no per-test outputs to report.
        return []

    return [run_code(compiled_code, func_name, test_case) for test_case in test_cases]

In [23]:
# Function to evaluate all problems
def evaluate_all_problems(data, generated_code_lists):
    """
    Evaluate every problem against its generated solution, logging progress.

    Args:
    - data (list): Problem records; each is read via 'task_id' and
      'test_cases' (whose entries are read via 'input').
    - generated_code_lists (list): Generated solutions, aligned with `data`
      by position.

    Returns:
    - list: One [task_id, test_results] pair per problem, in order.
    """
    collected = []
    for index, problem in enumerate(data):
        position = index + 1

        # Progress header for this problem
        print()
        print(f"Evaluating problem {position}/{len(data)}: {problem['task_id']}")

        # Gather the inputs of all test cases
        test_cases = []
        for case in problem['test_cases']:
            test_cases.append(case['input'])
        print(f"Test cases: {test_cases}")

        # Solution generated for the problem at the same position
        generated_code_list = generated_code_lists[index]
        print(f"Generated code: {generated_code_list}")

        # Run the solution on the test cases
        test_results = evaluate_test_cases(generated_code_list, test_cases, problem['task_id'])
        print(f"Test results: {test_results}")

        # Store the task id together with its outputs
        collected.append([problem['task_id'], test_results])

        # Progress footer (printed once evaluation returns, whatever the outputs)
        print(f"Problem {position}/{len(data)} evaluated successfully.")
    return collected

# Specifying paths to total Data and Generated Data files
# (both come from the earlier "save extracted solutions" cell)
file_path = input_path
generated_code_path = output_path

# Note: this rebinds the notebook-level name `data`, which earlier held the
# training JSON, to the public-test records. load_dataset returns None if the
# file is missing or invalid.
data = load_dataset(file_path)
with open(generated_code_path, 'r') as infile:
    generated_code_lists = json.load(infile)

# Evaluate all problems; `results` is consumed by the CSV export cell
results = evaluate_all_problems(data, generated_code_lists)


Evaluating problem 1/1004: ea7615da
Test cases: [{'x': 0.29686280231110906, 'y': -1.882691694932987, 'z': 3.431982744644353}, {'x': -8.319838193104632, 'y': -7.49310048950565, 'z': -7.424376685773753}, {'x': 9.505062946262541, 'y': -0.5011321452467001, 'z': 8.213648132395043}, {'x': 2.2688166633745244, 'y': -4.093426980093597, 'z': -5.543633292104541}, {'x': -8.483149816788513, 'y': 3.7519583070867686, 'z': -5.358424045484647}]
Generated code: def multivariable_function(x, y, z):
    return 6*x**5 + x**4 + 4*x**3 + 2*x**2 + 9*x + 9*y**4 + 9*y**3 + 10*y**2 + 9*y + z**5 + 6*z**4 + 7*z**3 + 8*z**2 + 2*z

Test results: [1767.0756402673871, -218315.8977990915, 546495.045841518, 1978.1726513244744, -258686.07893910876]
Problem 1/1004 evaluated successfully.

Evaluating problem 2/1004: c29bee81
Test cases: [{'x': -7.064625092178776}, {'x': -2.405631413932814}, {'x': -4.083422395997564}, {'x': -7.106927332643515}, {'x': -1.7124446894062277}]
Generated code: def algebraic_function(x):
    retu

# Train model further

In [None]:
# Continue training the currently loaded model with a lower learning rate and
# step-based evaluation/checkpointing. W&B reporting is commented out here.
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",              # Directory to save the model and results
    eval_strategy="steps",               # Evaluate every `eval_steps` steps
    save_strategy="steps",               # Save a checkpoint every `save_steps` steps
    learning_rate=5e-6,  # Learning rate
    per_device_train_batch_size=16,  # Batch size for training
    per_device_eval_batch_size=16,   # Batch size for evaluation
    weight_decay=0.01,                   # Weight decay to prevent overfitting
    save_total_limit=3,                  # Limit the number of checkpoints to save
    num_train_epochs=6,  # Number of epochs
    predict_with_generate=True,          # Enable generation during evaluation
    fp16=False,                          # Mixed-precision (fp16) training is disabled
    logging_dir="./logs",                # Directory for storing logs
    logging_steps=500,                   # Log every 500 steps
    load_best_model_at_end=True,         # Load the best model at the end of training
    #report_to="wandb",                   # Enable W&B integration
    #run_name="T5_P_run1",                # Set a distinct run name
    save_steps=500,                      # Save checkpoints every 500 steps
    eval_steps=500,                      # Evaluate every 500 steps
)


# Initialize the trainer with the same datasets, tokenizer and collator as before
trainer = Seq2SeqTrainer(
    model=model,                         # The model to be trained
    args=training_args,                  # Training arguments
    train_dataset=split_dataset["train"],# Training dataset
    eval_dataset=split_dataset["validation"],# Evaluation dataset
    tokenizer=tokenizer,                 # The tokenizer
    data_collator=data_collator          # Data collator
)

# Start training
trainer.train()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss,Validation Loss
500,0.0244,0.03142
1000,0.0238,0.031491
1500,0.0212,0.031831
2000,0.0209,0.031897


In [None]:
import csv

# Function to save results to a CSV file
def save_results_to_csv(results, csv_filename):
    """
    Write evaluation results to a CSV file with the header `id,outputs`.

    Args:
    - results (iterable): Rows to write after the header; each row is a
      sequence of cell values, passed to the CSV writer unchanged.
    - csv_filename (str): Destination path (overwritten if it exists).
    """
    header = ['id', 'outputs']
    with open(csv_filename, mode='w', newline='') as handle:
        out = csv.writer(handle)
        # Header first, then every result row in order
        out.writerow(header)
        out.writerows(results)

csv_output_path = 'evaluation_results3_7.csv'  # Output file name, relative to the working directory

# Save the results to a CSV file.
# `results` is produced by the evaluation cell (evaluate_all_problems), so that
# cell must have been run before this one.
save_results_to_csv(results, csv_output_path)