In [1]:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import torch
import numpy as np
seed = 42
# NumPy
np.random.seed(seed)

# PyTorch
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


if torch.cuda.is_available():
    print(f"Number of GPUs available: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Number of GPUs available: 1
GPU 0: NVIDIA RTX A5000


In [2]:
import polars as pl
import json
from datasets import load_from_disk
import yaml
import wandb

## Repo funtions
from taco_utils.evaluators.TACOEvaluator import TACOEvaluator
from taco_utils import run_inference, parse_generations

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
PATH  = "../data/TACO/processed"
train = pl.read_ipc(f"{PATH}/train.feather")
train_solutions = pl.read_ipc(f"{PATH}/train_solutions.feather")
train_tests = pl.read_ipc(f"{PATH}/train_evaluation_tests.feather")
train_dict = load_from_disk("../data/TACO/train.hf")

In [4]:
config = yaml.safe_load(open("config.yaml"))
config

{'inference_configs': {'instruction': 'You are a coding generation tool that will solve a problem using Python',
  'saving_path': 'generations',
  'model_path': '../models/llms/Llama-3.2-3B-Instruct',
  'num_returns': 20,
  'num_generations': 20,
  'log_datetime': False,
  'quantization': True,
  'start_idx': 0,
  'end_idx': 120},
 'model_configs': {'temperature': 0.7, 'top_p': 0.95, 'max_length': 2048},
 'parse_config': {'saving_path': 'parsed_generations'},
 'results_config': {'saving_path': 'results', 'log_only_total': True}}

In [5]:
selected_problem = train.filter(pl.col("id") == 2545)


In [6]:
prompt_input = selected_problem.select("input").to_struct().to_pandas().iloc[0]["input"]
prompt = f"Please write a Python program \nQUESTION: \n{prompt_input} \n ANSWER: \n."

In [7]:
wandb.init(
    project = "dmcr-taco-experiment-difficulty-relevance", 
    dir = "logs",
    id = f"2545", 
    name = f"2545",
    config = config,

)


run_inference(
    prompt = prompt_input,
    instruction = config["inference_configs"]["instruction"],
    saving_path = f"{config['inference_configs']['saving_path']}/no_context.json",
    model_path = config["inference_configs"]["model_path"],
    model_configs = config["model_configs"],
    num_returns = config["inference_configs"]["num_returns"],
    num_generations = config["inference_configs"]["num_generations"],
    log_datetime = config["inference_configs"]["log_datetime"],
    quantization = config["inference_configs"]["quantization"]
    
)

parse_generations(
    generations_path=f"{config['inference_configs']['saving_path']}/no_context.json",
    id = 2545,
    saving_path = f"{config['parse_configs']['saving_path']}/no_context_parsed.json"
)

evaluator = TACOEvaluator(
    generation_file = f"{config['parse_configs']['saving_path']}/no_context_parsed.json",
    taco = train_dict[2545],
    k_pass = [1, 10, 100],
    k_pass_path = f"{config['results_configs']['saving_path']}/no_context_1_pass.json",
    normalized_sum_path = f"{config['results_configs']['saving_path']}/no_context_normalized_sum.json"
)

evaluator.evaluate()
wandb.finish()



# parse_generation(json.load(open("no_context.json")), 2545 , "no_context_parsed.json")
# compute_1_pass_by_test("no_context_parsed.json", [train_dict[2545]], file="no_context_1_pass.json")
# results = json.load(open("no_context_1_pass.json"))

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


[34m[1mwandb[0m: Currently logged in as: [33mc214129[0m ([33mc214129-unicamp[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


ImportError: Using `bitsandbytes` 8-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [8]:
f"{config["parse_configs"]["saving_path"]}/no_context_parsed.json"

SyntaxError: f-string: unmatched '[' (2816954221.py, line 1)

In [19]:
parse_generation(json.load(open("no_context.json")), 2545 , "no_context_parsed.json")

In [20]:
compute_1_pass_by_test("no_context_parsed.json", [train_dict[2545]], file="no_context_1_pass.json")

In [21]:
json.load(open("no_context_1_pass.json"))

{'2545': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}