In [1]:

import os

# Restrict CUDA to device index 5 and turn tokenizers parallelism off.
# Both variables are set before torch is imported.
os.environ.update({"CUDA_VISIBLE_DEVICES": "5", "TOKENIZERS_PARALLELISM": "false"})

import torch
import numpy as np

# One seed shared by every random number generator used in this notebook.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

cuda_ready = torch.cuda.is_available()
if cuda_ready:
    # Seed the CUDA generators and ask cuDNN for deterministic kernels.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Report which GPUs are visible to this process.
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs available: {gpu_count}")
    for gpu_index in range(gpu_count):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

# First visible GPU when CUDA is usable, CPU otherwise.
device = torch.device("cuda:0" if cuda_ready else "cpu")

Number of GPUs available: 1
GPU 0: NVIDIA A100 80GB PCIe


In [4]:
import polars as pl
import json
from datasets import load_from_disk
import yaml
import wandb

## Repo functions
from taco_utils.evaluators.compute_1_pass_by_test import compute_1_pass_by_test
from taco_utils import run_inference, parse_generations

In [3]:
# Directory holding the processed TACO feather files.
PATH = "../data/TACO/processed"

# Training problems, their solutions and their evaluation tests, read as polars frames.
train, train_solutions, train_tests = (
    pl.read_ipc(f"{PATH}/train.feather"),
    pl.read_ipc(f"{PATH}/train_solutions.feather"),
    pl.read_ipc(f"{PATH}/train_evaluation_tests.feather"),
)

# Training split in Hugging Face `datasets` on-disk format.
train_dict = load_from_disk("../data/TACO/train.hf")

In [5]:
# Load the experiment settings from config.yaml.
# The context manager closes the file handle as soon as parsing finishes;
# the previous bare open() call left it open for the lifetime of the kernel.
with open("config.yaml") as config_file:
    config = yaml.safe_load(config_file)
config

{'inference_config': {'instruction': 'You are a coding generation tool that will solve a problem using Python',
  'saving_path': 'generations',
  'model_path': '../models/llm/Llama-3.2-3B-Instruct',
  'num_returns': 20,
  'num_generations': 20,
  'log_datetime': False},
 'model_config': {'temperature': 0.7, 'top_p': 0.95, 'max_length': 2048},
 'parse_config': {'saving_path': 'parsed_generations'},
 'results_config': {'saving_path': 'results', 'log_only_total': True}}

In [7]:
# Keep only the row(s) of `train` whose "id" column equals 2545.
selected_problem = train.filter(pl.col("id").eq(2545))


In [8]:
# Take the "input" field of the first selected row (presumably the problem
# statement text; verify against the dataset schema).
input_as_pandas = selected_problem.select("input").to_struct().to_pandas()
prompt_input = input_as_pandas.iloc[0]["input"]

# Wrap that field in the question/answer template.
prompt = f"Please write a Python program \nQUESTION: \n{prompt_input} \n ANSWER: \n."

In [None]:
# No-context run for problem 2545: generate, parse, then score the generations.
# Each stage hands its result to the next through a JSON file in the working directory.
NO_CONTEXT_GENERATIONS = "no_context.json"
NO_CONTEXT_PARSED = "no_context_parsed.json"
NO_CONTEXT_RESULTS = "no_context_1_pass.json"

wandb.init(
    project="dmcr-taco-experiment-difficulty-relevance",
    dir="logs",
    # NOTE(review): the run id is "3545" while the run name and the problem id
    # are 2545 - confirm whether this mismatch is intentional.
    id="3545",
    name="2545",
    config=config,
)

# NOTE(review): the raw `prompt_input` is sent here, not the formatted `prompt`
# built in the previous cell - confirm this is the intended input.
run_inference(
    prompt=prompt_input,
    instruction=config["inference_config"]["instruction"],
    # The previous value, an f-string with an empty `{}` placeholder, is a
    # SyntaxError, so the cell could not run at all. The generations are
    # written to the exact path that the parsing step below reads back.
    saving_path=NO_CONTEXT_GENERATIONS,
)

# Context managers close the JSON files instead of leaking the handles.
with open(NO_CONTEXT_GENERATIONS) as generations_file:
    generations = json.load(generations_file)

# `parse_generations` is the name imported from taco_utils; the singular
# `parse_generation` is not defined anywhere in the visible notebook.
# NOTE(review): confirm that it accepts (generations, problem id, output path).
parse_generations(generations, 2545, NO_CONTEXT_PARSED)

compute_1_pass_by_test(NO_CONTEXT_PARSED, [train_dict[2545]], file=NO_CONTEXT_RESULTS)

with open(NO_CONTEXT_RESULTS) as results_file:
    results = json.load(results_file)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.32s/it]


Lopp 0, 2025-03-17 15:55:00.053932
Lopp 1, 2025-03-17 15:55:12.417611


In [19]:
# Read the raw generations back, closing the file once it has been parsed.
with open("no_context.json") as generations_file:
    generations = json.load(generations_file)

# `parse_generations` is the name imported from taco_utils; the singular
# `parse_generation` is not defined anywhere in the visible notebook.
# NOTE(review): confirm that it accepts (generations, problem id, output path).
parse_generations(generations, 2545, "no_context_parsed.json")

In [20]:
# Evaluate the parsed generations against the single dataset entry at index 2545.
# Presumably the scores are written to the path given as `file` - the next cell reads it.
evaluated_problems = [train_dict[2545]]
compute_1_pass_by_test(
    "no_context_parsed.json",
    evaluated_problems,
    file="no_context_1_pass.json",
)

In [21]:
# Load the scores and display them as the cell output.
# The context manager closes the file; the bare open() call leaked the handle.
with open("no_context_1_pass.json") as results_file:
    results = json.load(results_file)
results

{'2545': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}