In [1]:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import uuid
import torch
import numpy as np
seed = 42
import random
# NumPy
np.random.seed(seed)
random.seed(seed)
# PyTorch
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


if torch.cuda.is_available():
    print(f"Number of GPUs available: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Number of GPUs available: 1
GPU 0: NVIDIA RTX A5000


In [2]:
import polars as pl
pl.set_random_seed(seed)

import json
from datasets import load_from_disk
import yaml
import wandb

## Repo funtions
from taco_utils.evaluators.TACOEvaluator import TACOEvaluator
from taco_utils import run_inference, parse_generations

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
PATH  = "../data/TACO/processed"
train = pl.read_ipc(f"{PATH}/train.feather")
test = pl.read_ipc(f"{PATH}/test.feather")
train_solutions = pl.read_ipc(f"{PATH}/train_solutions.feather")
train_tests = pl.read_ipc(f"{PATH}/train_evaluation_tests.feather")


FileNotFoundError: Directory ../data/TACO/train.hf not found

In [11]:
test_evaluation_tests = pl.read_ipc(f"{PATH}/test_evaluation_tests.feather")

In [12]:
test_evaluation_tests.filter(pl.col("id") == 102)

id,test_id,input,output
i64,i64,str,str
102,0,"""6 2 2 1 1 2 2 1 1 ""","""6 """
102,1,"""1 1 1 1 ""","""0 """
102,2,"""10 2 1 2 1 2 2 1 2 2 1 1 2 ""","""5 """
102,3,"""50 2 1 1 1 2 2 1 2 1 1 2 2 1 2…","""15 """
102,4,"""75 5 5 1 1 5 5 3 5 2 3 3 2 2 1…","""6 """
…,…,…,…
102,282,"""6 31 7 10 -1 7 14 6 3 ""","""0 """
102,283,"""100 100 100 1 2 3 4 5 6 7 8 9 …","""0 """
102,284,"""100 50 22 15 2 25 15 48 43 46 …","""2 """
102,285,"""1 1 1 1 ""","""0"""


In [4]:
_filter = (
        test
        .filter(pl.col("difficulty") == "MEDIUM")
        .group_by("tags")
        .agg(pl.col("id").count().alias("count"))
        .filter(pl.col("count") >= 5)
        .select("tags")

    )

df = (
            test
            .filter(pl.col("difficulty") == "MEDIUM")
            .join(_filter, on="tags", how="inner")
            .group_by("tags")
            .agg(pl.col("id").sample(n=1, shuffle=True, with_replacement=False,seed=42))
            .sort("tags")
            )

df.to_numpy()

array([['Ad-hoc', array([190], dtype=uint32)],
       ['Amortized analysis', array([102], dtype=uint32)],
       ['Bit manipulation', array([165], dtype=uint32)],
       ['Combinatorics', array([273], dtype=uint32)],
       ['Complete search', array([780], dtype=uint32)],
       ['Constructive algorithms', array([143], dtype=uint32)],
       ['Data structures', array([291], dtype=uint32)],
       ['Dynamic programming', array([283], dtype=uint32)],
       ['Fundamentals', array([236], dtype=uint32)],
       ['Game theory', array([111], dtype=uint32)],
       ['Geometry', array([154], dtype=uint32)],
       ['Graph algorithms', array([284], dtype=uint32)],
       ['Graph traversal', array([247], dtype=uint32)],
       ['Greedy algorithms', array([321], dtype=uint32)],
       ['Implementation', array([149], dtype=uint32)],
       ['Mathematics', array([168], dtype=uint32)],
       ['Matrices', array([199], dtype=uint32)],
       ['Number theory', array([222], dtype=uint32)],
       ['Pro

In [6]:
input_data = {
        "input_ids": {},
        "context_ids": {}
    }
for _tag in df.to_numpy():
    input_data["input_ids"][_tag[0]] = _tag[1].tolist()



context = {}
for _key in  input_data["input_ids"].keys():
        input_data["context_ids"][_key] = {}
        for _id in  input_data["input_ids"][_key]:
            _df = (
                train
                .filter(pl.col("difficulty") == "MEDIUM")
                .filter(pl.col("tags") == _key)
                .sample(n=4, shuffle=True, with_replacement=False, seed=42)
            )

            input_data["context_ids"][_key][_id] = _df.select(pl.col("id")).to_numpy().squeeze(1).tolist()



input_data["context_ids"]

{'Ad-hoc': {190: [4527, 8409, 19779, 15180]},
 'Amortized analysis': {102: [7154, 6976, 17413, 12907]},
 'Bit manipulation': {165: [4415, 18815, 13743, 4351]},
 'Combinatorics': {273: [7332, 18824, 14330, 3468]},
 'Complete search': {780: [4056, 6938, 6683, 18526]},
 'Constructive algorithms': {143: [4527, 9348, 9297, 19493]},
 'Data structures': {291: [4390, 7646, 7399, 18540]},
 'Dynamic programming': {283: [3839, 7493, 7071, 18926]},
 'Fundamentals': {236: [21214, 6617, 12329, 11405]},
 'Game theory': {111: [5085, 23895, 7220, 17412]},
 'Geometry': {154: [3281, 24864, 6474, 16045]},
 'Graph algorithms': {284: [4795, 8243, 8088, 711]},
 'Graph traversal': {247: [3348, 7179, 3321, 8018]},
 'Greedy algorithms': {321: [4079, 8628, 8324, 19424]},
 'Implementation': {149: [3954, 7893, 7643, 19566]},
 'Mathematics': {168: [3783, 7143, 18669, 7942]},
 'Matrices': {199: [24682, 4528, 6774, 17882]},
 'Number theory': {222: [3457, 6213, 5888, 17926]},
 'Probability': {103: [7533, 11762, 3131, 

In [4]:
config = yaml.safe_load(open("config.yaml"))
config

{'inference_configs': {'instruction': 'You are a coding generation tool that will solve a problem using Python',
  'saving_path': 'generations',
  'model_path': '../models/llms/Llama-3.2-3B-Instruct',
  'num_returns': 20,
  'num_generations': 200,
  'log_datetime': False,
  'quantization': True,
  'start_idx': 0,
  'end_idx': 120},
 'model_configs': {'temperature': 0.7, 'top_p': 0.95, 'max_length': 2048},
 'parse_configs': {'saving_path': 'parsed'},
 'results_configs': {'saving_path': 'results', 'log_only_total': True}}

In [5]:
selected_problem = train.filter(pl.col("id") == 2545)


In [6]:
prompt_input = selected_problem.select("input").to_struct().to_pandas().iloc[0]["input"]
prompt = f"Please write a Python program \nQUESTION: \n{prompt_input} \n ANSWER: \n."

In [None]:
wandb.init(
    project = "dmcr-taco-experiment-difficulty-relevance", 
    dir = "logs",
    id = f"2545", 
    name = f"2545",
    config = config,

)


# run_inference(
#     prompt = prompt_input,
#     instruction = config["inference_configs"]["instruction"],
#     saving_path = f"{config['inference_configs']['saving_path']}/no_context.json",
#     model_path = config["inference_configs"]["model_path"],
#     model_configs = config["model_configs"],
#     num_returns = config["inference_configs"]["num_returns"],
#     num_generations = config["inference_configs"]["num_generations"],
#     log_datetime = config["inference_configs"]["log_datetime"],
#     quantization = config["inference_configs"]["quantization"]
    
# )

# parse_generations(
#     generations_path=f"{config['inference_configs']['saving_path']}/no_context.json",
#     id = 2545,
#     saving_path = f"{config['parse_configs']['saving_path']}/no_context_parsed.json"
# )

evaluator = TACOEvaluator(
    generation_file = f"{config['parse_configs']['saving_path']}/no_context_parsed.json",
    taco = [train_dict[2545]],
    k_pass = [1, 10, 100],
    k_pass_path = f"{config['results_configs']['saving_path']}/no_context_1_pass.json",
    normalized_sum_path = f"{config['results_configs']['saving_path']}/no_context_normalized_sum.json"
)

input_id = str(uuid.uuid4())
with open(f"logs/{input_id}.txt", "w") as f:
    f.write(prompt_input, "utf-8")




evaluator.evaluate()

wandb.log({
    "pass@1": evaluator.extract_pass_1(),
    "normalized_sum": evaluator.extracted_normalized_sum(),
})
wandb.log({"prompt": wandb.File(f"logs/{input_id}.txt")})
wandb.finish()



# parse_generation(json.load(open("no_context.json")), 2545 , "no_context_parsed.json")
# compute_1_pass_by_test("no_context_parsed.json", [train_dict[2545]], file="no_context_1_pass.json")
# results = json.load(open("no_context_1_pass.json"))

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mc214129[0m ([33mc214129-unicamp[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.38s/it]


[[False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [-3, -3, -3, -3, -3, -3, -3, -3, -3, -3], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False], [False, False, False,

In [8]:
train_dict[2545]

{'question': "It's the rainy season again, and the city experiences frequent showers throughout the day.\n\nThe weather report says that there is a P probability of rainfalls today. Raj has to step out for a meeting at the office, and would like to know the probability that it rains during the time he is on the way.\n\nInput:\n\nThe first line of input contains the number of test cases, T. Each of the following T lines contain two numbers, P and time. P denotes the probability that it will rain today and time is the time (in minutes), it will take for Raj to reach his office.\n\nOutput:\n\nOutput should have T lines each containing answer to corresponding test case. Please round the answer to 4 decimal places.\n\nConstraints:\n\n1 ≤ T ≤ 100\n0 ≤ P ≤ 0.5\n10 ≤ time ≤ 720\ntime is a perfect divisor of 1440.\n\nSAMPLE INPUT\n2\n0 10\n.5 720\n\nSAMPLE OUTPUT\n0.0000\n0.2929",
 'solutions': '["test_case = int(input())\\n\\nwhile test_case:\\n\\tin_1 = input()\\n\\tin_1 = in_1.split()\\n\\t\

In [None]:
def load_generation(input_file):
    generations = {}
    with open(input_file, 'r') as f:
        results = json.load(f)
        for _, res in enumerate(results):
            task_id = res['task_id']
            output = res['output']
            generations[task_id] = output
    return generations

load_generation(f"{config['parse_configs']['saving_path']}/no_context_parsed.json").keys()

dict_keys([2545])

In [None]:
len()

In [11]:
train_dict[2545]

{'question': "It's the rainy season again, and the city experiences frequent showers throughout the day.\n\nThe weather report says that there is a P probability of rainfalls today. Raj has to step out for a meeting at the office, and would like to know the probability that it rains during the time he is on the way.\n\nInput:\n\nThe first line of input contains the number of test cases, T. Each of the following T lines contain two numbers, P and time. P denotes the probability that it will rain today and time is the time (in minutes), it will take for Raj to reach his office.\n\nOutput:\n\nOutput should have T lines each containing answer to corresponding test case. Please round the answer to 4 decimal places.\n\nConstraints:\n\n1 ≤ T ≤ 100\n0 ≤ P ≤ 0.5\n10 ≤ time ≤ 720\ntime is a perfect divisor of 1440.\n\nSAMPLE INPUT\n2\n0 10\n.5 720\n\nSAMPLE OUTPUT\n0.0000\n0.2929",
 'solutions': '["test_case = int(input())\\n\\nwhile test_case:\\n\\tin_1 = input()\\n\\tin_1 = in_1.split()\\n\\t\

In [8]:
f"{config["parse_configs"]["saving_path"]}/no_context_parsed.json"

SyntaxError: f-string: unmatched '[' (2816954221.py, line 1)

In [19]:
parse_generation(json.load(open("no_context.json")), 2545 , "no_context_parsed.json")

In [20]:
compute_1_pass_by_test("no_context_parsed.json", [train_dict[2545]], file="no_context_1_pass.json")

In [21]:
json.load(open("no_context_1_pass.json"))

{'2545': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}