In [None]:
.

In [1]:
!nvidia-smi

Sat Jul 19 09:48:00 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.05              Driver Version: 560.35.05      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  |   00000000:81:00.0  On |                  N/A |
|  0%   46C    P3            140W /  370W |       2MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
!kill 321414                                                                                                                                                                                                                                                                                                                                                                                

In [2]:
import json
import os
import re
import time

import jsonlines
import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


# Turn on cuDNN's autotuner (it benchmarks candidate kernels per input shape).
# NOTE(review): presumably this only helps convolution-heavy models and has
# little effect on the causal LM loaded below — confirm before relying on it.
torch.backends.cudnn.benchmark = True  # speedup

def setup_model(model_id="meta-llama/Meta-Llama-3-8B"):
    """Load a causal LM in fp16 and wrap it in a text-generation pipeline.

    Args:
        model_id: Checkpoint identifier handed to ``from_pretrained`` for both
            the tokenizer and the model.

    Returns:
        tuple: ``(pipe, tokenizer)`` where ``pipe`` is the ``"text-generation"``
        pipeline built around the loaded model and ``tokenizer`` is the
        tokenizer instance shared with that pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
    )

    # The KV cache is deliberately left at the checkpoint's default. Forcing
    # ``model.config.use_cache = False`` is a training-time setting; for
    # generation it is (depending on the transformers version) either ignored
    # or makes every new token recompute attention over the whole prefix.

    # The model has already been placed on its device(s) by ``device_map`` in
    # ``from_pretrained`` above, so the pipeline is built from the instantiated
    # model without a second ``device_map`` argument.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return pipe, tokenizer

def load_prompts(filepath):
    """Read a JSON Lines file and return all of its records as a list.

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        list: One decoded object per line, in file order.
    """
    with jsonlines.open(filepath) as reader:
        records = [record for record in reader]
    return records

def generate_text(generator, tokenizer, prompts, max_new_tokens=512, num_return_sequences=2):
    """Sample completions for a batch of prompts and return them as one flat list.

    Args:
        generator: Callable invoked as ``generator(list_of_prompts, **kwargs)``
            (the text-generation pipeline in this notebook).
        tokenizer: Object exposing ``eos_token_id``; that id is used both as the
            stop token and as the padding token.
        prompts: Iterable of prompt strings.
        max_new_tokens: Upper bound on newly generated tokens per sequence.
        num_return_sequences: Number of samples requested per prompt.

    Returns:
        list: The generator's result items. When the generator returns one list
        per prompt, those lists are concatenated in prompt order, so the
        samples for prompt ``k`` sit at indices
        ``[k * num_return_sequences, (k + 1) * num_return_sequences)``.
        An empty ``prompts`` yields ``[]`` without calling the generator.
    """
    prompt_list = list(prompts)
    if not prompt_list:
        # Nothing to generate; also avoids indexing into an empty result below.
        return []

    # The prompts are handed over as a plain Python list of strings.
    outputs = generator(
        prompt_list,
        max_new_tokens=max_new_tokens,
        truncation=True,
        num_return_sequences=num_return_sequences,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Flatten a list-of-lists (one inner list per prompt) into a single list.
    if outputs and isinstance(outputs[0], list):
        outputs = [item for sublist in outputs for item in sublist]
    return outputs


def extract_test_outputs(response):
    """Pull the text following ``Test Output:`` out of each generated sample.

    Args:
        response: Sequence of dicts; each dict's ``'generated_text'`` value
            (empty string when the key is absent) is searched.

    Returns:
        list[str]: For every sample that contains the marker, the stripped text
        between ``Test Output:`` and the next line starting with ``#`` (or the
        end of the text). If no sample matches, or ``response`` is empty/None,
        the single-item list ``['Output not found']``.
    """
    if not response or len(response) == 0:
        return ['Output not found']

    marker = re.compile(r'Test Output:\s*(.*?)(?=\n\s*#|$)', re.DOTALL)
    hits = (marker.search(sample.get('generated_text', '')) for sample in response)
    found = [hit.group(1).strip() for hit in hits if hit]
    return found or ['Output not found']

def parse_model_output(model_output):
    """Convert ``relation(subject, object)`` lines into triple dictionaries.

    Args:
        model_output: Text in which each non-blank line may hold one or more
            ``relation(subject, object)`` expressions.

    Returns:
        list[dict]: One ``{"sub", "rel", "obj"}`` dict per expression found, in
        order of appearance, with surrounding whitespace stripped from every
        field. Lines without such an expression contribute nothing.
    """
    triple_re = re.compile(r'(.+?)\s*\(([^,]+),\s*([^)]+)\)')
    stripped_lines = (raw.strip() for raw in model_output.strip().split('\n'))
    return [
        {
            "sub": hit.group(2).strip(),
            "rel": hit.group(1).strip(),
            "obj": hit.group(3).strip(),
        }
        for text in stripped_lines
        if text
        for hit in triple_re.finditer(text)
    ]

def save_triples(processed_data, output_filepath):
    """Write extracted triples to a JSON Lines file, one record per entry.

    Args:
        processed_data: Iterable of dicts that each provide ``"id"`` and
            ``"triples"``; any other keys are not written.
        output_filepath: Destination path. The file is overwritten if it
            exists, and missing parent directories are created.

    Each output line is ``{"id": ..., "triples": ...}`` encoded as UTF-8 with
    non-ASCII characters written verbatim.
    """
    # Create the target directory up front so that a missing folder does not
    # raise FileNotFoundError at the very end of a long inference run.
    parent_dir = os.path.dirname(output_filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_filepath, 'w', encoding='utf-8') as outfile:
        for entry in processed_data:
            json.dump({"id": entry["id"], "triples": entry["triples"]}, outfile, ensure_ascii=False)
            outfile.write("\n")

def main(jsonl_path, output_path, generator, tokenizer, num_prompts=548, batch_size=16):
    """Run batched generation over a prompt file and save the de-duplicated triples.

    Args:
        jsonl_path: JSON Lines file whose records provide ``'id'`` and ``'prompt'``.
        output_path: Destination JSON Lines file for ``{"id", "triples"}`` records.
        generator: Text-generation callable forwarded to ``generate_text``.
        tokenizer: Tokenizer forwarded to ``generate_text``.
        num_prompts: Maximum number of prompts to process, counted from the
            start of the file.
        batch_size: Number of prompts sent to ``generate_text`` per call.

    A batch whose generation raises is reported and skipped; its ids do not
    appear in the output file. Results are written once, after the last batch.
    """
    all_prompts = load_prompts(jsonl_path)

    # Cap the work list itself so the final batch cannot run past num_prompts
    # when num_prompts is not a multiple of batch_size.
    total = max(0, min(num_prompts, len(all_prompts)))
    prompts = all_prompts[:total]

    results = []
    failed_batches = 0

    for i in range(0, total, batch_size):
        batch = prompts[i:i + batch_size]
        batch_ids = [item['id'] for item in batch]
        batch_prompts = [item['prompt'] for item in batch]
        last_index = i + len(batch) - 1  # inclusive, used by both log messages

        try:
            start_time = time.time()
            responses = generate_text(generator, tokenizer, batch_prompts)
            elapsed = time.time() - start_time
            print(f"Batch {i}-{last_index} inference time: {elapsed:.2f} seconds")
        except Exception as e:
            failed_batches += 1
            print(f"❌ Error in batch {i}-{last_index}: {e}")
            continue

        # generate_text returns a flat list with the samples of each prompt
        # stored contiguously; derive how many samples belong to one prompt
        # instead of assuming a fixed count.
        per_prompt = len(responses) // len(batch)

        for idx, prompt_id in enumerate(batch_ids):
            prompt_responses = responses[per_prompt * idx:per_prompt * (idx + 1)]
            test_outputs = extract_test_outputs(prompt_responses)

            # Merge the triples of all samples, keeping first occurrences only.
            all_triples = []
            seen = set()
            for test_output in test_outputs:
                for triple in parse_model_output(test_output):
                    triple_key = (triple["sub"], triple["rel"], triple["obj"])
                    if triple_key not in seen:
                        seen.add(triple_key)
                        all_triples.append(triple)

            print(f"[{i + idx + 1}/{total}] ID: {prompt_id} → Unique triples extracted: {len(all_triples)}")

            results.append({
                "id": prompt_id,
                "triples": all_triples
            })

    save_triples(results, output_path)
    if failed_batches:
        print(f"\n⚠️ {failed_batches} batch(es) failed and were skipped.")
    print(f"\n✅ All {len(results)} prompts processed. Results saved to: {output_path}")
    


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Example usage: load Llama-3-8B once, then extract triples for the
# "ont_10_culture" chain-of-thought prompts in batches of 4.
if __name__ == "__main__":
    input_file = '/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/input_prompts/cot_prompts/ont_10_culture_prompts_improved.jsonl'
    output_file = "/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/response_run4/Llama3/cot_response_without_quant_batch/ont_10_culture_llm_response_improved.jsonl"

    text_pipe, tokenizer = setup_model(model_id="meta-llama/Meta-Llama-3-8B")

    main(
        jsonl_path=input_file,
        output_path=output_file,
        generator=text_pipe,
        tokenizer=tokenizer,
        num_prompts=1500,
        batch_size=4,
    )

Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.39it/s]
Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Batch 0-3 inference time: 47.14 seconds
[1/1500] ID: ont_10_culture_test_1 → Unique triples extracted: 1
[2/1500] ID: ont_10_culture_test_2 → Unique triples extracted: 28
[3/1500] ID: ont_10_culture_test_3 → Unique triples extracted: 1
[4/1500] ID: ont_10_culture_test_4 → Unique triples extracted: 1
Batch 4-7 inference time: 67.92 seconds
[5/1500] ID: ont_10_culture_test_5 → Unique triples extracted: 4
[6/1500] ID: ont_10_culture_test_6 → Unique triples extracted: 1
[7/1500] ID: ont_10_culture_test_7 → Unique triples extracted: 1
[8/1500] ID: ont_10_culture_test_8 → Unique triples extracted: 1
Batch 8-11 inference time: 68.78 seconds
[9/1500] ID: ont_10_culture_test_9 → Unique triples extracted: 5
[10/1500] ID: ont_10_culture_test_10 → Unique triples extracted: 1
[11/1500] ID: ont_10_culture_test_11 → Unique triples extracted: 1
[12/1500] ID: ont_10_culture_test_12 → Unique triples extracted: 4
Batch 12-15 inference time: 90.29 seconds
[13/1500] ID: ont_10_culture_test_13 → Unique trip

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Batch 36-39 inference time: 71.80 seconds
[37/1500] ID: ont_10_culture_test_37 → Unique triples extracted: 1
[38/1500] ID: ont_10_culture_test_38 → Unique triples extracted: 6
[39/1500] ID: ont_10_culture_test_39 → Unique triples extracted: 4
[40/1500] ID: ont_10_culture_test_40 → Unique triples extracted: 1
Batch 40-43 inference time: 91.61 seconds
[41/1500] ID: ont_10_culture_test_41 → Unique triples extracted: 10
[42/1500] ID: ont_10_culture_test_42 → Unique triples extracted: 1
[43/1500] ID: ont_10_culture_test_43 → Unique triples extracted: 5
[44/1500] ID: ont_10_culture_test_44 → Unique triples extracted: 1
Batch 44-47 inference time: 89.67 seconds
[45/1500] ID: ont_10_culture_test_45 → Unique triples extracted: 31
[46/1500] ID: ont_10_culture_test_46 → Unique triples extracted: 2
[47/1500] ID: ont_10_culture_test_47 → Unique triples extracted: 2
[48/1500] ID: ont_10_culture_test_48 → Unique triples extracted: 1
Batch 48-51 inference time: 90.01 seconds
[49/1500] ID: ont_10_cultu