In [4]:
!nvidia-smi

Tue Jun 24 14:21:10 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.05              Driver Version: 560.35.05      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  |   00000000:81:00.0 Off |                  N/A |
|  0%   41C    P8             31W /  370W |   17654MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
!kill 144012                                    

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import json
import jsonlines
import re
import time

# Turn on cuDNN's benchmark mode for the whole kernel session.
# NOTE(review): intended as a speed-up; confirm it has a measurable effect for this workload.
torch.backends.cudnn.benchmark = True  # speedup

def setup_model(model_id="meta-llama/Meta-Llama-3-8B"):
    """Load a causal language model with its tokenizer and wrap both in a pipeline.

    Args:
        model_id: Model identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(pipe, tokenizer)`` tuple: the "text-generation" pipeline built
        around the loaded model, and the tokenizer that pipeline uses.
    """
    tok = AutoTokenizer.from_pretrained(model_id)

    # Half-precision weights, with device placement left to device_map="auto".
    causal_lm = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", torch_dtype=torch.float16
    )

    # The config's cache flag is switched off here; set it to True if preferred.
    causal_lm.config.use_cache = False

    text_generator = pipeline(
        "text-generation", model=causal_lm, tokenizer=tok, device_map="auto"
    )
    return text_generator, tok

def load_prompts(filepath):
    """Read every record of a JSON Lines file into a list.

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        List with one parsed object per line, in file order.
    """
    with jsonlines.open(filepath) as jsonl_reader:
        records = [record for record in jsonl_reader]
    return records

def generate_text(generator, tokenizer, prompts, max_new_tokens=512, num_return_sequences=2):
    """Sample completions for a list of prompts and return them as one flat list.

    The generation budget is derived from the prompts: each prompt may use at
    most twice its own token count, never more than ``max_new_tokens``, and the
    largest such value in the list is applied to the whole call.

    Args:
        generator: Callable text-generation pipeline; it receives the prompt
            list plus the sampling keyword arguments below.
        tokenizer: Tokenizer used to measure prompt lengths; its
            ``eos_token_id`` is passed as both the EOS and the padding id.
        prompts: List of prompt strings.
        max_new_tokens: Upper bound on newly generated tokens per sequence.
        num_return_sequences: Number of samples requested per prompt. The
            default of 2 matches what callers in this notebook expect.

    Returns:
        Flat list of generator outputs. When the generator returns one list
        per prompt, those lists are concatenated in prompt order. An empty
        ``prompts`` list yields ``[]`` without calling the generator.
    """
    if not prompts:
        # Nothing to generate; this also avoids indexing an empty result below.
        return []

    prompt_lengths = [
        tokenizer(p, return_tensors="pt")['input_ids'].shape[1] for p in prompts
    ]
    token_budget = max(min(2 * length, max_new_tokens) for length in prompt_lengths)

    outputs = generator(
        prompts,
        max_new_tokens=token_budget,
        truncation=True,
        num_return_sequences=num_return_sequences,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )

    # Flatten a per-prompt list of lists; guard against an empty result.
    if outputs and isinstance(outputs[0], list):
        outputs = [item for per_prompt in outputs for item in per_prompt]

    return outputs


def extract_test_outputs(response):
    """Pull the text following "Test Output:" out of each generated response.

    Args:
        response: Sequence of dicts; the text is read from each dict's
            ``generated_text`` key (a missing key counts as empty text).

    Returns:
        List with one stripped "Test Output" section per response that
        contains the marker. The section runs up to the first following line
        that starts with ``#`` (after optional whitespace) or to the end of
        the text. If nothing matches, or ``response`` is empty, the list is
        ``['Output not found']``.
    """
    if not response:
        return ['Output not found']

    section_pattern = re.compile(r'Test Output:\s*(.*?)(?=\n\s*#|$)', re.DOTALL)
    sections = []
    for candidate in response:
        hit = section_pattern.search(candidate.get('generated_text', ''))
        if hit is not None:
            sections.append(hit.group(1).strip())

    if not sections:
        return ['Output not found']
    return sections

def parse_model_output(model_output):
    """Parse ``relation(subject, object)`` expressions out of model text.

    Every non-empty line is scanned for all occurrences of the pattern, so a
    single line may contribute several triples. When several triples share a
    line (e.g. ``a(b, c), d(e, f)``) the separator in front of the later
    relation is captured by the pattern; leading commas/semicolons are
    therefore removed from the relation, and a match whose relation is empty
    after that clean-up is dropped.

    Args:
        model_output: Text to parse.

    Returns:
        List of dicts with keys ``"sub"``, ``"rel"`` and ``"obj"`` (all
        whitespace-stripped), in order of appearance.
    """
    triple_pattern = re.compile(r'(.+?)\s*\(([^,]+),\s*([^)]+)\)')
    triples = []
    for raw_line in model_output.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        for relation, subject, obj in triple_pattern.findall(line):
            # Drop the ", " / "; " left over from the previous triple on the line.
            relation = relation.strip().lstrip(',;').strip()
            if not relation:
                continue
            triples.append({
                "sub": subject.strip(),
                "rel": relation,
                "obj": obj.strip()
            })
    return triples

def save_triples(processed_data, output_filepath):
    """Write extraction results as JSON Lines, one ``{"id", "triples"}`` object per line.

    The parent directory is created when it does not exist, so a missing
    output folder does not discard results that were already computed.

    Args:
        processed_data: Iterable of dicts; only the ``"id"`` and ``"triples"``
            keys of each entry are written.
        output_filepath: Destination file; an existing file is overwritten.

    Raises:
        KeyError: If an entry lacks ``"id"`` or ``"triples"``.
    """
    import os  # local import keeps this block self-contained

    parent_dir = os.path.dirname(output_filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_filepath, 'w', encoding='utf-8') as outfile:
        for entry in processed_data:
            json.dump({"id": entry["id"], "triples": entry["triples"]}, outfile, ensure_ascii=False)
            outfile.write("\n")

def main(jsonl_path, output_path, generator, tokenizer, num_prompts=548, batch_size=16):
    """Extract triples for a prompt file in batches and save them as JSON Lines.

    For every batch the prompts are sent to ``generate_text``. The responses
    belonging to each prompt are reduced to their "Test Output" sections,
    parsed into triples, de-duplicated per prompt and collected. A batch whose
    generation raises is reported and skipped, so its ids are absent from the
    saved file.

    Args:
        jsonl_path: JSON Lines file whose records carry ``id`` and ``prompt``.
        output_path: Destination handed to ``save_triples``.
        generator: Text-generation pipeline forwarded to ``generate_text``.
        tokenizer: Tokenizer forwarded to ``generate_text``.
        num_prompts: Maximum number of prompts to process, counted from the
            start of the file.
        batch_size: Number of prompts sent to ``generate_text`` per call.
    """
    # Cap the workload up front: slicing inside the loop alone would let the
    # final batch run past num_prompts when it is not a multiple of batch_size.
    prompts = load_prompts(jsonl_path)[:max(num_prompts, 0)]
    total = len(prompts)
    responses_per_prompt = 2  # generate_text requests 2 sequences per prompt
    results = []
    skipped = 0

    for i in range(0, total, batch_size):
        batch = prompts[i:i + batch_size]
        batch_ids = [item['id'] for item in batch]
        batch_prompts = [item['prompt'] for item in batch]
        last = i + len(batch) - 1

        try:
            start_time = time.time()
            responses = generate_text(generator, tokenizer, batch_prompts)
            elapsed = time.time() - start_time
            print(f"Batch {i}-{last} inference time: {elapsed:.2f} seconds")
        except Exception as e:
            # Best effort: report the failed batch and carry on with the next one.
            print(f"❌ Error in batch {i}-{last}: {e}")
            skipped += len(batch)
            continue

        # responses is flat: responses_per_prompt consecutive items per prompt.
        for idx, prompt_id in enumerate(batch_ids):
            start = idx * responses_per_prompt
            prompt_responses = responses[start:start + responses_per_prompt]

            test_outputs = extract_test_outputs(prompt_responses)

            # Merge the triples of all samples, keeping first-seen order.
            all_triples = []
            seen = set()
            for test_output in test_outputs:
                for triple in parse_model_output(test_output):
                    triple_key = (triple["sub"], triple["rel"], triple["obj"])
                    if triple_key not in seen:
                        seen.add(triple_key)
                        all_triples.append(triple)

            print(f"[{i + idx + 1}/{total}] ID: {prompt_id} → Unique triples extracted: {len(all_triples)}")

            results.append({
                "id": prompt_id,
                "triples": all_triples
            })

    save_triples(results, output_path)
    print(f"\n✅ All {len(results)} prompts processed. Results saved to: {output_path}")
    if skipped:
        print(f"⚠️ {skipped} prompts were skipped because their batch failed.")



In [None]:
# Example usage: triple extraction for the ont_9_nature prompt set (wikidata).
# Loads the model through setup_model, then lets main process up to 1500 prompts
# in batches of 4 and write the extracted triples to output_file.
# NOTE(review): both paths are absolute and user-specific; adjust before running elsewhere.
# NOTE(review): every usage cell in this notebook calls setup_model again; when
# running several of them in one kernel, consider reusing text_pipe/tokenizer.
if __name__ == "__main__":
    input_file = '/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/input_prompts/cot_prompts/ont_9_nature_prompts_improved.jsonl'
    output_file = "/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/response/Llama3/cot_response_without_quant_batch/ont_9_nature_llm_response_improved.jsonl"
    text_pipe, tokenizer = setup_model("meta-llama/Meta-Llama-3-8B")
    main(input_file, output_file, text_pipe, tokenizer, num_prompts=1500, batch_size=4)

Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.23it/s]
Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import json
import jsonlines
import re
import time
from datasets import Dataset


# Turn on cuDNN's benchmark mode for the whole kernel session.
# NOTE(review): intended as a speed-up; confirm it has a measurable effect for this workload.
torch.backends.cudnn.benchmark = True  # speedup

def setup_model(model_id="meta-llama/Meta-Llama-3-8B"):
    """Load a causal language model with its tokenizer and wrap both in a pipeline.

    Args:
        model_id: Model identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(pipe, tokenizer)`` tuple: the "text-generation" pipeline built
        around the loaded model, and the tokenizer that pipeline uses.
    """
    tok = AutoTokenizer.from_pretrained(model_id)

    # Half-precision weights, with device placement left to device_map="auto".
    causal_lm = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", torch_dtype=torch.float16
    )

    # The config's cache flag is switched off here; set it to True if preferred.
    causal_lm.config.use_cache = False

    text_generator = pipeline(
        "text-generation", model=causal_lm, tokenizer=tok, device_map="auto"
    )
    return text_generator, tok

def load_prompts(filepath):
    """Read every record of a JSON Lines file into a list.

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        List with one parsed object per line, in file order.
    """
    with jsonlines.open(filepath) as jsonl_reader:
        records = [record for record in jsonl_reader]
    return records

def generate_text(generator, tokenizer, prompts, max_new_tokens=512, num_return_sequences=2):
    """Sample completions for a list of prompts and return them as one flat list.

    The prompts are handed to the generator as a plain list. Wrapping them in
    a ``datasets.Dataset`` and immediately reading the ``"text"`` column back
    only yields the same strings again, so that round-trip is omitted.

    Args:
        generator: Callable text-generation pipeline; it receives the prompt
            list plus the sampling keyword arguments below.
        tokenizer: Tokenizer whose ``eos_token_id`` is passed as both the EOS
            and the padding id.
        prompts: List of prompt strings.
        max_new_tokens: Upper bound on newly generated tokens per sequence.
        num_return_sequences: Number of samples requested per prompt. The
            default of 2 matches what callers in this notebook expect.

    Returns:
        Flat list of generator outputs. When the generator returns one list
        per prompt, those lists are concatenated in prompt order. An empty
        ``prompts`` list yields ``[]`` without calling the generator.
    """
    if not prompts:
        # Nothing to generate; this also avoids indexing an empty result below.
        return []

    outputs = generator(
        list(prompts),
        max_new_tokens=max_new_tokens,
        truncation=True,
        num_return_sequences=num_return_sequences,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Flatten a per-prompt list of lists; guard against an empty result.
    if outputs and isinstance(outputs[0], list):
        outputs = [item for per_prompt in outputs for item in per_prompt]
    return outputs


def extract_test_outputs(response):
    """Pull the text following "Test Output:" out of each generated response.

    Args:
        response: Sequence of dicts; the text is read from each dict's
            ``generated_text`` key (a missing key counts as empty text).

    Returns:
        List with one stripped "Test Output" section per response that
        contains the marker. The section runs up to the first following line
        that starts with ``#`` (after optional whitespace) or to the end of
        the text. If nothing matches, or ``response`` is empty, the list is
        ``['Output not found']``.
    """
    if not response:
        return ['Output not found']

    section_pattern = re.compile(r'Test Output:\s*(.*?)(?=\n\s*#|$)', re.DOTALL)
    sections = []
    for candidate in response:
        hit = section_pattern.search(candidate.get('generated_text', ''))
        if hit is not None:
            sections.append(hit.group(1).strip())

    if not sections:
        return ['Output not found']
    return sections

def parse_model_output(model_output):
    """Parse ``relation(subject, object)`` expressions out of model text.

    Every non-empty line is scanned for all occurrences of the pattern, so a
    single line may contribute several triples. When several triples share a
    line (e.g. ``a(b, c), d(e, f)``) the separator in front of the later
    relation is captured by the pattern; leading commas/semicolons are
    therefore removed from the relation, and a match whose relation is empty
    after that clean-up is dropped.

    Args:
        model_output: Text to parse.

    Returns:
        List of dicts with keys ``"sub"``, ``"rel"`` and ``"obj"`` (all
        whitespace-stripped), in order of appearance.
    """
    triple_pattern = re.compile(r'(.+?)\s*\(([^,]+),\s*([^)]+)\)')
    triples = []
    for raw_line in model_output.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        for relation, subject, obj in triple_pattern.findall(line):
            # Drop the ", " / "; " left over from the previous triple on the line.
            relation = relation.strip().lstrip(',;').strip()
            if not relation:
                continue
            triples.append({
                "sub": subject.strip(),
                "rel": relation,
                "obj": obj.strip()
            })
    return triples

def save_triples(processed_data, output_filepath):
    """Write extraction results as JSON Lines, one ``{"id", "triples"}`` object per line.

    The parent directory is created when it does not exist, so a missing
    output folder does not discard results that were already computed.

    Args:
        processed_data: Iterable of dicts; only the ``"id"`` and ``"triples"``
            keys of each entry are written.
        output_filepath: Destination file; an existing file is overwritten.

    Raises:
        KeyError: If an entry lacks ``"id"`` or ``"triples"``.
    """
    import os  # local import keeps this block self-contained

    parent_dir = os.path.dirname(output_filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_filepath, 'w', encoding='utf-8') as outfile:
        for entry in processed_data:
            json.dump({"id": entry["id"], "triples": entry["triples"]}, outfile, ensure_ascii=False)
            outfile.write("\n")

def main(jsonl_path, output_path, generator, tokenizer, num_prompts=548, batch_size=16):
    """Extract triples for a prompt file in batches and save them as JSON Lines.

    For every batch the prompts are sent to ``generate_text``. The responses
    belonging to each prompt are reduced to their "Test Output" sections,
    parsed into triples, de-duplicated per prompt and collected. A batch whose
    generation raises is reported and skipped, so its ids are absent from the
    saved file.

    Args:
        jsonl_path: JSON Lines file whose records carry ``id`` and ``prompt``.
        output_path: Destination handed to ``save_triples``.
        generator: Text-generation pipeline forwarded to ``generate_text``.
        tokenizer: Tokenizer forwarded to ``generate_text``.
        num_prompts: Maximum number of prompts to process, counted from the
            start of the file.
        batch_size: Number of prompts sent to ``generate_text`` per call.
    """
    # Cap the workload up front: slicing inside the loop alone would let the
    # final batch run past num_prompts when it is not a multiple of batch_size.
    prompts = load_prompts(jsonl_path)[:max(num_prompts, 0)]
    total = len(prompts)
    responses_per_prompt = 2  # generate_text requests 2 sequences per prompt
    results = []
    skipped = 0

    for i in range(0, total, batch_size):
        batch = prompts[i:i + batch_size]
        batch_ids = [item['id'] for item in batch]
        batch_prompts = [item['prompt'] for item in batch]
        last = i + len(batch) - 1

        try:
            start_time = time.time()
            responses = generate_text(generator, tokenizer, batch_prompts)
            elapsed = time.time() - start_time
            print(f"Batch {i}-{last} inference time: {elapsed:.2f} seconds")
        except Exception as e:
            # Best effort: report the failed batch and carry on with the next one.
            print(f"❌ Error in batch {i}-{last}: {e}")
            skipped += len(batch)
            continue

        # responses is flat: responses_per_prompt consecutive items per prompt.
        for idx, prompt_id in enumerate(batch_ids):
            start = idx * responses_per_prompt
            prompt_responses = responses[start:start + responses_per_prompt]

            test_outputs = extract_test_outputs(prompt_responses)

            # Merge the triples of all samples, keeping first-seen order.
            all_triples = []
            seen = set()
            for test_output in test_outputs:
                for triple in parse_model_output(test_output):
                    triple_key = (triple["sub"], triple["rel"], triple["obj"])
                    if triple_key not in seen:
                        seen.add(triple_key)
                        all_triples.append(triple)

            print(f"[{i + idx + 1}/{total}] ID: {prompt_id} → Unique triples extracted: {len(all_triples)}")

            results.append({
                "id": prompt_id,
                "triples": all_triples
            })

    save_triples(results, output_path)
    print(f"\n✅ All {len(results)} prompts processed. Results saved to: {output_path}")
    if skipped:
        print(f"⚠️ {skipped} prompts were skipped because their batch failed.")


In [2]:
# Example usage: triple extraction for the ont_5_military prompt set (wikidata).
# Loads the model through setup_model, then lets main process up to 1500 prompts
# in batches of 8 and write the extracted triples to output_file.
# NOTE(review): the saved output of this cell lists ont_4_book_test_* ids although
# input_file points at ont_5_military; the output looks stale, so re-run to confirm.
# NOTE(review): both paths are absolute and user-specific; adjust before running elsewhere.
if __name__ == "__main__":
    input_file = '/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/input_prompts/cot_prompts/ont_5_military_prompts_improved.jsonl'
    output_file = "/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/response/Llama3/cot_response_without_quant_batch/ont_5_military_llm_response_improved.jsonl"
    text_pipe, tokenizer = setup_model("meta-llama/Meta-Llama-3-8B")
    main(input_file, output_file, text_pipe, tokenizer, num_prompts=1500, batch_size=8)

Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.17it/s]
Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Batch 0-7 inference time: 182.56 seconds
[1/1500] ID: ont_4_book_test_1 → Unique triples extracted: 1
[2/1500] ID: ont_4_book_test_2 → Unique triples extracted: 2
[3/1500] ID: ont_4_book_test_3 → Unique triples extracted: 1
[4/1500] ID: ont_4_book_test_4 → Unique triples extracted: 3
[5/1500] ID: ont_4_book_test_5 → Unique triples extracted: 1
[6/1500] ID: ont_4_book_test_6 → Unique triples extracted: 1
[7/1500] ID: ont_4_book_test_7 → Unique triples extracted: 0
[8/1500] ID: ont_4_book_test_8 → Unique triples extracted: 1
Batch 8-15 inference time: 94.98 seconds
[9/1500] ID: ont_4_book_test_9 → Unique triples extracted: 6
[10/1500] ID: ont_4_book_test_10 → Unique triples extracted: 1
[11/1500] ID: ont_4_book_test_11 → Unique triples extracted: 1
[12/1500] ID: ont_4_book_test_12 → Unique triples extracted: 1
[13/1500] ID: ont_4_book_test_13 → Unique triples extracted: 5
[14/1500] ID: ont_4_book_test_14 → Unique triples extracted: 1
[15/1500] ID: ont_4_book_test_15 → Unique triples extr

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Batch 72-79 inference time: 141.09 seconds
[73/1500] ID: ont_4_book_test_73 → Unique triples extracted: 2
[74/1500] ID: ont_4_book_test_74 → Unique triples extracted: 1
[75/1500] ID: ont_4_book_test_75 → Unique triples extracted: 1
[76/1500] ID: ont_4_book_test_76 → Unique triples extracted: 1
[77/1500] ID: ont_4_book_test_77 → Unique triples extracted: 1
[78/1500] ID: ont_4_book_test_78 → Unique triples extracted: 1
[79/1500] ID: ont_4_book_test_79 → Unique triples extracted: 1
[80/1500] ID: ont_4_book_test_80 → Unique triples extracted: 4
Batch 80-87 inference time: 159.85 seconds
[81/1500] ID: ont_4_book_test_81 → Unique triples extracted: 3
[82/1500] ID: ont_4_book_test_82 → Unique triples extracted: 9
[83/1500] ID: ont_4_book_test_83 → Unique triples extracted: 1
[84/1500] ID: ont_4_book_test_84 → Unique triples extracted: 0
[85/1500] ID: ont_4_book_test_85 → Unique triples extracted: 0
[86/1500] ID: ont_4_book_test_86 → Unique triples extracted: 1
[87/1500] ID: ont_4_book_test_87

In [None]:
# Example usage: triple extraction for the ont_19_film prompt set.
# Loads the model through setup_model, then lets main process up to 1500 prompts
# in batches of 4 and write the extracted triples to output_file.
# NOTE(review): the directory component 'dbpedia_webnig' may be a misspelling of
# 'dbpedia_webnlg'; confirm against the folder name on disk before changing it.
# NOTE(review): both paths are absolute and user-specific; adjust before running elsewhere.
if __name__ == "__main__":
    input_file = '/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/dbpedia_webnig/input_prompts/cot_prompts/ont_19_film_prompts_improved.jsonl'
    output_file = "/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/dbpedia_webnig/response/Llama3/cot_response_without_quant_batch/ont_19_film_llm_response_improved.jsonl"
    text_pipe, tokenizer = setup_model("meta-llama/Meta-Llama-3-8B")
    main(input_file, output_file, text_pipe, tokenizer, num_prompts=1500, batch_size=4)