In [3]:
!nvidia-smi

Sat Jul 19 01:01:12 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.05              Driver Version: 560.35.05      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  |   00000000:81:00.0  On |                  N/A |
|  0%   41C    P8             31W /  370W |   14646MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
!kill 586370                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import json
import jsonlines
import re
import time

# Turn on cuDNN benchmark mode for the whole process (module-level side effect).
# NOTE(review): benchmark mode concerns cuDNN kernel selection — confirm it
# actually benefits this text-generation workload.
torch.backends.cudnn.benchmark = True  # speedup

def setup_model(model_id="mistralai/Mistral-7B-Instruct-v0.3", use_cache=False):
    """Load a causal language model in float16 and wrap it in a text-generation pipeline.

    Args:
        model_id: Model identifier (or local path) handed to ``from_pretrained``
            for both the tokenizer and the model.
        use_cache: Value written to ``model.config.use_cache``. Defaults to
            ``False``, which is what this function always set before the
            parameter existed; pass ``True`` to enable key/value caching.

    Returns:
        tuple: ``(pipe, tokenizer)`` — the text-generation pipeline and the
        tokenizer instance it was built with.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # device_map="auto" delegates weight placement to the loader.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
    )

    model.config.use_cache = use_cache

    # The model object is already loaded and placed on its device(s), so the
    # pipeline is built from it directly; a device_map argument here would only
    # apply when the pipeline loads the model itself from an identifier.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return pipe, tokenizer

def load_prompts(filepath):
    """Read every record of a JSON Lines file into memory.

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        list: The decoded records, in file order.
    """
    with jsonlines.open(filepath) as reader:
        records = [record for record in reader]
    return records

def generate_text(generator, tokenizer, prompts, max_new_tokens=512, num_return_sequences=2):
    """Sample continuations for a list of prompts and return them as one flat list.

    The generation budget is derived from the prompts: each prompt may request
    up to twice its own token count, capped at ``max_new_tokens``, and the
    largest of those values is used for the whole call.

    Args:
        generator: Callable text-generation pipeline, invoked as
            ``generator(prompts, **generation_kwargs)``.
        tokenizer: Tokenizer used to measure prompt lengths; its
            ``eos_token_id`` is used both as end-of-sequence and padding id.
        prompts: List of prompt strings.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        num_return_sequences: Number of sampled continuations requested per
            prompt (defaults to 2, the previously hard-coded value).

    Returns:
        list: Generated items in prompt order. When the generator returns one
        list per prompt, those lists are concatenated, so each prompt
        contributes a run of consecutive entries. An empty ``prompts`` list
        yields ``[]`` without calling the generator.
    """
    if not prompts:
        # Nothing to generate; also avoids indexing into an empty result below.
        return []

    prompt_lengths = [
        tokenizer(p, return_tensors="pt")['input_ids'].shape[1] for p in prompts
    ]
    # Use a separate name so the caller-supplied cap is not overwritten.
    token_budget = max(min(2 * length, max_new_tokens) for length in prompt_lengths)

    # NOTE(review): no batch_size is passed to the pipeline call — confirm
    # whether the prompts are really processed as one batch on the device.
    outputs = generator(
        prompts,
        max_new_tokens=token_budget,
        truncation=True,
        num_return_sequences=num_return_sequences,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )

    # Flatten a list-of-lists result (one inner list per prompt) into a single list.
    if outputs and isinstance(outputs[0], list):
        outputs = [item for per_prompt in outputs for item in per_prompt]

    return outputs


def extract_test_outputs(response):
    """Pull the text following ``Test Output:`` out of each generated item.

    Args:
        response: Iterable of dicts carrying a ``'generated_text'`` entry; may
            be empty or ``None``.

    Returns:
        list: One stripped string per item in which the marker was found, or
        ``['Output not found']`` when no item matched.
    """
    # Capture everything after the marker up to a following line that starts
    # with '#', or to the end of the text when no such line exists.
    marker = re.compile(r'Test Output:\s*(.*?)(?=\n\s*#|$)', re.DOTALL)

    found = []
    for candidate in (response or []):
        hit = marker.search(candidate.get('generated_text', ''))
        if hit is not None:
            found.append(hit.group(1).strip())

    return found or ['Output not found']

def parse_model_output(model_output):
    """Parse ``relation(subject, object)`` expressions into triple dicts.

    Each non-blank line is scanned for every ``relation(subject, object)``
    occurrence. When several triples share a line (separated by ``,`` or
    ``;``), the separator is stripped so it does not end up inside the
    relation name.

    Args:
        model_output: Text to parse; ``None`` or an empty string is accepted.

    Returns:
        list: Dicts with keys ``"sub"``, ``"rel"`` and ``"obj"``, in order of
        appearance. Matches whose relation is empty after clean-up are dropped.
    """
    if not model_output:
        return []

    pattern = re.compile(r'(.+?)\s*\(([^,]+),\s*([^)]+)\)')
    triples = []
    for raw_line in model_output.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        for relation, subject, obj in pattern.findall(line):
            # The lazy relation group starts right after the previous match,
            # so on a multi-triple line it begins with the separator.
            relation = relation.strip().lstrip(',;').strip()
            if not relation:
                continue
            triples.append({
                "sub": subject.strip(),
                "rel": relation,
                "obj": obj.strip()
            })
    return triples

def save_triples(processed_data, output_filepath):
    """Write one JSON object per entry to a UTF-8 JSON Lines file.

    The parent directory of ``output_filepath`` is created when missing, so a
    run does not fail at write time just because the folder is absent.

    Args:
        processed_data: Iterable of dicts providing ``"id"`` and ``"triples"``;
            any other keys are ignored.
        output_filepath: Destination path; an existing file is overwritten.
    """
    import os  # local import: only needed for directory creation

    parent_dir = os.path.dirname(os.fspath(output_filepath))
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_filepath, 'w', encoding='utf-8') as outfile:
        for entry in processed_data:
            # ensure_ascii=False keeps non-ASCII characters readable in the file.
            json.dump({"id": entry["id"], "triples": entry["triples"]}, outfile, ensure_ascii=False)
            outfile.write("\n")

def main(jsonl_path, output_path, generator, tokenizer, num_prompts=548, batch_size=16):
    """Run triple extraction over a prompt file and save the unique triples per prompt.

    Prompts are processed in slices of ``batch_size``. For each prompt, the
    generated responses are parsed into triples and de-duplicated on
    ``(sub, rel, obj)``. A batch whose generation raises is reported and
    skipped. Results are written after every completed batch, so an
    interrupted run keeps what was already produced.

    Args:
        jsonl_path: JSON Lines file whose records provide ``'id'`` and ``'prompt'``.
        output_path: Destination JSON Lines file for the extracted triples.
        generator: Text-generation pipeline forwarded to ``generate_text``.
        tokenizer: Tokenizer forwarded to ``generate_text``.
        num_prompts: Maximum number of prompts to process from the start of the file.
        batch_size: Number of prompts passed to each ``generate_text`` call.
    """
    # Cap the work list up front so the final slice cannot run past num_prompts
    # when num_prompts is not a multiple of batch_size.
    prompts = load_prompts(jsonl_path)[:max(num_prompts, 0)]
    total = len(prompts)
    results = []

    for i in range(0, total, batch_size):
        batch = prompts[i:i + batch_size]
        batch_ids = [item['id'] for item in batch]
        batch_prompts = [item['prompt'] for item in batch]
        last = i + len(batch) - 1

        try:
            start_time = time.time()
            responses = generate_text(generator, tokenizer, batch_prompts)
            elapsed = time.time() - start_time
            print(f"Batch {i}-{last} inference time: {elapsed:.2f} seconds")
        except Exception as e:
            # Best effort: report the failed batch and carry on with the next one.
            print(f"❌ Error in batch {i}-{last}: {e}")
            continue

        # The responses are a flat list with an equal share per prompt; derive
        # the share size from the result instead of assuming a fixed count.
        per_prompt = len(responses) // len(batch)

        for idx, prompt_id in enumerate(batch_ids):
            prompt_responses = responses[per_prompt * idx:per_prompt * (idx + 1)]

            test_outputs = extract_test_outputs(prompt_responses)

            all_triples = []
            seen = set()
            for test_output in test_outputs:
                for triple in parse_model_output(test_output):
                    triple_key = (triple["sub"], triple["rel"], triple["obj"])
                    if triple_key not in seen:
                        seen.add(triple_key)
                        all_triples.append(triple)

            print(f"[{i + idx + 1}/{total}] ID: {prompt_id} → Unique triples extracted: {len(all_triples)}")

            results.append({
                "id": prompt_id,
                "triples": all_triples
            })

        # Checkpoint after each batch so a crash or interrupt does not discard
        # the results gathered so far.
        save_triples(results, output_path)

    # Final write; also produces the (possibly empty) file when no batch ran.
    save_triples(results, output_path)
    print(f"\n✅ All {len(results)} prompts processed. Results saved to: {output_path}")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Example usage: load the model once, then run triple extraction over a prompt file.
# NOTE(review): both paths below are absolute and user-specific; adjust them
# before running this cell in another environment.
if __name__ == "__main__":
    # JSON Lines prompt file; main() reads the 'id' and 'prompt' keys of each record.
    input_file = '/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/input_prompts/cot_prompts/ont_5_military_prompts_improved.jsonl'
    # Destination JSON Lines file for the extracted triples.
    output_file = "/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata/response_run3/Mistral/cot_response_without_quant_batch/ont_5_military_llm_response_improved.jsonl"
    # Build the text-generation pipeline and its tokenizer for the chosen checkpoint.
    text_pipe, tokenizer = setup_model("mistralai/Mistral-7B-Instruct-v0.3")
    # num_prompts bounds how far main() iterates; 4 prompts go into each generate_text call.
    main(input_file, output_file, text_pipe, tokenizer, num_prompts=1500, batch_size=4)

Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.02s/it]
Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Batch 0-3 inference time: 54.95 seconds
[1/1500] ID: ont_5_military_test_1 → Unique triples extracted: 1
[2/1500] ID: ont_5_military_test_2 → Unique triples extracted: 2
[3/1500] ID: ont_5_military_test_3 → Unique triples extracted: 3
[4/1500] ID: ont_5_military_test_4 → Unique triples extracted: 4
Batch 4-7 inference time: 93.58 seconds
[5/1500] ID: ont_5_military_test_5 → Unique triples extracted: 1
[6/1500] ID: ont_5_military_test_6 → Unique triples extracted: 2
[7/1500] ID: ont_5_military_test_7 → Unique triples extracted: 1
[8/1500] ID: ont_5_military_test_8 → Unique triples extracted: 0
Batch 8-11 inference time: 66.84 seconds
[9/1500] ID: ont_5_military_test_9 → Unique triples extracted: 8
[10/1500] ID: ont_5_military_test_10 → Unique triples extracted: 2
[11/1500] ID: ont_5_military_test_11 → Unique triples extracted: 1
[12/1500] ID: ont_5_military_test_12 → Unique triples extracted: 1
Batch 12-15 inference time: 70.86 seconds
[13/1500] ID: ont_5_military_test_13 → Unique tripl

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Batch 36-39 inference time: 74.89 seconds
[37/1500] ID: ont_5_military_test_37 → Unique triples extracted: 4
[38/1500] ID: ont_5_military_test_38 → Unique triples extracted: 6
[39/1500] ID: ont_5_military_test_39 → Unique triples extracted: 2
[40/1500] ID: ont_5_military_test_40 → Unique triples extracted: 1
Batch 40-43 inference time: 57.34 seconds
[41/1500] ID: ont_5_military_test_41 → Unique triples extracted: 6
[42/1500] ID: ont_5_military_test_42 → Unique triples extracted: 1
[43/1500] ID: ont_5_military_test_43 → Unique triples extracted: 1
[44/1500] ID: ont_5_military_test_44 → Unique triples extracted: 9
Batch 44-47 inference time: 52.87 seconds
[45/1500] ID: ont_5_military_test_45 → Unique triples extracted: 1
[46/1500] ID: ont_5_military_test_46 → Unique triples extracted: 1
[47/1500] ID: ont_5_military_test_47 → Unique triples extracted: 3
[48/1500] ID: ont_5_military_test_48 → Unique triples extracted: 1
Batch 48-51 inference time: 93.56 seconds
[49/1500] ID: ont_5_military