In [1]:
!pip uninstall -y llama-cpp

[0m

### MACOS ONLY, SEE LLAMA.CPP GITHUB FOR OTHER PLATFORMS

In [2]:
!CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python --upgrade --force-reinstall --no-cache-dir

Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.7.tar.gz (66.7 MB)
[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━[0m [32m49.5/66.7 MB[0m [31m41.7 MB/s[0m eta [36m0:00:01[0m^C
[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━[0m [32m57.7/66.7 MB[0m [31m42.4 MB/s[0m eta [36m0:00:01[0m
[?25h

# download model

In [None]:
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
    filename="*Q4_K_M.gguf",
    verbose=False,
    local_dir="models",
)

# 0

In [None]:
from llama_cpp import Llama

llm = Llama(model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml")

def extract_knowledge(message):

    llm.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": "You are a tacit knowledge extractor that only outputs in JSON.",
            },
            {"role": "user", "content": "OpenAI just released an incredible new paper that talks about, look, not diffusion models! That is interesting, because absolutely everyone is talking about diffusion models. These are AI techniques that typically start out from noise, and over time, Reorganize this noise into an image. You see, this concept works for even video generation, it is a series of still images after all, but things get crazier. Diffusion also works in 3D, where the noise shows up as whatever this is. This can be denoised too into virtual character models. Or it works for computer animation too, where the noise can show up as twitching. Insanity. Diffusion models can also perform voice synthesis."},
        ],
        response_format={
            "type": "json_object",
            "schema": {
                "type": "object",
                "properties": {
                    "triplets": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "subject": {"type": "string"},
                                "predicate": {"type": "string"},
                                "object": {"type": "string"}
                            },
                            "required": ["subject", "predicate", "object"]
                        }
                    }
                },
                "required": ["triplets"],
            },
        },
        temperature=0.5,
    )

def speak_to_llama():
    import json

    # Prompt the user for input
    user_message = input("Enter your message: ")

    # Call the extract_knowledge function with the user's message
    response = extract_knowledge(user_message)

    # Parse and print the JSON response
    try:
        response_json = json.loads(response)
        print("Extracted Knowledge Triplets:")
        for triplet in response_json.get("triplets", []):
            print(f"Subject: {triplet['subject']}, Predicate: {triplet['predicate']}, Object: {triplet['object']}")
    except json.JSONDecodeError:
        print("Failed to parse the response as JSON.")


In [None]:
speak_to_llama()

In [None]:
import json
from llama_cpp import Llama
from sqlalchemy import create_engine, Column, Integer, String, Text, ForeignKey
from sqlalchemy.orm import sessionmaker, declarative_base, relationship
import os

# Database setup
Base = declarative_base()

class Message(Base):
    __tablename__ = 'messages'
    id = Column(Integer, primary_key=True)
    role = Column(String(10))  # 'user' or 'assistant'
    content = Column(Text)
    triplets = relationship("Triplet", back_populates="message")

class Triplet(Base):
    __tablename__ = 'triplets'
    id = Column(Integer, primary_key=True)
    subject = Column(String(255))
    predicate = Column(String(255))
    object = Column(String(255))
    message_id = Column(Integer, ForeignKey('messages.id'))
    message = relationship("Message", back_populates="triplets")

# Initialize database
engine = create_engine('sqlite:///chatbot.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

# Initialize LLaMA model
llm = Llama(model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml")

def extract_knowledge(message):
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": "You are a tacit knowledge extractor that only outputs in JSON.",
            },
            {"role": "user", "content": message},
        ],
        response_format={
            "type": "json_object",
            "schema": {
                "type": "object",
                "properties": {
                    "triplets": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "subject": {"type": "string"},
                                "predicate": {"type": "string"},
                                "object": {"type": "string"}
                            },
                            "required": ["subject", "predicate", "object"]
                        }
                    }
                },
                "required": ["triplets"],
            },
        },
        temperature=0.5,
    )

    #save json response
    save_message(role='assistant', content=json.dumps(response))
    return json.loads(response['choices'][0]['message']['content'])

def save_message(role, content, extracted_triplets=None):
    msg = Message(role=role, content=content)
    session.add(msg)
    session.commit()
    if extracted_triplets:
        for triplet in extracted_triplets:
            trip = Triplet(
                subject=triplet.get('subject', ''),
                predicate=triplet.get('predicate', ''),
                object=triplet.get('object', ''),
                message_id=msg.id
            )
            session.add(trip)
        session.commit()

def get_recent_conversation(limit=5):
    messages = session.query(Message).order_by(Message.id.desc()).limit(limit).all()
    # Reverse to maintain chronological order
    messages = messages[::-1]
    conversation = []
    for msg in messages:
        conversation.append({"role": msg.role, "content": msg.content})
    return conversation

def generate_response(conversation_history):
    response = llm.create_chat_completion(
        messages=conversation_history,
        temperature=0.7,
    )
    return response['choices'][0]['message']['content']

def speak_to_llama():
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        user_message = input("You: ")
        if user_message.lower() in ['exit', 'quit']:
            print("Chatbot: Goodbye!")
            break

        # Save user message
        save_message(role='user', content=user_message)

        # Extract knowledge from user message
        user_triplet_response = extract_knowledge(user_message)
        user_triplets = user_triplet_response.get("triplets", [])
        save_message(role='user', content=user_message, extracted_triplets=user_triplets)

        # Retrieve recent conversation for context
        conversation = get_recent_conversation(limit=10)  # Adjust limit as needed

        # Generate assistant response
        assistant_response = generate_response(conversation)
        print(f"Assistant: {assistant_response}")

        # Save assistant response
        save_message(role='assistant', content=assistant_response)

        # Extract knowledge from assistant response
        assistant_triplet_response = extract_knowledge(assistant_response)
        assistant_triplets = assistant_triplet_response.get("triplets", [])
        save_message(role='assistant', content=assistant_response, extracted_triplets=assistant_triplets)

if __name__ == "__main__":
    speak_to_llama()

In [None]:
import json
from llama_cpp import Llama
from sqlalchemy import create_engine, Column, Integer, String, Text, ForeignKey
from sqlalchemy.orm import sessionmaker, declarative_base, relationship
import os
import threading

# Database setup
Base = declarative_base()

class Message(Base):
    __tablename__ = 'messages'
    id = Column(Integer, primary_key=True)
    role = Column(String(10))  # 'user' or 'assistant'
    content = Column(Text)
    triplets = relationship("Triplet", back_populates="message")

class Triplet(Base):
    __tablename__ = 'triplets'
    id = Column(Integer, primary_key=True)
    subject = Column(String(255))
    predicate = Column(String(255))
    object = Column(String(255))
    message_id = Column(Integer, ForeignKey('messages.id'))
    message = relationship("Message", back_populates="triplets")

# Initialize database
engine = create_engine('sqlite:///chatbot.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

# Singleton pattern for LLaMA model
class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format)
            return cls._instance

def extract_valuable_knowledge(message):
    llama_instance = LlamaSingleton().llm
    response = llama_instance.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": "You are a knowledge extractor that identifies and outputs only valuable information in JSON format.",
            },
            {"role": "user", "content": message},
        ],
        response_format={
            "type": "json_object",
            "schema": {
                "type": "object",
                "properties": {
                    "valuable_knowledge": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "subject": {"type": "string"},
                                "predicate": {"type": "string"},
                                "object": {"type": "string"}
                            },
                            "required": ["subject", "predicate", "object"]
                        }
                    }
                },
                "required": ["valuable_knowledge"],
            },
        },
        temperature=0.5,
    )
    return json.loads(response['choices'][0]['message']['content'])

def save_knowledge_to_json(knowledge, filename='valuable_knowledge.json'):
    try:
        with open(filename, 'r') as file:
            existing_knowledge = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        existing_knowledge = []

    existing_knowledge.extend(knowledge)

    with open(filename, 'w') as file:
        json.dump(existing_knowledge, file, indent=4)

def load_knowledge_from_json(filename='valuable_knowledge.json'):
    try:
        with open(filename, 'r') as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        return []

def save_message(role, content, extracted_knowledge=None):
    msg = Message(role=role, content=content)
    session.add(msg)
    session.commit()
    if extracted_knowledge:
        for knowledge in extracted_knowledge:
            trip = Triplet(
                subject=knowledge.get('subject', ''),
                predicate=knowledge.get('predicate', ''),
                object=knowledge.get('object', ''),
                message_id=msg.id
            )
            session.add(trip)
        session.commit()
        save_knowledge_to_json(extracted_knowledge)

def get_recent_conversation(limit=5):
    messages = session.query(Message).order_by(Message.id.desc()).limit(limit).all()
    messages = messages[::-1]
    conversation = []
    for msg in messages:
        conversation.append({"role": msg.role, "content": msg.content})
    return conversation

def generate_response(conversation_history):
    llama_instance = LlamaSingleton().llm
    knowledge_triplets = load_knowledge_from_json()
    knowledge_context = "\n".join(
        f"{triplet['subject']} {triplet['predicate']} {triplet['object']}"
        for triplet in knowledge_triplets
    )
    conversation_history.insert(0, {"role": "system", "content": knowledge_context})
    response = llama_instance.create_chat_completion(
        messages=conversation_history,
        temperature=0.7,
    )
    return response['choices'][0]['message']['content']

def speak_to_llama():
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        user_message = input("You: ")
        if user_message.lower() in ['exit', 'quit']:
            print("Chatbot: Goodbye!")
            break

        save_message(role='user', content=user_message)

        user_knowledge_response = extract_valuable_knowledge(user_message)
        user_knowledge = user_knowledge_response.get("valuable_knowledge", [])
        save_message(role='user', content=user_message, extracted_knowledge=user_knowledge)

        conversation = get_recent_conversation(limit=10)

        assistant_response = generate_response(conversation)
        print(f"Assistant: {assistant_response}")

        save_message(role='assistant', content=assistant_response)

        assistant_knowledge_response = extract_valuable_knowledge(assistant_response)
        assistant_knowledge = assistant_knowledge_response.get("valuable_knowledge", [])
        save_message(role='assistant', content=assistant_response, extracted_knowledge=assistant_knowledge)

if __name__ == "__main__":
    speak_to_llama()

In [None]:
import json
import os
from datetime import datetime
from llama_cpp import Llama
import threading

# Singleton pattern for LLaMA model
class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format)
            return cls._instance

# File paths
MESSAGES_FILE = 'messages.json'
KNOWLEDGE_FILE = 'knowledge.json'

# Initialize JSON files if they don't exist
def initialize_json_files():
    for file in [MESSAGES_FILE, KNOWLEDGE_FILE]:
        if not os.path.exists(file):
            with open(file, 'w') as f:
                json.dump([], f)

# Load JSON data from a file
def load_json_data(file_path):
    with open(file_path, 'r') as f:
        return json.load(f)

# Save JSON data to a file
def save_json_data(file_path, data):
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)

# Extract valuable knowledge from a message
def extract_valuable_knowledge(message):
    llama_instance = LlamaSingleton().llm
    response = llama_instance.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": "You are a knowledge extractor that identifies and outputs only valuable information in JSON format.",
            },
            {"role": "user", "content": message},
        ],
        response_format={
            "type": "json",
            "schema": {
                "type": "object",
                "properties": {
                    "valuable_knowledge": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "subject": {"type": "string"},
                                "predicate": {"type": "string"},
                                "object": {"type": "string"}
                            },
                            "required": ["subject", "predicate", "object"]
                        }
                    }
                },
                "required": ["valuable_knowledge"],
            },
        },
        temperature=0.5,
    )
    return json.loads(response['choices'][0]['message']['content'])

# Save a message to messages.json
def save_message(role, content):
    messages = load_json_data(MESSAGES_FILE)
    message = {
        "role": role,
        "content": content,
        "timestamp": datetime.utcnow().isoformat()
    }
    messages.append(message)
    save_json_data(MESSAGES_FILE, messages)

# Save extracted knowledge to knowledge.json
def save_knowledge(triplets):
    knowledge = load_json_data(KNOWLEDGE_FILE)
    for triplet in triplets:
        triplet['timestamp'] = datetime.utcnow().isoformat()
        knowledge.append(triplet)
    save_json_data(KNOWLEDGE_FILE, knowledge)

# Get recent conversation history
def get_recent_conversation(limit=5):
    messages = load_json_data(MESSAGES_FILE)
    return messages[-limit:]

# Search knowledge triplets for relevant information
def search_knowledge_triplets(query):
    knowledge_triplets = load_json_data(KNOWLEDGE_FILE)
    relevant_triplets = []
    for triplet in knowledge_triplets:
        if query.lower() in (triplet['subject'].lower(), triplet['predicate'].lower(), triplet['object'].lower()):
            relevant_triplets.append(triplet)
    return relevant_triplets

# Generate a response based on conversation history and knowledge
def generate_response(conversation_history, user_message):
    llama_instance = LlamaSingleton().llm
    knowledge_triplets = search_knowledge_triplets(user_message)
    if knowledge_triplets:
        knowledge_context = "\n".join(
            f"{triplet['subject']} {triplet['predicate']} {triplet['object']} (Timestamp: {triplet['timestamp']})"
            for triplet in knowledge_triplets
        )
        response = f"Based on what I know:\n{knowledge_context}"
    else:
        conversation_history.append({"role": "user", "content": user_message})
        response = llama_instance.create_chat_completion(
            messages=conversation_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
    return response

# Main chat function
def speak_to_llama():
    initialize_json_files()
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        user_message = input("You: ")
        if user_message.lower() in ['exit', 'quit']:
            print("Chatbot: Goodbye!")
            break

        # Save user message
        save_message(role='user', content=user_message)

        # Retrieve recent conversation for context
        conversation = get_recent_conversation(limit=10)

        # Generate assistant response
        assistant_response = generate_response(conversation, user_message)
        print(f"Assistant: {assistant_response}")

        # Save assistant response
        save_message(role='assistant', content=assistant_response)

        # Extract and save knowledge from user message
        user_knowledge_response = extract_valuable_knowledge(user_message)
        user_knowledge = user_knowledge_response.get("valuable_knowledge", [])
        save_knowledge(user_knowledge)

if __name__ == "__main__":
    speak_to_llama()

In [None]:
import json
import os
from datetime import datetime
from llama_cpp import Llama
import threading

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=1024)
            return cls._instance

class Chatbot:
    def __init__(self, messages_file='messages.json', knowledge_file='knowledge.json'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.llm = LlamaSingleton().llm
        self.initialize_json_files()

    def initialize_json_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": "You are a knowledge extractor that identifies and outputs only valuable information in JSON format.",
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        return json.loads(response['choices'][0]['message']['content'])

    def save_message(self, role, content):
        messages = self.load_json_data(self.messages_file)
        message = {
            "role": role,
            "content": content,
            "timestamp": datetime.utcnow().isoformat()
        }
        messages.append(message)
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        knowledge = self.load_json_data(self.knowledge_file)
        for triplet in triplets:
            triplet['timestamp'] = datetime.utcnow().isoformat()
            knowledge.append(triplet)
        self.save_json_data(self.knowledge_file, knowledge)

    def get_recent_conversation(self, limit=5):
        messages = self.load_json_data(self.messages_file)
        return messages[-limit:]

    def search_knowledge_triplets(self, query):
        knowledge_triplets = self.load_json_data(self.knowledge_file)
        relevant_triplets = []
        for triplet in knowledge_triplets:
            if query.lower() in (triplet['subject'].lower(), triplet['predicate'].lower(), triplet['object'].lower()):
                relevant_triplets.append(triplet)
        return relevant_triplets

    def generate_response(self, conversation_history, user_message):
        knowledge_triplets = self.search_knowledge_triplets(user_message)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_triplets:
            knowledge_context = "\n".join(
                f"{triplet['subject']} {triplet['predicate']} {triplet['object']} (Timestamp: {triplet['timestamp']})"
                for triplet in knowledge_triplets
            )
            system_message += f"Based on existing knowledge:\n{knowledge_context}\n"
        conversation_history.insert(0, {"role": "system", "content": system_message})
        conversation_history.append({"role": "user", "content": user_message})
        response = self.llm.create_chat_completion(
            messages=conversation_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break

            self.save_message(role='user', content=user_message)

            conversation = self.get_recent_conversation(limit=10)

            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")

            self.save_message(role='assistant', content=assistant_response)

            user_knowledge_response = self.extract_valuable_knowledge(user_message)
            user_knowledge = user_knowledge_response.get("valuable_knowledge", [])
            self.save_knowledge(user_knowledge)

if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.chat()

In [None]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=1024)
            return cls._instance

class Chatbot:
    def __init__(self, messages_file='messages.json', knowledge_file='knowledge.json', faiss_index_file='faiss_index.pkl'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": "You are a knowledge extractor that identifies and outputs only valuable information in JSON format.",
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        return json.loads(response['choices'][0]['message']['content'])

    def save_message(self, role, content):
        messages = self.load_json_data(self.messages_file)
        message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
        messages.append(message)
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        knowledge = self.load_json_data(self.knowledge_file)
        new_triplets = []
        for triplet in triplets:
            triplet['timestamp'] = datetime.utcnow().isoformat()
            knowledge.append(triplet)
            new_triplets.append(triplet)
        self.save_json_data(self.knowledge_file, knowledge)
        self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        if self.index is None:
            return []
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        results = [self.knowledge_data[idx] for idx in indices[0] if idx != -1]
        return results

    def generate_response(self, conversation_history, user_message):
        knowledge_matches = self.search_knowledge(user_message)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            knowledge_context = "\n".join(
                f"{t['subject']} {t['predicate']} {t['object']} (Added on: {t['timestamp']})"
                for t in knowledge_matches
            )
            system_message += f"Based on my knowledge:\n{knowledge_context}\n"
        conversation_history.insert(0, {"role": "system", "content": system_message})
        conversation_history.append({"role": "user", "content": user_message})
        response = self.llm.create_chat_completion(
            messages=conversation_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break

            self.save_message(role='user', content=user_message)

            conversation = self.load_json_data(self.messages_file)[-10:]

            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")

            self.save_message(role='assistant', content=assistant_response)

            user_knowledge_response = self.extract_valuable_knowledge(user_message)
            if user_knowledge_response:
                self.save_knowledge(user_knowledge_response)
            else:
                print("No valuable knowledge extracted; continuing conversation.")

if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.chat()

In [None]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    """A thread-safe singleton for loading a single Llama model instance."""
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=1024)
            return cls._instance


class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        """
        Initialize the chatbot, ensuring that the files exist and loading FAISS index if available.
        """
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file

        # Initialize LLM and embedding model
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)

        # Internal memory of knowledge and FAISS index
        self.index = None
        self.knowledge_data = []

        # Ensure JSON files exist
        self.initialize_files()

        # Load any existing FAISS index
        self.load_faiss_index()

    def initialize_files(self):
        """Ensure that the message and knowledge JSON files exist."""
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        """Load data from a JSON file."""
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        """Save data to a JSON file."""
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        """
        Extract valuable knowledge from a user message using the LLM.
        The LLM is prompted to return JSON structured knowledge.
        """
        # Prompt the model to return valuable knowledge triplets in JSON format.
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Extract valuable knowledge from the user's message.\n"
                        "Return ONLY JSON with the following schema:\n\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\",\n"
                        "      \"timestamp\": \"...\"  # ISO8601\n"
                        "    }\n"
                        "  ]\n"
                        "}\n\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )

        # Try parsing the returned JSON.
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            # Ensure the result is at least an empty list if no knowledge was found.
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            # If there's any parsing error, return empty knowledge.
            return []

    def save_message(self, role, content):
        """Append a new message to the messages file."""
        messages = self.load_json_data(self.messages_file)
        message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
        messages.append(message)
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        """
        Save extracted knowledge triplets to the knowledge file and update the FAISS index.
        De-duplicates knowledge based on (subject, predicate, object).
        """
        if not triplets:
            return

        knowledge = self.load_json_data(self.knowledge_file)

        # Create a set of existing triples to avoid duplicates
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}

        new_triplets = []
        for triplet in triplets:
            # Ensure timestamp is properly updated
            triplet['timestamp'] = datetime.utcnow().isoformat()
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)

        # Update knowledge file
        self.save_json_data(self.knowledge_file, knowledge)

        # Update FAISS index if we have new triplets
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        """
        Update the FAISS index with new triplets.
        Each triplet is embedded and added to the index.
        """
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)

        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])

        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        """Save the FAISS index and knowledge data to a file."""
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        """Load the FAISS index and knowledge data if available."""
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        """
        Search the FAISS index for the top_k most relevant knowledge triplets related to the query.

        :param query: The query string to search for.
        :param top_k: The number of top results to return.
        :return: A list of triplets (each a dict with 'subject', 'predicate', 'object', 'timestamp').
        """
        # If no index or knowledge is loaded, return an empty list
        if self.index is None or len(self.knowledge_data) == 0:
            return []

        # Encode the query to its embedding
        query_embedding = self.model.encode([query])

        # Perform the search on the FAISS index
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)

        results = []
        for idx in indices[0]:
            # If FAISS didn't return a valid index (-1), skip it
            if idx == -1:
                continue
            results.append(self.knowledge_data[idx])
        return results

    def generate_response(self, conversation_history, user_message):
        """
        Generate a response using the LLM, enriched with knowledge context if available.
        """
        knowledge_matches = self.search_knowledge(user_message)
        current_time = datetime.utcnow().isoformat()
        
        # Build a system message with current time and matched knowledge
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Based on retrieved knowledge:\n"
            for t in knowledge_matches:
                system_message += f"- {t['subject']} {t['predicate']} {t['object']} (Added on: {t['timestamp']})\n"
        else:
            system_message += "No direct related knowledge found. Proceeding with general reasoning.\n"

        # Prepend system message and append user message
        enriched_history = [{"role": "system", "content": system_message}] + conversation_history
        enriched_history.append({"role": "user", "content": user_message})

        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        """
        Start the interactive chat loop.
        """
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower().strip() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break

            # Save user message
            self.save_message(role='user', content=user_message)

            # Load recent conversation (last 10 messages to maintain some context)
            conversation = self.load_json_data(self.messages_file)[-10:]
            # Generate assistant response
            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")

            # Save assistant response
            self.save_message(role='assistant', content=assistant_response)

            # Extract and save new knowledge from the user's last message
            user_knowledge_response = self.extract_valuable_knowledge(user_message)
            if user_knowledge_response:
                self.save_knowledge(user_knowledge_response)
            else:
                # If no knowledge was extracted, we continue silently
                pass


if __name__ == "__main__":
    chatbot = Chatbot()
    query = "What do we know about topic X?"
    search_results = chatbot.search_knowledge(query, top_k=3)
    if search_results:
        print("Top 3 relevant knowledge triplets:")
        for r in search_results:
            print(f"- {r['subject']} {r['predicate']} {r['object']} (timestamp: {r['timestamp']})")
    else:
        print("No relevant knowledge found for that query.")

In [None]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/qwen2-0_5b-instruct-q8_0.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=1024)
            return cls._instance


class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Extract valuable knowledge from the user's message.\n"
                        "Return ONLY JSON with the following schema:\n\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\",\n"
                        "      \"timestamp\": \"...\"  # ISO8601\n"
                        "    }\n"
                        "  ]\n"
                        "}\n\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            return []

    def save_message(self, role, content):
        messages = self.load_json_data(self.messages_file)
        message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
        messages.append(message)
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            triplet['timestamp'] = datetime.utcnow().isoformat()
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        if self.index is None or len(self.knowledge_data) == 0:
            return []
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        results = []
        for idx in indices[0]:
            if idx == -1:
                continue
            results.append(self.knowledge_data[idx])
        return results

    def generate_response(self, conversation_history, user_message):
        knowledge_matches = self.search_knowledge(user_message)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Based on retrieved knowledge:\n"
            for t in knowledge_matches:
                system_message += f"- {t['subject']} {t['predicate']} {t['object']} (Added on: {t['timestamp']})\n"
                print(system_message)
        else:
            system_message += "No direct related knowledge found. Proceeding with general reasoning.\n"
        enriched_history = [{"role": "system", "content": system_message}] + conversation_history
        enriched_history.append({"role": "user", "content": user_message})
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower().strip() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break
            self.save_message(role='user', content=user_message)
            conversation = self.load_json_data(self.messages_file)[-10:]
            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")
            self.save_message(role='assistant', content=assistant_response)
            user_knowledge_response = self.extract_valuable_knowledge(user_message)
            if user_knowledge_response:
                self.save_knowledge(user_knowledge_response)


if __name__ == "__main__":
    chatbot = Chatbot()
    query = "What do we know about topic X?"
    search_results = chatbot.search_knowledge(query, top_k=3)
    if search_results:
        print("Top 3 relevant knowledge triplets:")
        for r in search_results:
            print(f"- {r['subject']} {r['predicate']} {r['object']} (timestamp: {r['timestamp']})")
    else:
        print("No relevant knowledge found for that query.")

# 1

In [6]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Try to Extract any knowledge from the user.\n"
                        "Return ONLY JSON with the following schema:\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\",\n"
                        "      \"timestamp\": \"...\"  # ISO8601\n"
                        "    }\n"
                        "  ]\n"
                        "}\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            print(knowledge_data)
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            return []

    def save_message(self, role, content):
        messages = self.load_json_data(self.messages_file)
        message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
        messages.append(message)
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            triplet['timestamp'] = datetime.utcnow().isoformat()
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        if self.index is None or len(self.knowledge_data) == 0:
            return []
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        results = []
        for idx in indices[0]:
            if idx == -1:
                continue
            results.append(self.knowledge_data[idx])
        return results

    def generate_response(self, conversation_history, user_message):
        knowledge_matches = self.search_knowledge(user_message, top_k=5)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Answer based on retrieved knowledge:\n"
            for t in knowledge_matches:
                system_message += f"- {t['subject']} {t['predicate']} {t['object']} (Added on: {t['timestamp']})\n"
            
        else:
            system_message += "No direct related knowledge found. Proceeding with general reasoning.\n"
        enriched_history = [{"role": "system", "content": f"You are a helpful assistent; {system_message}"}] + conversation_history
        enriched_history.append({"role": "user", "content": user_message})
        print(enriched_history)
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower().strip() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break
            self.save_message(role='user', content=user_message)
            conversation = self.load_json_data(self.messages_file)[-3:]
            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")
            generate_speech(assistant_response)
            self.save_message(role='assistant', content=assistant_response)
            user_knowledge_response = self.extract_valuable_knowledge(user_message)
            print(user_knowledge_response)  
            if user_knowledge_response:
                self.save_knowledge(user_knowledge_response)

if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.chat()

llama_load_model_from_file: using device Metal (Apple M3 Max) - 26569 MiB free
llama_model_loader: loaded meta data with 41 key-value pairs and 339 tensors from models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 7B Instruct Uncensored
llama_model_loader: - kv   3:                           general.finetune str              = Instruct-Uncensored
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 7B
llama_model_loader: - kv   6:         

Chatbot is ready! Type 'exit' to end the conversation.


  message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
  current_time = datetime.utcnow().isoformat()


[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-01-19T23:36:00.991502\nAnswer based on retrieved knowledge:\n- the user wants to kidnap a girl (Added on: 2024-12-19T00:30:13.857003)\n- Elden Ring is considered the most secret game of the world (Added on: 2024-12-19T00:30:13.857001)\n- Eiffel Tower color brownish-red (Added on: 2024-12-19T00:49:16.904830)\n- Elden Ring release date yetserday (Added on: 2024-12-19T00:47:57.265036)\n- the user views Elden Ring as (Added on: 2024-12-19T00:30:13.856999)\n'}, {'role': 'user', 'content': 'hey who am i?', 'timestamp': '2025-01-19T23:30:55.192986'}, {'role': 'user', 'content': 'hallo welk spel speel ik graag?', 'timestamp': '2025-01-19T23:33:13.741464'}, {'role': 'user', 'content': 'hey i want you to remember that my favourite number is 21, and my dog is called sloffie', 'timestamp': '2025-01-19T23:36:00.903916'}, {'role': 'user', 'content': 'hey i want you to remember that my favourite number is 21, and

llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   344 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   179 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10252.76 ms /   523 tokens


Assistant: It seems like you're providing some personal information. Based on what you've shared, your favorite number is 21 and your dog's name is Sloffie. However, you haven't mentioned any specific game that you like playing. Elden Ring, which you mentioned viewing, is a popular game, but it's not clear if that's the one you like.

To answer your second question, it appears that you are interested in the game Elden Ring. While the exact release date of the game was yesterday, as you mentioned, it is not explicitly known which game you are referring to.

In response to your first question, I am an AI assistant and cannot directly know who you are or what your preferences are, but based on the information you've provided, it seems like you like the number 21 and have a dog named Sloffie. I hope that helps!


Llama.generate: 6 prefix-match hit, remaining 118 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   118 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   122 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6501.10 ms /   240 tokens
  triplet['timestamp'] = datetime.utcnow().isoformat()


{'valuable_knowledge': [{'subject': 'user', 'predicate': 'favourite number', 'object': 21, 'timestamp': '2023-04-10T10:00:00Z'}, {'subject': 'user', 'predicate': "dog's name", 'object': 'sloffie', 'timestamp': '2023-04-10T10:00:00Z'}]}
[{'subject': 'user', 'predicate': 'favourite number', 'object': 21, 'timestamp': '2023-04-10T10:00:00Z'}, {'subject': 'user', 'predicate': "dog's name", 'object': 'sloffie', 'timestamp': '2023-04-10T10:00:00Z'}]
[{'role': 'system', 'content': "You are a helpful assistent; Current date and time: 2025-01-19T23:37:55.135027\nAnswer based on retrieved knowledge:\n- user dog's name sloffie (Added on: 2025-01-19T23:37:18.130914)\n- user favourite number 21 (Added on: 2025-01-19T23:37:18.130908)\n- Eiffel Tower color brownish-red (Added on: 2024-12-19T00:49:16.904830)\n- Eiffel Tower is working with Antonio van Dijck (Added on: 2024-12-19T00:51:56.878330)\n- the user wants to kidnap a girl (Added on: 2024-12-19T00:30:13.857003)\n"}, {'role': 'user', 'content': 

Llama.generate: 6 prefix-match hit, remaining 488 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   488 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3805.06 ms /   497 tokens


Assistant: Your dog's name is Sloffie.


Llama.generate: 6 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    66 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3999.48 ms /   167 tokens


{'valuable_knowledge': [{'subject': 'user', 'predicate': 'asked', 'object': "dog's name", 'timestamp': '2023-04-14T10:00:00Z'}]}
[{'subject': 'user', 'predicate': 'asked', 'object': "dog's name", 'timestamp': '2023-04-14T10:00:00Z'}]
[{'role': 'system', 'content': "You are a helpful assistent; Current date and time: 2025-01-19T23:38:36.141346\nAnswer based on retrieved knowledge:\n- user favourite number 21 (Added on: 2025-01-19T23:37:18.130908)\n- Elden Ring is considered the most secret game of the world (Added on: 2024-12-19T00:30:13.857001)\n- user asked dog's name (Added on: 2025-01-19T23:38:06.311002)\n- wallet location floor (Added on: 2024-12-19T01:22:50.372690)\n- the user views Elden Ring as (Added on: 2024-12-19T00:30:13.856999)\n"}, {'role': 'user', 'content': 'what is my dogs name?', 'timestamp': '2025-01-19T23:37:55.034375'}, {'role': 'assistant', 'content': "Your dog's name is Sloffie.", 'timestamp': '2025-01-19T23:38:02.303754'}, {'role': 'user', 'content': 'Wat is my f

Llama.generate: 6 prefix-match hit, remaining 293 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   293 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     8 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2681.32 ms /   301 tokens


Assistant: Your favorite number is 21.


Llama.generate: 6 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    10 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1807.25 ms /   111 tokens


{'valuable_knowledge': []}
[]
[{'role': 'system', 'content': "You are a helpful assistent; Current date and time: 2025-01-19T23:39:20.904877\nAnswer based on retrieved knowledge:\n- user favourite number 21 (Added on: 2025-01-19T23:37:18.130908)\n- Elden Ring release date yetserday (Added on: 2024-12-19T00:47:57.265036)\n- the user is playing Elden Ring (Added on: 2024-12-19T00:30:13.856992)\n- Elden Ring is considered the most secret game of the world (Added on: 2024-12-19T00:30:13.857001)\n- user dog's name sloffie (Added on: 2025-01-19T23:37:18.130914)\n"}, {'role': 'user', 'content': 'Wat is my favorite number?', 'timestamp': '2025-01-19T23:38:36.093352'}, {'role': 'assistant', 'content': 'Your favorite number is 21.', 'timestamp': '2025-01-19T23:38:42.706938'}, {'role': 'user', 'content': 'remember that i was dancing yesterday', 'timestamp': '2025-01-19T23:39:20.869122'}, {'role': 'user', 'content': 'remember that i was dancing yesterday'}]


Llama.generate: 6 prefix-match hit, remaining 299 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    61 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5039.18 ms /   360 tokens


Assistant: You mentioned that you were dancing yesterday. However, there's no specific information about dancing in the context of your favorite number, dog's name, or playing Elden Ring. Your favorite number is 21, but without more context, I can't determine if dancing was related to that or not.


Llama.generate: 6 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    65 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4077.73 ms /   166 tokens


{'valuable_knowledge': [{'subject': 'you', 'predicate': 'danced', 'object': 'yesterday', 'timestamp': '2023-04-04T00:00:00Z'}]}
[{'subject': 'you', 'predicate': 'danced', 'object': 'yesterday', 'timestamp': '2023-04-04T00:00:00Z'}]
[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-01-19T23:41:57.363971\nAnswer based on retrieved knowledge:\n- Eiffel Tower is working with Antonio van Dijck (Added on: 2024-12-19T00:51:56.878330)\n- user favourite number 21 (Added on: 2025-01-19T23:37:18.130908)\n- you danced yesterday (Added on: 2025-01-19T23:39:50.804129)\n- freezer action clean (Added on: 2024-12-19T01:22:50.372698)\n- freezer indication stinking (Added on: 2024-12-19T01:22:50.372696)\n'}, {'role': 'user', 'content': 'remember that i was dancing yesterday', 'timestamp': '2025-01-19T23:39:20.869122'}, {'role': 'assistant', 'content': "You mentioned that you were dancing yesterday. However, there's no specific information about dancing in the contex

Llama.generate: 6 prefix-match hit, remaining 386 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   386 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    24 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4063.68 ms /   410 tokens


Assistant: Misschien kan ik je helpen met drie woorden over de film Titanic:

Romantische Tragedie


Llama.generate: 6 prefix-match hit, remaining 124 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   124 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    64 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4129.53 ms /   188 tokens


{'valuable_knowledge': [{'subject': 'Titanic', 'predicate': 'is', 'object': 'film', 'timestamp': '2023-03-15T14:30:00Z'}]}
[{'subject': 'Titanic', 'predicate': 'is', 'object': 'film', 'timestamp': '2023-03-15T14:30:00Z'}]
[{'role': 'system', 'content': "You are a helpful assistent; Current date and time: 2025-01-19T23:42:32.870950\nAnswer based on retrieved knowledge:\n- user dog's name sloffie (Added on: 2025-01-19T23:37:18.130914)\n- you danced yesterday (Added on: 2025-01-19T23:39:50.804129)\n- user favourite number 21 (Added on: 2025-01-19T23:37:18.130908)\n- user asked dog's name (Added on: 2025-01-19T23:38:06.311002)\n- the user wants to kidnap a girl (Added on: 2024-12-19T00:30:13.857003)\n"}, {'role': 'user', 'content': ' kan je me vertellen over de film die nu aan het kijken ben hij heet Titanic en je mag alleen maar drie woorden gebruiken', 'timestamp': '2025-01-19T23:41:57.266117'}, {'role': 'assistant', 'content': 'Misschien kan ik je helpen met drie woorden over de film Ti

Llama.generate: 6 prefix-match hit, remaining 311 prompt tokens to eval
llama_perf_context_print:        load time =    2762.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    20 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3319.07 ms /   331 tokens


Assistant: Hier zijn drie woorden over de film Titanic:

Romantisch Tragedie Oceaan


Llama.generate: 6 prefix-match hit, remaining 93 prompt tokens to eval
ggml_metal_free: deallocating


KeyboardInterrupt: 

In [4]:
#!/usr/bin/env python3

"""Simple example to generate an audio file with randomized
dynamic voice selection based on attributes such as Gender,
Language, or Locale."""

import asyncio
import random
import nest_asyncio

import edge_tts
from edge_tts import VoicesManager
import playsound

OUTPUT_FILE = "spanish.mp3"

nest_asyncio.apply()

async def amain(text: str) -> None:
    """Main function"""
    voices = await VoicesManager.create()
    voice = voices.find(Gender="Female", Language="nl")
    # Also supports Locales
    # voice = voices.find(Gender="Female", Locale="es-AR")

    communicate = edge_tts.Communicate(text, random.choice(voice)["Name"])
    await communicate.save(OUTPUT_FILE)
    playsound.playsound(OUTPUT_FILE)

def generate_speech(text: str):
    asyncio.run(amain(text))

# Example usage
generate_speech("hallo, hoe gaat het?") 

In [None]:
!pip install playsound

# 4

In [1]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Try to Extract any knowledge from the user.\n"
                        "Return ONLY JSON with the following schema:\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\",\n"
                        "      \"timestamp\": \"...\"  # ISO8601\n"
                        "    }\n"
                        "  ]\n"
                        "}\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            print(knowledge_data)
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            return []

    def save_message(self, role, content):
        messages = self.load_json_data(self.messages_file)
        message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
        messages.append(message)
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            triplet['timestamp'] = datetime.utcnow().isoformat()
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        if self.index is None or len(self.knowledge_data) == 0:
            return []
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        results = []
        for idx in indices[0]:
            if idx == -1:
                continue
            results.append(self.knowledge_data[idx])
        return results

    def generate_response(self, conversation_history, user_message):
        knowledge_matches = self.search_knowledge(user_message, top_k=5)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Answer based on retrieved knowledge:\n"
            for t in knowledge_matches:
                system_message += f"- {t['subject']} {t['predicate']} {t['object']} (Added on: {t['timestamp']})\n"
            
        else:
            system_message += "No direct related knowledge found. Proceeding with general reasoning.\n"
        enriched_history = [{"role": "system", "content": f"You are a helpful assistent; {system_message}"}] + conversation_history
        enriched_history.append({"role": "user", "content": user_message})
        print(enriched_history)
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower().strip() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break
            self.save_message(role='user', content=user_message)
            conversation = self.load_json_data(self.messages_file)[-3:]
            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")
            #generate_speech(assistant_response)
            self.save_message(role='assistant', content=assistant_response)
            user_knowledge_response = self.extract_valuable_knowledge(user_message)
            print(user_knowledge_response)  
            if user_knowledge_response:
                self.save_knowledge(user_knowledge_response)

if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.chat()

llama_model_load_from_file: using device Metal (Apple M3 Max) - 27647 MiB free
llama_model_loader: loaded meta data with 41 key-value pairs and 339 tensors from models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 7B Instruct Uncensored
llama_model_loader: - kv   3:                           general.finetune str              = Instruct-Uncensored
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 7B
llama_model_loader: - kv   6:         

Chatbot is ready! Type 'exit' to end the conversation.


  message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
  current_time = datetime.utcnow().isoformat()


[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-01-23T13:34:02.665314\nAnswer based on retrieved knowledge:\n- Passengers can disrupt travel by bus driver going past intended stop, taxi drivers taking long routes (Added on: 2025-01-21T03:20:00.470830)\n- coal cars can snarl traffic if placed in a gallery where cars have to pass each other (Added on: 2025-01-21T03:16:31.400472)\n- Boat captains can delay road traffic by leaving draw bridges open (Added on: 2025-01-21T03:22:17.866963)\n- stop-plugs can be removed from lubricating systems to destroy oil outright (Added on: 2025-01-21T03:12:57.104248)\n- tire wear can be increased by driving the car slowly and diagonally into a curb (Added on: 2025-01-21T03:21:32.975862)\n'}, {'role': 'user', 'content': '', 'timestamp': '2025-01-21T13:53:55.969827'}, {'role': 'assistant', 'content': "The information provided suggests that dust can affect the performance and efficiency of a refrigerator. Regular clea

llama_perf_context_print:        load time =    7912.95 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   547 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   175 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   15468.66 ms /   722 tokens
Llama.generate: 6 prefix-match hit, remaining 105 prompt tokens to eval


Assistant: Here are some ways to stop traffic on a main road:

1. Bus Driver Going Past Intended Stop: A bus driver can disrupt traffic by intentionally going past the intended stop. This will cause passengers to get off the bus at the wrong place, creating congestion.

2. Taxi Driver Taking Long Routes: Taxi drivers can also disrupt traffic by taking long routes to their destinations, which can cause traffic jams and delays.

3. Boat Captains Leaving Draw Bridges Open: Boat captains can delay road traffic by leaving draw bridges open. When a boat is passing through, the draw bridge needs to be raised, and this can cause a delay for road traffic.

4. Tire Wear Increasing: Driving a car slowly and diagonally into a curb can increase tire wear, which can cause drivers to replace their tires more frequently and lead to increased traffic congestion due to frequent tire changes and replacements.


llama_perf_context_print:        load time =    7912.95 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   105 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    70 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4284.35 ms /   175 tokens
  triplet['timestamp'] = datetime.utcnow().isoformat()


{'valuable_knowledge': [{'subject': 'user', 'predicate': 'wants', 'object': 'to stop traffic on the main road', 'timestamp': '2023-04-05T10:23:00Z'}]}
[{'subject': 'user', 'predicate': 'wants', 'object': 'to stop traffic on the main road', 'timestamp': '2023-04-05T10:23:00Z'}]
Chatbot: Goodbye!


In [10]:
!pip install "chonkie[semantic]"


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [13]:
# Import the SemanticChunker from chonkie
from chonkie import SentenceChunker

chunker = SentenceChunker(
    chunk_size=512,
    chunk_overlap=128,
    min_sentences_per_chunk=40 #20 voor kort/ 40 voor medium/ 80 voor lang
)

# Read the contents of your input file
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Chunk the text using the chunker
chunks = chunker.chunk(text)

# Iterate through the resulting chunks and print details
for i, chunk in enumerate(chunks, start=1):
    print(f"Chunk {i}:")
    print(f"Text: {chunk.text}")
    print(f"Token Count: {chunk.token_count}")
    print(f"Number of Sentences: {len(chunk.sentences)}")
    print("-" * 40)

Chunk 1:
Text: Hallo iedereen, welkom op mijn kanaal! Vandaag gaan we het hebben over iets dat we allemaal wel eens tegenkomen: problemen met de vriezer. Of het nu gaat om een vriezer die vreemde geluiden maakt, niet goed koelt, of gewoon niet meer werkt zoals het zou moeten, we lopen allemaal wel eens tegen deze issues aan. Een vriezer lijkt misschien een simpel apparaat, maar als je je handen vuil maakt, zul je al snel merken dat er een heleboel nuances bij komen kijken die je niet zomaar uit een handleiding haalt. We duiken vandaag dus echt in de details, zodat je begrijpt wat er allemaal gebeurt onder de motorkap van je vriezer. Laten we beginnen. Het eerste waar ik altijd mee begin is veiligheid. Het lijkt zo logisch, maar het is echt essentieel. Voordat je ook maar iets aanraakt, moet je de stekker eruit trekken. Het gaat niet alleen om de stroom uitschakelen, je moet er echt zeker van zijn dat er geen lading meer in de vriezer zit. Sommige onderdelen, zoals de compressor en capa

In [17]:
!pip install "chonkie[semantic]"


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




# 5

In [3]:
!CMAKE_ARGS="-DGGML_METAL=on" FORCE_CMAKE=1 python3 -m pip install "git+https://github.com/abetlen/llama-cpp-python.git@refs/pull/1901/head" --force-reinstall --upgrade --no-cache-dir

Collecting git+https://github.com/abetlen/llama-cpp-python.git@refs/pull/1901/head
  Cloning https://github.com/abetlen/llama-cpp-python.git (to revision refs/pull/1901/head) to /private/var/folders/76/kx23hgz16fdcd_cjrpy0jnlm0000gn/T/pip-req-build-rsap2wd5
  Running command git clone --filter=blob:none --quiet https://github.com/abetlen/llama-cpp-python.git /private/var/folders/76/kx23hgz16fdcd_cjrpy0jnlm0000gn/T/pip-req-build-rsap2wd5
[0m  Running command git fetch -q https://github.com/abetlen/llama-cpp-python.git refs/pull/1901/head
  Running command git checkout -q 114b76b941e9ce5c64c4ee38c057dbb9295f0213
  Resolved https://github.com/abetlen/llama-cpp-python.git to commit 114b76b941e9ce5c64c4ee38c057dbb9295f0213
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting typing-extensions>=4.5.0 (fr

In [1]:
import json
import os
import pickle
import threading
from datetime import datetime

from chonkie import SemanticChunker
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError
from chonkie import SentenceChunker


# -------------------------------
# The Chatbot and LlamaSingleton
# -------------------------------

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Try to Extract any knowledge from the user.\n"
                        "Return ONLY JSON with the following schema:\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\",\n"
                        "      \"timestamp\": \"...\"  # ISO8601\n"
                        "    }\n"
                        "  ]\n"
                        "}\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            print("Extracted knowledge from a chunk:", knowledge_data)
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            return []

    def save_knowledge(self, triplets):
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            # Use the current timestamp for each new triplet
            triplet['timestamp'] = datetime.utcnow().isoformat()
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

# -------------------------------
# Main Processing: Chunk and Extract Knowledge
# -------------------------------

def main():
    # ----------------------------
    # Part 1: Chunk the input file
    # ----------------------------
    # Initialize the SemanticChunker with desired parameters
    chunker = SentenceChunker(
        chunk_size=512,
        chunk_overlap=128,
        min_sentences_per_chunk=40 #20 voor kort/ 40 voor medium/ 80 voor lang
    )

    # Read the contents of input.txt
    with open('input.txt', 'r', encoding='utf-8') as file:
        text = file.read()

    # Get the list of chunks from the input text
    chunks = chunker.chunk(text)
    print(f"Total chunks created: {len(chunks)}")

    # ----------------------------
    # Part 2: Extract Knowledge from Each Chunk
    # ----------------------------
    # Instantiate the Chatbot (for its knowledge extraction capability)
    chatbot = Chatbot()

    # Process each chunk
    for i, chunk in enumerate(chunks, start=1):
        print(f"\nProcessing chunk {i} (Token count: {chunk.token_count}, Sentences: {len(chunk.sentences)})")
        # Extract valuable knowledge from the chunk text
        extracted_knowledge = chatbot.extract_valuable_knowledge(chunk.text)
        # Save extracted knowledge (if any) to knowledge.json and update FAISS index
        if extracted_knowledge:
            chatbot.save_knowledge(extracted_knowledge)

    print("\nKnowledge extraction complete.")
    print("Please check 'knowledge.json' for the extracted valuable knowledge.")

if __name__ == "__main__":
    main()

  from tqdm.autonotebook import tqdm, trange
llama_model_load_from_file_impl: using device Metal (Apple M3 Max) - 27647 MiB free
llama_model_loader: loaded meta data with 41 key-value pairs and 339 tensors from models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 7B Instruct Uncensored
llama_model_loader: - kv   3:                           general.finetune str              = Instruct-Uncensored
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str       

Total chunks created: 4


load: control token: 151659 '<|fim_prefix|>' is not marked as EOG
load: control token: 151656 '<|video_pad|>' is not marked as EOG
load: control token: 151655 '<|image_pad|>' is not marked as EOG
load: control token: 151653 '<|vision_end|>' is not marked as EOG
load: control token: 151652 '<|vision_start|>' is not marked as EOG
load: control token: 151651 '<|quad_end|>' is not marked as EOG
load: control token: 151649 '<|box_end|>' is not marked as EOG
load: control token: 151648 '<|box_start|>' is not marked as EOG
load: control token: 151646 '<|object_ref_start|>' is not marked as EOG
load: control token: 151644 '<|im_start|>' is not marked as EOG
load: control token: 151661 '<|fim_suffix|>' is not marked as EOG
load: control token: 151647 '<|object_ref_end|>' is not marked as EOG
load: control token: 151660 '<|fim_middle|>' is not marked as EOG
load: control token: 151654 '<|vision_pad|>' is not marked as EOG
load: control token: 151650 '<|quad_start|>' is not marked as EOG
load: sp


Processing chunk 1 (Token count: 886, Sentences: 40)


llama_perf_context_print:        load time =    9224.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   945 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1102 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   57398.60 ms /  2047 tokens
Llama.generate: 96 prefix-match hit, remaining 891 prompt tokens to eval



Processing chunk 2 (Token count: 936, Sentences: 40)


KeyboardInterrupt: 

## TIMECODES ZIJN NU START EIND TIJD VAN CHUNKS EN CHUNKS GELADEN UIOT JSON

In [4]:
import json
import os
import pickle
import threading
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

from datetime import datetime

# -------------------------------
# The Chatbot and LlamaSingleton
# -------------------------------

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Try to extract any knowledge from the user.\n"
                        "Return ONLY JSON with the following schema:\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\",\n"
                        "      \"timestamp\": \"...\"  # This field will be replaced with chunk start and end\n"
                        "    }\n"
                        "  ]\n"
                        "}\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            print("Extracted knowledge from a chunk:", knowledge_data)
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            return []

    def save_knowledge(self, triplets):
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            # Here we assume the triplet already includes "start" and "end" keys.
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

# -------------------------------
# Main Processing: Load Chunks and Extract Knowledge
# -------------------------------

def main():
    # ----------------------------
    # Part 1: Load the chunks from output_chunks.json
    # ----------------------------
    with open('output_chunks.json', 'r', encoding='utf-8') as file:
        data = json.load(file)
    chunks = data.get("chunks", [])
    print(f"Total chunks loaded: {len(chunks)}")

    # ----------------------------
    # Part 2: Extract Knowledge from Each Chunk
    # ----------------------------
    # Instantiate the Chatbot (for its knowledge extraction capability)
    chatbot = Chatbot()

    # Process each chunk from output_chunks.json
    for i, chunk in enumerate(chunks, start=1):
        text = chunk.get("text", "")
        start_time = chunk.get("start")
        end_time = chunk.get("end")
        print(f"\nProcessing chunk {i} (Start: {start_time}, End: {end_time})")
        # Extract valuable knowledge from the chunk text
        extracted_knowledge = chatbot.extract_valuable_knowledge(text)
        if extracted_knowledge:
            # Replace the generated timestamp with the chunk's start and end times.
            for triplet in extracted_knowledge:
                triplet['start'] = start_time
                triplet['end'] = end_time
                if 'timestamp' in triplet:
                    del triplet['timestamp']
            chatbot.save_knowledge(extracted_knowledge)

    print("\nKnowledge extraction complete.")
    print("Please check 'knowledge.json' for the extracted valuable knowledge.")

if __name__ == "__main__":
    main()

llama_model_load_from_file_impl: using device Metal (Apple M3 Max) - 26559 MiB free
llama_model_loader: loaded meta data with 41 key-value pairs and 339 tensors from models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 7B Instruct Uncensored
llama_model_loader: - kv   3:                           general.finetune str              = Instruct-Uncensored
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 7B
llama_model_loader: - kv   6:    

Total chunks loaded: 29


llama_init_from_model: flash_attn    = 0
llama_init_from_model: freq_base     = 1000000.0
llama_init_from_model: freq_scale    = 1
llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
ggml_metal_init: allocating
ggml_metal_init: found device: Apple M3 Max
ggml_metal_init: picking default device: Apple M3 Max
ggml_metal_init: using embedded metal library
ggml_metal_init: GPU name:   Apple M3 Max
ggml_metal_init: GPU family: MTLGPUFamilyApple9  (1009)
ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003)
ggml_metal_init: GPU family: MTLGPUFamilyMetal3  (5001)
ggml_metal_init: simdgroup reduction   = true
ggml_metal_init: simdgroup matrix mul. = true
ggml_metal_init: has residency sets    = true
ggml_metal_init: has bfloat            = true
ggml_metal_init: use bfloat            = false
ggml_metal_init: hasUnifiedMemory      = true
ggml_metal_init: recommendedMaxWorkingSetSize  = 28991.03 MB
ggml_metal_init: loaded kern


Processing chunk 1 (Start: 1.448, End: 15.18)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   173 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   116 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6567.60 ms /   289 tokens
Llama.generate: 101 prefix-match hit, remaining 65 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fuel and EVAP system', 'predicate': 'purpose', 'object': 'prevent harmful vapors from entering the atmosphere causing smog', 'timestamp': '0:00-0:10'}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'timestamp': '0:11-0:18'}]}

Processing chunk 2 (Start: 15.2, End: 28.755)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    65 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   172 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    8187.45 ms /   237 tokens
Llama.generate: 101 prefix-match hit, remaining 35 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'timestamp': 'chunk start:0 chunk end:15'}, {'subject': 'vacuum hoses and electrical connectors', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'timestamp': 'chunk start:28 chunk end:51'}, {'subject': 'the canister', 'predicate': 'is_being_removed_from', 'object': 'above the subframe', 'timestamp': 'chunk start:54 chunk end:77'}]}

Processing chunk 3 (Start: 29.356, End: 35.579)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    35 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   141 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6738.24 ms /   176 tokens
Llama.generate: 101 prefix-match hit, remaining 35 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'gas tank', 'predicate': 'location', 'object': 'rear seat of the vehicle', 'timestamp': '0:00-0:07'}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'steel', 'timestamp': '0:11-0:13'}, {'subject': 'gas tank', 'predicate': 'attachment method', 'object': 'two straps', 'timestamp': '0:15-0:19'}]}

Processing chunk 4 (Start: 35.679, End: 41.803)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    35 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    60 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3479.08 ms /    95 tokens
Llama.generate: 101 prefix-match hit, remaining 468 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'the strap', 'predicate': 'is held by', 'object': 'two 14mm bolts', 'timestamp': '0:31-0:38'}]}

Processing chunk 5 (Start: 43.564, End: 148.419)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   468 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   671 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   32149.26 ms /  1139 tokens
Llama.generate: 101 prefix-match hit, remaining 147 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fuel tank', 'predicate': 'contains', 'object': 'harmful vapors', 'timestamp': '0,33'}, {'subject': 'evap canister', 'predicate': 'absorbs', 'object': 'harmful vapors', 'timestamp': '0,33'}, {'subject': 'evap canister', 'predicate': 'vent', 'object': 'harmful vapors', 'timestamp': '0,33'}, {'subject': 'engine', 'predicate': 'burns', 'object': 'harmful vapors', 'timestamp': '0,33'}, {'subject': 'fuel filler neck', 'predicate': 'ventilates', 'object': 'gas tank', 'timestamp': '0,67'}, {'subject': 'fuel filler neck', 'predicate': 'ventilates', 'object': 'EVAP canister', 'timestamp': '0,67'}, {'subject': 'fill check valve', 'predicate': 'ventilates', 'object': 'gas tank', 'timestamp': '0,67'}, {'subject': 'fill check valve', 'predicate': 'ventilates', 'object': 'EVAP canister', 'timestamp': '0,67'}, {'subject': 'vapor pressure sensor', 'predicate': 'takes reading', 'object': 'atmospheric pressure', 'timestamp': '0,67'}, 

llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   147 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   279 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   13037.93 ms /   426 tokens
Llama.generate: 101 prefix-match hit, remaining 22 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fill check valve', 'predicate': 'functions as', 'object': 'allowing excess vapor to escape from the fuel tank into a canister where it gets filtered', 'timestamp': '0:00-0:15'}, {'subject': 'excess air pressure', 'predicate': 'drained out through', 'object': 'the air drain valve at the bottom', 'timestamp': '0:15-0:25'}, {'subject': 'vapor pressure sensor', 'predicate': 'senses', 'object': 'the pressure in the tank', 'timestamp': '0:25-0:35'}, {'subject': 'vacuum switching valve', 'predicate': 'used by ECU', 'object': 'to draw a vacuum on the entire system and monitor the pressure to test for leaks', 'timestamp': '0:35-0:55'}, {'subject': 'check engine light', 'predicate': 'indicating', 'object': 'problems in the system', 'timestamp': '0:55-1:15'}]}

Processing chunk 7 (Start: 180.282, End: 183.305)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    22 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    35 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1759.72 ms /    57 tokens
Llama.generate: 101 prefix-match hit, remaining 51 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'ventilation hoses', 'predicate': 'disconnect', 'object': 'tank side', 'timestamp': 'over here'}]}

Processing chunk 8 (Start: 185.836, End: 194.882)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    51 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   131 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6387.15 ms /   182 tokens
Llama.generate: 101 prefix-match hit, remaining 118 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'timestamp': '0'}, {'subject': 'EVAP check valve', 'predicate': 'has an EVAP line', 'object': 'attached to it', 'timestamp': '0'}, {'subject': 'screws', 'predicate': 'are used to secure', 'object': 'lid', 'timestamp': '29'}]}

Processing chunk 9 (Start: 196.161, End: 222.294)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   118 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    43 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3208.23 ms /   161 tokens
Llama.generate: 101 prefix-match hit, remaining 16 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'check valve', 'predicate': 'remove', 'object': 'six pieces of rust', 'timestamp': '0:21-0:28'}]}

Processing chunk 10 (Start: 222.314, End: 223.915)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    16 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    40 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1902.42 ms /    56 tokens
Llama.generate: 101 prefix-match hit, remaining 179 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'grinding', 'predicate': 'need', 'object': 'off', 'timestamp': '0:00-0:04'}]}

Processing chunk 11 (Start: 224.015, End: 264.245)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   179 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   183 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    9248.44 ms /   362 tokens
Llama.generate: 101 prefix-match hit, remaining 15 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'through valve into pipe', 'timestamp': '0:26'}, {'subject': 'vapor', 'predicate': 'escapes through valve', 'object': 'into EVAP canister', 'timestamp': '0:31'}, {'subject': 'remaining pressure', 'predicate': 'exits out here', 'object': 'after vapor enters canister', 'timestamp': '0:33'}, {'subject': 'harmful gases', 'predicate': 'are left inside of', 'object': 'charcoal', 'timestamp': '0:35'}]}

Processing chunk 12 (Start: 264.265, End: 266.767)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     8 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     833.85 ms /    23 tokens
Llama.generate: 101 prefix-match hit, remaining 64 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': []}

Processing chunk 13 (Start: 266.787, End: 280.936)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    64 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   240 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11151.78 ms /   304 tokens
Llama.generate: 101 prefix-match hit, remaining 107 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'float', 'predicate': 'rise', 'object': 'when fuel tank is full', 'timestamp': '0:00-0:06'}, {'subject': 'float', 'predicate': 'seal off', 'object': 'vapor ventilation', 'timestamp': '0:06-0:11'}, {'subject': 'float', 'predicate': 'prevent', 'object': 'liquid from going inside charcoal canister', 'timestamp': '0:11-0:18'}, {'subject': 'charcoal canister', 'predicate': 'be harmed by', 'object': 'liquid', 'timestamp': '0:18-0:22'}, {'subject': 'speaker', 'predicate': 'intend to', 'object': "open the float to see what's inside", 'timestamp': '0:22-0:29'}]}

Processing chunk 14 (Start: 281.796, End: 303.949)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   107 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   327 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   15748.31 ms /   434 tokens
Llama.generate: 101 prefix-match hit, remaining 81 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid and diaphragm', 'timestamp': '0:00-0:06'}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'timestamp': '0:06-0:10'}, {'subject': 'atmospheric pressure', 'predicate': 'allows', 'object': 'vapor movement', 'timestamp': '0:10-0:17'}, {'subject': 'fuel cap', 'predicate': 'seals', 'object': 'vapors', 'timestamp': '0:17-0:23'}, {'subject': 'diaphragm', 'predicate': 'seals against', 'object': 'lid', 'timestamp': '0:23-0:28'}, {'subject': 'vapors', 'predicate': 'cannot escape', 'object': 'if fuel cap is sealed', 'timestamp': '0:28-0:35'}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': 'for inspection', 'timestamp': '0:35-0:40'}]}

Processing chunk 15 (Start: 308.171, End: 325.335)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    81 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   162 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    8945.76 ms /   243 tokens
Llama.generate: 101 prefix-match hit, remaining 185 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'gas tank', 'predicate': 'material', 'object': 'sheet metal', 'timestamp': '0:0'}, {'subject': 'gas tank', 'predicate': 'construction', 'object': 'two halves', 'timestamp': '0:1'}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welded', 'timestamp': '0:7'}, {'subject': 'gas tank', 'predicate': 'baffles', 'object': 'run along the perimeter', 'timestamp': '0:15'}]}

Processing chunk 16 (Start: 325.575, End: 365.911)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   185 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   288 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   14813.69 ms /   473 tokens
Llama.generate: 102 prefix-match hit, remaining 50 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'timestamp': '0:00-0:06'}, {'subject': 'fuel pump', 'predicate': 'location', 'object': 'lowest part of the tank', 'timestamp': '0:07-0:11'}, {'subject': 'evap check valve', 'predicate': 'location', 'object': 'highest part of the tank', 'timestamp': '0:12-0:19'}, {'subject': 'check valve', 'predicate': 'function', 'object': 'allows fluid entry and prevents escape', 'timestamp': '0:20-0:32'}, {'subject': 'maximum gasoline level', 'predicate': 'determined by', 'object': 'height of fuel fill check valve', 'timestamp': '0:33-0:40'}, {'subject': 'remaining tank area', 'predicate': 'purpose', 'object': 'fuel vapor storage', 'timestamp': '0:41-0:47'}]}

Processing chunk 17 (Start: 367.711, End: 378.521)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    50 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   151 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    7757.94 ms /   201 tokens
Llama.generate: 101 prefix-match hit, remaining 39 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'gas caps', 'predicate': 'are sealed', 'object': 'on newer OBD2 cars', 'timestamp': '0:00-0:06'}, {'subject': 'gas caps', 'predicate': 'are part of a sealed system', 'object': 'including the gas tank', 'timestamp': '0:07-0:16'}, {'subject': 'gas cap', 'predicate': 'is', 'object': 'just a cap', 'timestamp': '0:17-0:23'}]}

Processing chunk 18 (Start: 378.841, End: 385.745)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    39 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   248 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11515.27 ms /   287 tokens
Llama.generate: 103 prefix-match hit, remaining 11 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'check valve', 'predicate': 'is inside', 'object': 'here', 'timestamp': '0:00:00,000-0:00:02,000'}, {'subject': 'check valve', 'predicate': 'can be removed', 'object': 'by removing', 'timestamp': '0:00:02,000-0:00:04,000'}, {'subject': 'filter', 'predicate': 'is present', 'object': 'inside the device after removing check valve', 'timestamp': '0:00:05,000-0:00:08,000'}, {'subject': 'spring', 'predicate': 'is present', 'object': 'inside the device after removing check valve', 'timestamp': '0:00:08,000-0:00:10,000'}]}

Processing chunk 19 (Start: 386.586, End: 387.967)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    11 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    49 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2172.01 ms /    60 tokens
Llama.generate: 101 prefix-match hit, remaining 11 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'valve', 'predicate': 'is', 'object': 'itself', 'timestamp': '0 11'}]}

Processing chunk 20 (Start: 388.627, End: 389.808)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    11 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    54 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2393.72 ms /    65 tokens
Llama.generate: 101 prefix-match hit, remaining 145 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'spring', 'predicate': 'followed by', 'object': 'spring', 'timestamp': '0:12-0:18'}]}

Processing chunk 21 (Start: 390.148, End: 420.44)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   145 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   180 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    8960.98 ms /   325 tokens
Llama.generate: 101 prefix-match hit, remaining 78 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'Gas cap', 'predicate': 'usually sealed', 'object': 'in normal situations', 'timestamp': '0,13'}, {'subject': 'Safety valve', 'predicate': 'exists', 'object': 'to vent pressure', 'timestamp': '13,23'}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'reads pressure', 'object': 'in the tank', 'timestamp': '53,60'}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'measures differential', 'object': 'between tank pressure and atmospheric pressure', 'timestamp': '60,74'}]}

Processing chunk 22 (Start: 420.46, End: 435.548)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    78 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   103 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5541.82 ms /   181 tokens
Llama.generate: 101 prefix-match hit, remaining 164 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'ECU', 'predicate': 'uses', 'object': 'voltage from middle terminal to determine EVAP system leaks', 'timestamp': '15-20'}, {'subject': 'vacuum switching valve', 'predicate': 'needs to be', 'object': 'cleaned from rust', 'timestamp': '25-30'}]}

Processing chunk 23 (Start: 435.808, End: 468.877)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   164 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   419 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   19959.59 ms /   583 tokens
Llama.generate: 104 prefix-match hit, remaining 59 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air in the EVAP system', 'timestamp': '0,108'}, {'subject': 'vacuum switching valve', 'predicate': 'takes inlet air from', 'object': 'gas tank', 'timestamp': '1,23'}, {'subject': 'vacuum switching valve', 'predicate': 'vents air to', 'object': 'charcoal canister', 'timestamp': '2,25'}, {'subject': 'vacuum switching valve', 'predicate': 'is', 'object': 'a very simple solenoid', 'timestamp': '3,26'}, {'subject': 'vacuum switching valve', 'predicate': 'applies 12 volts to', 'object': 'move plunger outward or inward', 'timestamp': '5,27'}, {'subject': 'vacuum switching valve', 'predicate': 'allows or blocks', 'object': 'flow of vapors going into canister', 'timestamp': '6,28'}, {'subject': 'vacuum switching valve', 'predicate': 'clicking sound indicates', 'object': 'redirecting air from inlet to outlet', 'timestamp': '8,29'}, {'subject': 'vacuu

llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    59 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   201 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    9526.70 ms /   260 tokens
Llama.generate: 101 prefix-match hit, remaining 42 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'timestamp': '0:00-0:06'}, {'subject': 'EVAP charcoal canister', 'predicate': 'process', 'object': 'absorb gasoline vapors by charcoal', 'timestamp': '0:06-0:12'}, {'subject': 'EVAP charcoal canister', 'predicate': 'process', 'object': 'release gasoline vapors back into intake', 'timestamp': '0:12-0:19'}, {'subject': 'check valves', 'predicate': 'location', 'object': 'here', 'timestamp': '0:20-0:27'}]}

Processing chunk 25 (Start: 482.045, End: 489.831)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    42 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   157 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    7582.24 ms /   199 tokens
Llama.generate: 101 prefix-match hit, remaining 29 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'one-way valves', 'predicate': 'allow', 'object': 'air to go in', 'timestamp': '0:00-0:08'}, {'subject': 'one-way valves', 'predicate': "don't allow", 'object': 'air to come out', 'timestamp': '0:08-0:12'}, {'subject': 'one-way valves', 'predicate': 'are essential for', 'object': 'running the self-diagnostic for the EVAP system', 'timestamp': '0:12-0:22'}]}

Processing chunk 26 (Start: 489.851, End: 494.159)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    29 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   111 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4906.14 ms /   140 tokens
Llama.generate: 104 prefix-match hit, remaining 171 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'the user', 'predicate': 'pulls out', 'object': 'check valves', 'timestamp': '0:00:00-0:00:03'}, {'subject': 'the user', 'predicate': 'chops open', 'object': 'something', 'timestamp': '0:00:04-0:00:07'}]}

Processing chunk 27 (Start: 496.587, End: 534.03)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   171 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   241 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11658.05 ms /   412 tokens
Llama.generate: 102 prefix-match hit, remaining 97 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'check valve', 'predicate': 'acts as', 'object': 'ball check valve', 'timestamp': '0:00-0:08'}, {'subject': 'air pressure', 'predicate': 'causes', 'object': 'diaphragm to rise', 'timestamp': '0:09-0:16'}, {'subject': 'diaphragm', 'predicate': 'allows air to escape', 'object': 'through two ports', 'timestamp': '0:17-0:26'}, {'subject': 'air pressure', 'predicate': 'prevents air from entering', 'object': 'through the other side', 'timestamp': '0:27-0:36'}, {'subject': 'charcoal canister', 'predicate': 'contains', 'object': 'paper filter', 'timestamp': '0:37-0:41'}]}

Processing chunk 28 (Start: 534.611, End: 554.608)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    97 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   272 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   12757.09 ms /   369 tokens
Llama.generate: 102 prefix-match hit, remaining 126 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'activated charcoal', 'predicate': 'absorbs', 'object': 'gasoline vapors', 'timestamp': '00:00:00-00:00:10'}, {'subject': 'activated charcoal', 'predicate': 'smells like', 'object': 'gasoline', 'timestamp': '00:00:10-00:00:15'}, {'subject': 'activated charcoal', 'predicate': 'looks like', 'object': 'rat shit', 'timestamp': '00:00:15-00:00:25'}, {'subject': 'charcoal', 'predicate': 'used for', 'object': 'barbecue', 'timestamp': '00:00:35-00:00:40'}, {'subject': 'foam things', 'predicate': 'sit on', 'object': 'springs', 'timestamp': '00:00:45-00:00:55'}]}

Processing chunk 29 (Start: 554.708, End: 579.948)


llama_perf_context_print:        load time =    1834.51 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   126 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   277 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   12823.97 ms /   403 tokens


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'spring with the pad', 'predicate': 'applies pressure to', 'object': 'charcoal', 'timestamp': '0:00:00-0:00:04'}, {'subject': 'two chambers', 'predicate': 'are separated by', 'object': 'a wall', 'timestamp': '0:00:10-0:00:15'}, {'subject': 'vapors', 'predicate': 'enter from', 'object': 'one side', 'timestamp': '0:00:17-0:00:21'}, {'subject': 'vapors', 'predicate': 'exit from', 'object': 'the other side', 'timestamp': '0:00:24-0:00:28'}, {'subject': 'EVAP system', 'predicate': 'works by', 'object': 'allowing maximum surface area of vapors to be absorbed through charcoal', 'timestamp': '0:00:33-0:00:40'}]}

Knowledge extraction complete.
Please check 'knowledge.json' for the extracted valuable knowledge.


## TIMESTAMPS WEG NU ALLEEN START+EIND

In [22]:
import json
import os
import pickle
import threading
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

# -------------------------------
# The Chatbot and LlamaSingleton
# -------------------------------

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        """
        Sends the chunk text to the model and asks it to return JSON with
        subject/predicate/object. No timestamps are generated by the model.
        """
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Try to extract any knowledge from the user.\n"
                        "Return ONLY JSON with the following schema:\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\"\n"
                        "    }\n"
                        "  ]\n"
                        "}\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"}
                                },
                                "required": ["subject", "predicate", "object"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            print("Extracted knowledge from a chunk:", knowledge_data)
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            return []

    def save_knowledge(self, triplets):
        """
        Persists triplets to `knowledge.json` and updates FAISS index if new triplets
        are found. We do not add any timestamps here.
        """
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

# -------------------------------
# Main Processing: Load Chunks and Extract Knowledge
# -------------------------------

def main():
    # 1. Load the chunks from output_chunks.json
    with open('output_chunks.json', 'r', encoding='utf-8') as file:
        data = json.load(file)
    chunks = data.get("chunks", [])
    print(f"Total chunks loaded: {len(chunks)}")

    # 2. Extract Knowledge from Each Chunk
    chatbot = Chatbot()

    for i, chunk in enumerate(chunks, start=1):
        text = chunk.get("text", "")
        start_time = chunk.get("start")
        end_time = chunk.get("end")

        print(f"\nProcessing chunk {i} (Start: {start_time}, End: {end_time})")

        # Extract valuable knowledge from the chunk text
        extracted_knowledge = chatbot.extract_valuable_knowledge(text)

        if extracted_knowledge:
            # Attach the chunk's start/end to each extracted item
            for triplet in extracted_knowledge:
                triplet['start'] = start_time
                triplet['end'] = end_time

            # Save the extracted knowledge
            chatbot.save_knowledge(extracted_knowledge)

    print("\nKnowledge extraction complete.")
    print("Please check 'knowledge.json' for the extracted valuable knowledge.")


if __name__ == "__main__":
    main()

llama_model_load_from_file_impl: using device Metal (Apple M3 Max) - 26384 MiB free
llama_model_loader: loaded meta data with 41 key-value pairs and 339 tensors from models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 7B Instruct Uncensored
llama_model_loader: - kv   3:                           general.finetune str              = Instruct-Uncensored
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 7B
llama_model_loader: - kv   6:    

Total chunks loaded: 29


load_tensors: layer   0 assigned to device CPU
load_tensors: layer   1 assigned to device CPU
load_tensors: layer   2 assigned to device CPU
load_tensors: layer   3 assigned to device CPU
load_tensors: layer   4 assigned to device CPU
load_tensors: layer   5 assigned to device CPU
load_tensors: layer   6 assigned to device CPU
load_tensors: layer   7 assigned to device CPU
load_tensors: layer   8 assigned to device CPU
load_tensors: layer   9 assigned to device CPU
load_tensors: layer  10 assigned to device CPU
load_tensors: layer  11 assigned to device CPU
load_tensors: layer  12 assigned to device CPU
load_tensors: layer  13 assigned to device CPU
load_tensors: layer  14 assigned to device CPU
load_tensors: layer  15 assigned to device CPU
load_tensors: layer  16 assigned to device CPU
load_tensors: layer  17 assigned to device CPU
load_tensors: layer  18 assigned to device CPU
load_tensors: layer  19 assigned to device CPU
load_tensors: layer  20 assigned to device CPU
load_tensors:


Processing chunk 1 (Start: 1.448, End: 15.18)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   154 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    86 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5247.00 ms /   240 tokens
Llama.generate: 82 prefix-match hit, remaining 65 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fuel and EVAP system', 'predicate': 'purpose', 'object': 'prevent harmful vapors from entering the atmosphere causing smog'}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side'}]}

Processing chunk 2 (Start: 15.2, End: 28.755)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    65 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   163 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    7965.35 ms /   228 tokens
Llama.generate: 82 prefix-match hit, remaining 35 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust'}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister'}, {'subject': 'electrical_connectors', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister'}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe'}]}

Processing chunk 3 (Start: 29.356, End: 35.579)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    35 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   100 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5190.01 ms /   135 tokens
Llama.generate: 82 prefix-match hit, remaining 35 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle'}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel'}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps'}]}

Processing chunk 4 (Start: 35.679, End: 41.803)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    35 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    75 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4096.83 ms /   110 tokens
Llama.generate: 82 prefix-match hit, remaining 468 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'thing', 'predicate': 'expected_to_snap', 'object': 'soon'}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank'}]}

Processing chunk 5 (Start: 43.564, End: 148.419)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   468 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   229 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   12886.34 ms /   697 tokens
Llama.generate: 82 prefix-match hit, remaining 147 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt'}, {'subject': 'purge line', 'predicate': 'vent harmful fumes', 'object': 'to engine'}, {'subject': 'air inlet line', 'predicate': 'comes from', 'object': 'air filter'}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister'}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines'}, {'subject': 'onboard recovery valve', 'predicate': 'ventilating gas tank', 'object': 'to EVAP canister'}, {'subject': 'vapor pressure sensor', 'predicate': 'take reading from', 'object': 'fuel tank'}]}

Processing chunk 6 (Start: 148.639, End: 180.042)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   147 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   247 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11750.70 ms /   394 tokens
Llama.generate: 82 prefix-match hit, remaining 22 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister'}, {'subject': 'air drain valve', 'predicate': 'drains', 'object': 'excess air pressure'}, {'subject': 'vapor pressure sensor', 'predicate': 'senses', 'object': 'pressure in the tank'}, {'subject': 'vacuum switching valve', 'predicate': 'works with', 'object': 'air inlet to draw a vacuum on the entire system'}, {'subject': 'ECU', 'predicate': 'uses', 'object': 'vacuum switching valve and air inlet to monitor pressure in the system'}, {'subject': 'ECU', 'predicate': 'tests', 'object': 'for leaks in the system'}, {'subject': 'check engine light', 'predicate': 'indicates', 'object': 'problems in the system'}]}

Processing chunk 7 (Start: 180.282, End: 183.305)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    22 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    28 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1563.96 ms /    50 tokens
Llama.generate: 82 prefix-match hit, remaining 51 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses'}]}

Processing chunk 8 (Start: 185.836, End: 194.882)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    51 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    59 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3520.88 ms /   110 tokens
Llama.generate: 82 prefix-match hit, remaining 118 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust'}, {'subject': 'EVAP check valve', 'predicate': 'has EVAP line removed from', 'object': 'it'}]}

Processing chunk 9 (Start: 196.161, End: 222.294)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   118 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   100 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5595.69 ms /   218 tokens
Llama.generate: 82 prefix-match hit, remaining 16 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust'}, {'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline'}, {'subject': 'evap float', 'predicate': 'locks off', 'object': 'middle valve'}]}

Processing chunk 10 (Start: 222.314, End: 223.915)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    16 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     8 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     771.34 ms /    24 tokens
Llama.generate: 82 prefix-match hit, remaining 179 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': []}

Processing chunk 11 (Start: 224.015, End: 264.245)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   179 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   193 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10015.41 ms /   372 tokens
Llama.generate: 82 prefix-match hit, remaining 15 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor'}, {'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister'}, {'subject': 'vapor', 'predicate': 'enters', 'object': 'canister'}, {'subject': 'remaining pressure', 'predicate': 'exits', 'object': 'out here'}, {'subject': 'canister', 'predicate': 'removes', 'object': 'harmful gases'}, {'subject': 'canister', 'predicate': 'contains', 'object': 'charcoal'}]}

Processing chunk 12 (Start: 264.265, End: 266.767)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     8 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     794.02 ms /    23 tokens
Llama.generate: 82 prefix-match hit, remaining 64 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': []}

Processing chunk 13 (Start: 266.787, End: 280.936)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    64 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   102 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5525.07 ms /   166 tokens
Llama.generate: 82 prefix-match hit, remaining 107 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister'}, {'subject': 'fuel tank', 'predicate': 'full', 'object': 'float will rise'}, {'subject': 'float', 'predicate': 'seals off', 'object': 'vapor ventilation'}]}

Processing chunk 14 (Start: 281.796, End: 303.949)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   107 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   222 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10923.73 ms /   329 tokens
Llama.generate: 82 prefix-match hit, remaining 81 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid'}, {'subject': 'lid', 'predicate': 'exposes', 'object': 'diaphragm'}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down'}, {'subject': 'atmospheric pressure', 'predicate': 'allows', 'object': 'vapor to move'}, {'subject': 'vapor', 'predicate': 'moves into', 'object': 'charcoal canister'}, {'subject': 'sealed fuel cap', 'predicate': 'prevents', 'object': 'vapors from escaping'}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside"}]}

Processing chunk 15 (Start: 308.171, End: 325.335)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    81 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   184 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    9387.58 ms /   265 tokens
Llama.generate: 82 prefix-match hit, remaining 185 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal'}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping'}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves'}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding'}, {'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles'}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank'}]}

Processing chunk 16 (Start: 325.575, End: 365.911)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   185 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   270 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   13254.05 ms /   455 tokens
Llama.generate: 83 prefix-match hit, remaining 50 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing'}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank'}, {'subject': 'evap check valve', 'predicate': 'is located', 'object': 'at the highest part of the tank'}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve'}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank'}, {'subject': 'check valve', 'predicate': 'prevents fluid exit', 'object': 'from the filler neck'}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve'}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation'}]}

Processing chunk 17 (Start: 367.711, End: 378.521)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    50 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   130 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6295.48 ms /   180 tokens
Llama.generate: 82 prefix-match hit, remaining 39 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system'}, {'subject': 'gas cap', 'predicate': 'is sealed', 'object': 'on newer OBD2 cars'}, {'subject': 'gas cap', 'predicate': 'forms part of', 'object': 'sealed system'}, {'subject': 'sealed system', 'predicate': 'includes', 'object': 'gas tank'}]}

Processing chunk 18 (Start: 378.841, End: 385.745)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    39 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    68 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3896.96 ms /   107 tokens
Llama.generate: 84 prefix-match hit, remaining 11 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'check valve', 'predicate': 'is present in', 'object': 'here'}, {'subject': 'filter', 'predicate': 'is present in', 'object': 'here'}, {'subject': 'spring', 'predicate': 'is present in', 'object': 'here'}]}

Processing chunk 19 (Start: 386.586, End: 387.967)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    11 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    27 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1328.32 ms /    38 tokens
Llama.generate: 82 prefix-match hit, remaining 11 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'valve', 'predicate': 'is', 'object': 'itself'}]}

Processing chunk 20 (Start: 388.627, End: 389.808)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    11 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     8 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     503.72 ms /    19 tokens
Llama.generate: 82 prefix-match hit, remaining 145 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': []}

Processing chunk 21 (Start: 390.148, End: 420.44)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   145 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   198 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    9749.41 ms /   343 tokens
Llama.generate: 82 prefix-match hit, remaining 78 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'Gas cap', 'predicate': 'usually sealed', 'object': 'in normal situations'}, {'subject': 'Safety valve', 'predicate': 'exists', 'object': 'in case of high pressure'}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube'}, {'subject': 'Filler port', 'predicate': 'has', 'object': 'plastic collar with a flap'}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'measures', 'object': 'pressure differential between tank and atmospheric pressure'}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals'}]}

Processing chunk 22 (Start: 420.46, End: 435.548)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    78 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   107 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5557.11 ms /   185 tokens
Llama.generate: 82 prefix-match hit, remaining 164 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'ECU', 'predicate': 'uses', 'object': 'voltage from middle terminal'}, {'subject': 'voltage from middle terminal', 'predicate': 'determines', 'object': 'EVAP system leaks'}, {'subject': 'vacuum switching valve', 'predicate': 'is being', 'object': 'broken rust off'}]}

Processing chunk 23 (Start: 435.808, End: 468.877)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   164 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   224 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10730.82 ms /   388 tokens
Llama.generate: 85 prefix-match hit, remaining 59 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister'}, {'subject': 'vacuum switching valve', 'predicate': 'activated by', 'object': 'application of 12 volts'}, {'subject': 'vacuum switching valve', 'predicate': 'operates through', 'object': 'a plunger moving outward or inward'}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister'}, {'subject': 'vacuum switching valve', 'predicate': 'blocks', 'object': 'air flow to charcoal canister'}, {'subject': 'vacuum switching valve', 'predicate': 'clicks when', 'object': 'air flow direction is changed'}]}

Processing chunk 24 (Start: 469.037, End: 482.005)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    59 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   142 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6781.64 ms /   201 tokens
Llama.generate: 82 prefix-match hit, remaining 42 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors'}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors'}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 're-release gasoline vapors'}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors'}]}

Processing chunk 25 (Start: 482.045, End: 489.831)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    42 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   110 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5628.41 ms /   152 tokens
Llama.generate: 82 prefix-match hit, remaining 29 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'one-way valves', 'predicate': 'allow', 'object': 'air to go in'}, {'subject': 'one-way valves', 'predicate': 'not allow', 'object': 'air to come out'}, {'subject': 'one-way valves', 'predicate': 'essential for', 'object': 'running the self-diagnostic for the EVAP system'}]}

Processing chunk 26 (Start: 489.851, End: 494.159)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    29 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    28 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1609.65 ms /    57 tokens
Llama.generate: 85 prefix-match hit, remaining 171 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves'}]}

Processing chunk 27 (Start: 496.587, End: 534.03)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   171 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   239 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11582.22 ms /   410 tokens
Llama.generate: 83 prefix-match hit, remaining 97 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached'}, {'subject': 'Diaphragm', 'predicate': 'functions as', 'object': 'a component that rises when air pressure builds up'}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve'}, {'subject': 'Air pressure', 'predicate': 'triggers', 'object': 'the release of air'}, {'subject': 'Air', 'predicate': 'can escape', 'object': 'through two ports on the side'}, {'subject': 'Air', 'predicate': 'cannot enter', 'object': 'the canister from this side'}, {'subject': 'Paper filter', 'predicate': 'is part of', 'object': 'the charcoal canister'}]}

Processing chunk 28 (Start: 534.611, End: 554.608)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    97 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   130 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6684.32 ms /   227 tokens
Llama.generate: 83 prefix-match hit, remaining 126 prompt tokens to eval


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'Activated charcoal', 'predicate': 'Absorbs', 'object': 'Gasoline vapors'}, {'subject': 'Activated charcoal', 'predicate': 'Smells like', 'object': 'Gasoline'}, {'subject': 'Activated charcoal', 'predicate': 'Looks like', 'object': 'Small pieces of rat shit'}, {'subject': 'Activated charcoal', 'predicate': 'Amount', 'object': 'A lot'}]}

Processing chunk 29 (Start: 554.708, End: 579.948)


llama_perf_context_print:        load time =    1721.96 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   126 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   175 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    9012.90 ms /   301 tokens


Extracted knowledge from a chunk: {'valuable_knowledge': [{'subject': 'spring with the pad', 'predicate': 'applies pressure', 'object': 'charcoal'}, {'subject': 'charcoal', 'predicate': 'becomes compact', 'object': 'due to pressure'}, {'subject': 'EVAP system', 'predicate': 'has two chambers', 'object': 'with a wall separating them'}, {'subject': 'vapors', 'predicate': 'enter from one side', 'object': 'and exit from the other side'}, {'subject': 'vapors', 'predicate': 'have maximum surface area', 'object': 'absorbed by charcoal'}]}

Knowledge extraction complete.
Please check 'knowledge.json' for the extracted valuable knowledge.


## TEST door te chatten

In [27]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def save_json_data(self, file_path, data):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

    def extract_valuable_knowledge(self, message):
        response = self.llm.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a knowledge extractor. Try to Extract any knowledge from the user.\n"
                        "Return ONLY JSON with the following schema:\n"
                        "{\n"
                        "  \"valuable_knowledge\": [\n"
                        "    {\n"
                        "      \"subject\": \"...\",\n"
                        "      \"predicate\": \"...\",\n"
                        "      \"object\": \"...\",\n"
                        "      \"timestamp\": \"...\"  # ISO8601\n"
                        "    }\n"
                        "  ]\n"
                        "}\n"
                        "If no knowledge can be extracted, return:\n"
                        "{\"valuable_knowledge\": []}"
                    )
                },
                {"role": "user", "content": message},
            ],
            response_format={
                "type": "json",
                "schema": {
                    "type": "object",
                    "properties": {
                        "valuable_knowledge": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "subject": {"type": "string"},
                                    "predicate": {"type": "string"},
                                    "object": {"type": "string"},
                                    "timestamp": {"type": "string", "format": "date-time"}
                                },
                                "required": ["subject", "predicate", "object", "timestamp"]
                            }
                        }
                    },
                    "required": ["valuable_knowledge"],
                },
            },
            temperature=0.5,
        )
        try:
            knowledge_data = json.loads(response['choices'][0]['message']['content'])
            print(knowledge_data)
            if "valuable_knowledge" not in knowledge_data:
                knowledge_data["valuable_knowledge"] = []
            return knowledge_data["valuable_knowledge"]
        except (JSONDecodeError, KeyError):
            return []

    def save_message(self, role, content):
        messages = self.load_json_data(self.messages_file)
        message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
        messages.append(message)
        self.save_json_data(self.messages_file, messages)

    def save_knowledge(self, triplets):
        if not triplets:
            return
        knowledge = self.load_json_data(self.knowledge_file)
        existing_set = {(t['subject'], t['predicate'], t['object']) for t in knowledge}
        new_triplets = []
        for triplet in triplets:
            triplet['timestamp'] = datetime.utcnow().isoformat()
            key = (triplet['subject'], triplet['predicate'], triplet['object'])
            if key not in existing_set:
                knowledge.append(triplet)
                new_triplets.append(triplet)
                existing_set.add(key)
        self.save_json_data(self.knowledge_file, knowledge)
        if new_triplets:
            self.update_faiss_index(new_triplets)

    def update_faiss_index(self, triplets):
        texts = [f"{t['subject']} {t['predicate']} {t['object']}" for t in triplets]
        embeddings = self.model.encode(texts)
        if self.index is None:
            self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings, dtype=np.float32))
        self.knowledge_data.extend(triplets)
        self.save_faiss_index()

    def save_faiss_index(self):
        with open(self.faiss_index_file, 'wb') as f:
            pickle.dump((self.index, self.knowledge_data), f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        if self.index is None or len(self.knowledge_data) == 0:
            return []
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        results = []
        for idx in indices[0]:
            if idx == -1:
                continue
            results.append(self.knowledge_data[idx])
        return results

    def generate_response(self, conversation_history, user_message):
        knowledge_matches = None #self.search_knowledge(user_message, top_k=5)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Answer based on retrieved knowledge:\n"
            for t in knowledge_matches:
                system_message += f"- {t['subject']} {t['predicate']} {t['object']} (Videotimestamps: start: {t['start']}, end: {t['end']})\n"
            
        else:
            system_message += "No direct related knowledge found. Proceeding with general reasoning.\n"
        enriched_history = [{"role": "system", "content": f"You are a helpful assistent; {system_message}"}] #+ conversation_history
        enriched_history.append({"role": "user", "content": user_message})
        print(enriched_history)
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.7,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower().strip() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break
            self.save_message(role='user', content=user_message)
            conversation = self.load_json_data(self.messages_file)[-3:]
            assistant_response = self.generate_response(conversation, user_message)
            print(f"Assistant: {assistant_response}")
            #generate_speech(assistant_response)
            self.save_message(role='assistant', content=assistant_response)
            #user_knowledge_response = self.extract_valuable_knowledge(user_message)
            #print(user_knowledge_response)  
            #if user_knowledge_response:
                #self.save_knowledge(user_knowledge_response)

if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.chat()

llama_model_load_from_file_impl: using device Metal (Apple M3 Max) - 25280 MiB free
llama_model_loader: loaded meta data with 41 key-value pairs and 339 tensors from models/Qwen2.5-7B-Instruct-Uncensored.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 7B Instruct Uncensored
llama_model_loader: - kv   3:                           general.finetune str              = Instruct-Uncensored
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 7B
llama_model_loader: - kv   6:    

Chatbot is ready! Type 'exit' to end the conversation.


  message = {"role": role, "content": content, "timestamp": datetime.utcnow().isoformat()}
  current_time = datetime.utcnow().isoformat()


[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-24T13:00:41.143422\nNo direct related knowledge found. Proceeding with general reasoning.\n'}, {'role': 'user', 'content': '"question": "Which component is responsible for sensing the fuel tank\'s pressure and comparing it with atmospheric pressure?",       "options": [         "Fuel filler neck",         "Vapor pressure sensor",         "Vacuum switching valve",         "Purge line"       ],       "answer": "Vapor pressure sensor”'}]


llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   130 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   379 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   17312.43 ms /   509 tokens


Assistant: The component responsible for sensing the fuel tank's pressure and comparing it with atmospheric pressure is the vapor pressure sensor. Here's a detailed explanation:

1. **Vapor Pressure Sensor**:
   - The vapor pressure sensor is specifically designed to monitor the pressure inside the fuel tank. It can measure the pressure difference between the fuel tank and the atmosphere.
   - This sensor is usually integrated into the fuel filler neck, which is the part of the fuel system that connects the fuel pump to the fuel tank. 
   - The vapor pressure sensor sends information about the fuel tank's pressure to the vehicle's electronic control unit (ECU), which uses this data to adjust the fuel pressure as needed.

2. **Fuel Filler Neck**:
   - The fuel filler neck is a component of the fuel system that provides access to the fuel tank. It contains the vapor pressure sensor but is not the sensor itself.
   - The fuel filler neck is involved in fueling the vehicle and venting the 

Llama.generate: 31 prefix-match hit, remaining 89 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    89 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   224 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10565.49 ms /   313 tokens


Assistant: The component responsible for sensing the fuel tank's pressure and comparing it with atmospheric pressure is the vapor pressure sensor.

Here's an explanation of each option:

1. **Fuel filler neck**: This is the part of the fuel system where fuel is added to the vehicle. It is not involved in the sensing of fuel tank pressure.

2. **Vapor pressure sensor**: This component is specifically designed to sense the pressure inside the fuel tank and compare it with atmospheric pressure. It is typically part of the fuel vapor recovery system and is essential for maintaining the correct pressure balance in the fuel tank.

3. **Vacuum switching valve**: This component is used to manage the flow of fuel vapors between the fuel tank and the charcoal canister. It does not directly sense the pressure of the fuel tank.

4. **Purge line**: This is a part of the fuel vapor recovery system that allows the fuel vapor from the charcoal canister to be vented to the fuel tank when the engine is 

Llama.generate: 31 prefix-match hit, remaining 108 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   108 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   408 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   18074.80 ms /   516 tokens


Assistant: The vacuum switching valve plays a key role in the EVAP (Evaporative Emission Control) system. The correct answer is: "It directs air flow within the system when 12 volts is applied". Here's a detailed explanation:

1. **Function of the EVAP System**: The purpose of the EVAP system is to contain and recover fuel vapors that are generated in the fuel tank and fuel lines. It does this by sealing the fuel vapor collection system from the atmosphere and recycling the vapors back to the fuel tank to be burned.

2. **Role of the Vacuum Switching Valve**: The vacuum switching valve is part of the purge control system. When the engine is running, it directs air flow into the charcoal canister (which holds the fuel vapors), allowing the vapors to be drawn into the engine and burned. The valve switches between the canister and the atmosphere, directing air flow into the canister when the engine is running and into the atmosphere when it's not.

3. **Operation**: The valve's operation 

Llama.generate: 30 prefix-match hit, remaining 83 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    83 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   303 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   13845.45 ms /   386 tokens


Assistant: The component in the system that uses activated charcoal is the EVAP (Evaporative Emission Control) canister.

Here's a detailed explanation:

1. **Fuel Filler Neck**: This is the opening where you fill the vehicle with fuel. It does not contain activated charcoal. It is primarily designed to allow fuel to enter the tank and maintain a secure seal.

2. **Gas Tank**: While the fuel tank is essential for storing fuel, it does not incorporate activated charcoal. The tank's primary function is to contain the fuel and prevent physical damage.

3. **EVAP Canister**: This component is specifically designed to use activated charcoal. Its main purpose is to filter and store fuel vapors that might escape from the fuel tank. Activated charcoal is an excellent material for adsorption, which means it can attract and hold onto fuel vapors. The canister is connected to the fuel tank and the intake manifold. When the engine is running, the vacuum from the intake manifold draws fuel vapors f

Llama.generate: 30 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   215 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10262.47 ms /   314 tokens


Assistant: The correct answer is: "To prevent fuel from sloshing as the vehicle takes corners."

Explanation:
Baffles inside a gas tank are designed to manage the movement of fuel within the tank, particularly as the vehicle accelerates, brakes, or turns. The primary purpose of baffles is to keep the fuel from sloshing, which can lead to uneven fuel distribution and potential fuel starvation in the fuel pump or carburetor.

Let's briefly address the other options:

1. "To increase fuel capacity": Baffles do not increase the total fuel storage capacity of the tank. They simply manage the fuel's movement within the tank.

2. "To cool the fuel": Baffles are not designed to cool the fuel. The fuel temperature is more influenced by the ambient temperature, the fuel system's design, and the vehicle's overall thermal management.

3. "To filter fuel impurities": Fuel filters are typically installed in the fuel line between the fuel pump and the engine. Baffles do not filter impurities from the

Llama.generate: 31 prefix-match hit, remaining 175 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   175 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   112 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6493.48 ms /   287 tokens


Assistant: The baffles inside a gas tank are designed to prevent fuel from sloshing as the vehicle takes corners. Baffles are typically flat or curved plates placed inside the fuel tank to keep the fuel from moving around too much when the vehicle is turning. This is important because if the fuel sloshed around too much, it could potentially cause the fuel pump to suck in air instead of fuel, which could lead to fuel starvation and engine failure. So the correct answer to your question is option B: To prevent fuel from sloshing as the vehicle takes corners.
[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-24T13:36:01.718198\nNo direct related knowledge found. Proceeding with general reasoning.\n'}, {'role': 'user', 'content': '"statement": "The EVAP system is designed to reduce harmful vapor emissions by capturing fuel vapors before they escape into the atmosphere.",       "answer": true'}]


Llama.generate: 30 prefix-match hit, remaining 67 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    67 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   390 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   17539.68 ms /   457 tokens


Assistant: The statement about the EVAP (Evaporative Emission Control) system is correct. The purpose of an EVAP system is indeed to minimize the emission of harmful gases into the air by capturing fuel vapors before they are released from the fuel tank. This system is a crucial part of modern vehicle emission control systems.

Here's a more detailed explanation of how an EVAP system works:

1. **Fuel Vapor Collection**: The fuel vapor control system collects fuel vapors that are generated in the fuel tank and in the fuel lines. These vapors are a byproduct of the fuel evaporation process.

2. **Activated Charcoal Canister**: The fuel vapors are stored in an activated charcoal canister. Activated charcoal has a high adsorption capacity, meaning it can hold a large amount of fuel vapors without getting saturated.

3. **Evaporation Emission Control Valve (EVAP Valve)**: This valve regulates the flow of fuel vapor from the canister to the intake manifold, where the vapors are burned along

Llama.generate: 33 prefix-match hit, remaining 59 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    59 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   437 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   19174.18 ms /   496 tokens


Assistant: The statement is correct. The Evaporative Emission Control System, commonly known as the EVAP system, is a vital component of a vehicle's emission control system. Its primary function is to manage fuel vapors, which are a by-product of the evaporation of fuel in the fuel tank.

Here's a more detailed breakdown of the EVAP system and its function:

1. **Fuel Tank**: Fuel is stored in the tank, and as it evaporates, fuel vapors are produced. These vapors are less harmful than the actual fuel, but they're still pollutants that need to be controlled.

2. **Charcoal Canister**: The EVAP system captures these fuel vapors in a canister filled with activated charcoal. The charcoal has a high adsorption capacity, which means it can hold a large amount of fuel vapors without breaking down.

3. **Purge Valves and Control Module**: When the engine is running, the control module sends a signal to the purge valve. This valve opens, allowing the fuel vapors to be drawn from the canister in

Llama.generate: 30 prefix-match hit, remaining 54 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    54 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   240 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11020.58 ms /   294 tokens


Assistant: The EVAP (Evaporative Emission Control) system is a crucial part of modern vehicles' exhaust systems. It's designed to capture fuel vapors that can escape from the fuel tank and prevent them from being emitted into the atmosphere.

The location of the EVAP canister can vary depending on the make and model of the vehicle. However, it's common for the canister to be located near the fuel tank, often in the rear part of the vehicle. This is because the fuel vapors tend to rise and accumulate near the top of the fuel tank.

In the case of the statement you've provided, it's accurate to say that the EVAP canister is often located underneath the rear seat of the vehicle. This placement allows for easy access when servicing the system. 

However, it's important to note that this is not a universal truth for every vehicle. Some cars may have the EVAP canister in a different location, such as near the fuel pump or even in the engine bay. Therefore, when performing maintenance on a ve

Llama.generate: 31 prefix-match hit, remaining 132 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   132 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   133 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    7135.24 ms /   265 tokens


Assistant: False.

The statement "The EVAP canister is located underneath the rear seat of the vehicle" is not generally true for all vehicles. The exact location of the EVAP (Evaporative Emission Control) canister can vary depending on the vehicle's make, model, and engine configuration. Typically, the EVAP canister is found in the rear part of the vehicle's fuel tank, but it can also be located in other areas. Some vehicles have the EVAP canister in the engine compartment, while others might have it in a location that is not easily accessible. The rear seat area is not a common location for the EVAP canister.
[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-24T13:44:27.821261\nNo direct related knowledge found. Proceeding with general reasoning.\n'}, {'role': 'user', 'content': '"statement": "The fuel and EVAP system includes the fuel tank, EVAP canister, vacuum lines, and the fuel filler neck.",       "answer": true'}]


Llama.generate: 31 prefix-match hit, remaining 68 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    68 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   213 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    9958.50 ms /   281 tokens


Assistant: To determine if the given statement is correct, let's break down the components mentioned in the statement and verify each one:

1. Fuel Tank: This is a component of the fuel and EVAP (Evaporative Emission Control) system. The fuel tank stores the fuel and is connected to the fuel lines and filler neck. 

2. EVAP Canister: The EVAP canister is a crucial component of the fuel and EVAP system. Its purpose is to capture and store fuel vapors that are produced during the fuel vaporization process. 

3. Vacuum Lines: These lines are part of the fuel system. They are used to regulate the pressure in the fuel system and ensure a smooth flow of fuel. 

4. Fuel Filler Neck: This is the part of the vehicle where the fuel is added. It is connected to the fuel tank and is an essential component of the fuel and EVAP system.

Given the above information, the statement correctly identifies the components included in the fuel and EVAP system. Therefore, the answer is true.
[{'role': 'system

Llama.generate: 33 prefix-match hit, remaining 61 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    61 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   426 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   18688.73 ms /   487 tokens


Assistant: This statement is partially correct. The fuel and EVAP (Emission Vapor) system does include the fuel tank, EVAP canister, vacuum lines, and the fuel filler neck. However, it is important to note that the system also includes other components such as the fuel pump, fuel pressure regulator, fuel injectors, and the fuel vapor recovery valve. Here's a more complete overview of the fuel and EVAP system:

1. Fuel Tank: This is the main storage container for the fuel. It is typically made of metal and has a vapor-tight seal to prevent fuel vapor leaks.

2. EVAP Canister: This is a sealed container that stores fuel vapors produced by the fuel in the tank. The canister is connected to the fuel tank by a vacuum line, allowing the fuel vapor to be transferred to the engine when needed.

3. Vacuum Lines: These lines connect the fuel tank and the EVAP canister to the engine. They allow the fuel vapor to be drawn into the engine and burned off, reducing emissions.

4. Fuel Filler Neck: Th

Llama.generate: 31 prefix-match hit, remaining 58 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    58 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   407 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   17933.03 ms /   465 tokens


Assistant: The statement is accurate. The vapor pressure sensor in the Evaporative Emission Control (EVAP) system indeed compares the pressure in the fuel tank to atmospheric pressure. This comparison is crucial for determining whether there is a significant difference between the two pressures, which could indicate a problem with the EVAP system.

To elaborate, the EVAP system is designed to prevent fuel vapors from escaping into the atmosphere. It does this by collecting the vapors in a charcoal canister and then periodically routing the vapors into the engine for combustion. The vapor pressure sensor is a key component in this system.

Here's how the vapor pressure sensor works:

1. **Pressure Comparison**: The sensor compares the pressure inside the fuel tank with the atmospheric pressure outside. This comparison is made by measuring the pressure difference between the two locations.

2. **System Operation**: If the pressure in the fuel tank is higher than atmospheric pressure, it 

Llama.generate: 30 prefix-match hit, remaining 63 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    63 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   366 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   16335.89 ms /   429 tokens


Assistant: The statement provided is correct. The onboard recovery valve, also known as the fill check valve, plays a crucial role in the refueling process. It is designed to manage the flow of fuel vapor, ensuring that the fueling operation runs smoothly and safely.

Here's a more detailed explanation:

1. **Function**: The onboard recovery valve's primary function is to monitor and control the flow of vapor that is generated during the refueling process. When the fuel tank is nearly full, the vapor pressure inside the tank increases, and this excess vapor needs to be vented out to prevent overpressure and potential fuel spillage.

2. **Valve Operation**: When the fuel tank is filling up, the onboard recovery valve opens, allowing the excess vapor to be vented out. It is designed to close when the fuel tank is full, preventing fuel from spilling out. This valve operates automatically and is essential in ensuring that the refueling operation is safe and efficient.

3. **Fill Check Valv

Llama.generate: 28 prefix-match hit, remaining 69 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    69 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   157 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    7795.51 ms /   226 tokens


Assistant: The statement "The onboard recovery valve is also known as the fill check valve and is responsible for venting excess vapor during refueling" is partially correct.

The onboard recovery valve (also known as the vapor valve or recovery valve) is a component in aircraft fuel systems. Its primary function is to vent excess vapor during the refueling process. However, it's not accurate to say that the onboard recovery valve is also known as the fill check valve. The fill check valve is a separate component in the refueling system that prevents fuel from flowing back into the fuel truck or aircraft fueling hose during refueling.

In summary, the onboard recovery valve and the fill check valve are related, but they serve different purposes in the refueling process. So, the statement is not entirely true.
[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-24T15:15:48.828164\nNo direct related knowledge found. Proceeding with general reasonin

Llama.generate: 28 prefix-match hit, remaining 55 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    55 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   383 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   18008.76 ms /   438 tokens


Assistant: To answer this question, we need to understand the context of removing a fill check valve, which is typically found in the plumbing system of a building or a piece of equipment. The fill check valve is a component that prevents backflow of water in a system. It is often rusted or corroded, and may require some preparation before it can be removed.

Here's a step-by-step process to determine how many pieces of rust must be removed to take off the fill check valve:

1. **Identify the location and condition of the valve**: Locate the fill check valve in the system. Assess the extent of rust and corrosion that has accumulated on the valve.

2. **Determine the severity of rust**: Rust and corrosion can be in various stages, ranging from light to heavy. If the rust is light, you might be able to remove it with a wire brush or a rust remover. If it is heavy, you might need to use a chisel or hammer to remove it.

3. **Estimate the number of rust pieces**: Once you've assessed the s

Llama.generate: 28 prefix-match hit, remaining 48 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    48 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   400 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   18173.68 ms /   448 tokens


Assistant: An Evaporative Emission Control System (EVAP) canister is secured in place to ensure that it remains functional and doesn't become dislodged or damaged during the vehicle's operation. Here's an overview of how an EVAP canister is typically secured:

1. Mounting: The EVAP canister is mounted to the vehicle's frame or chassis using one or more brackets. These brackets are usually attached to the canister via bolts or screws. The specific mounting location and number of brackets vary depending on the vehicle's design.

2. Sealing: To prevent fuel vapors from leaking around the mounting points, a sealant or gasket is used between the canister and the mounting brackets. This sealant helps maintain a tight seal and prevents fuel vapor emissions.

3. Reinforcement: In some cases, additional reinforcement may be added to the mounting points to ensure that the EVAP canister remains secure during operation. This can include additional brackets, straps, or other reinforcement component

Llama.generate: 31 prefix-match hit, remaining 97 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    97 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   247 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11644.83 ms /   344 tokens


Assistant: To answer the question about how an EVAP (Evaporative Emission Control System) canister is secured in place, let's analyze the provided options:

1. "With two 12mm pieces of rust" - Rust is a result of metal corrosion and is not a method of securing. This option is incorrect.

2. "With four 14mm bolts" - Bolts are a common method for securing components in vehicles. Four 14mm bolts could be a plausible method for securing the EVAP canister.

3. "With two 14mm bolts" - Similar to the previous option, this suggests the use of bolts, which are indeed used for securing parts in vehicles. Two 14mm bolts could be a feasible method.

4. "With six pieces of rust" - Again, rust is not a method of securing. This option is incorrect.

While both options 2 and 3 suggest the use of bolts, it is more common for EVAP canisters to be secured with four bolts rather than just two. This is because four bolts provide a more secure and stable installation. 

Therefore, the most likely correct an

Llama.generate: 28 prefix-match hit, remaining 51 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    51 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   168 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    8113.95 ms /   219 tokens


Assistant: The bolt size used for securing a strap onto a gas tank can vary depending on the specific manufacturer, model, and intended application. However, a common size for securing straps onto gas tanks is a 5/16-inch bolt, which is often used in the automotive industry. 

To give you a better idea of the size, a 5/16-inch bolt has a diameter of about 7.95 millimeters. It's important to ensure that the bolt size used is appropriate for the strap and the gas tank, as using the wrong size can lead to safety concerns.

In some cases, other bolt sizes may be used, such as 3/8-inch or 7/16-inch bolts. It's always best to consult the manufacturer's specifications or guidelines for the specific gas tank and strap being used.
[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-24T18:02:32.204775\nNo direct related knowledge found. Proceeding with general reasoning.\n'}, {'role': 'user', 'content': '"question": "What bolt size is used for securing a s

Llama.generate: 31 prefix-match hit, remaining 84 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    84 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   139 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6942.48 ms /   223 tokens


Assistant: The bolt size used for securing a strap onto a gas tank can vary depending on the specific requirements of the application, but 16mm is a common size for securing straps to gas tanks. 

This size is large enough to provide adequate strength for securing the strap to the tank, while still being small enough to fit into the holes typically found on gas tanks. 

12mm, 14mm, and 10mm bolts are also common in various applications, but they are typically used for other types of fastening and may not be strong enough to secure a strap to a gas tank. 

Therefore, the most likely answer among the options provided is 16mm.
[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-24T18:04:40.206840\nNo direct related knowledge found. Proceeding with general reasoning.\n'}, {'role': 'user', 'content': '"question": "What component limits the maximum level of gasoline in the tank?”,       "options": [         "The fuel pump",         "The fuel filler nec

Llama.generate: 31 prefix-match hit, remaining 86 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    86 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   134 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    6696.04 ms /   220 tokens


Assistant: The component that limits the maximum level of gasoline in the tank is the fuel filler neck. The fuel filler neck is a tube that connects the fuel tank to the fuel filler port on the vehicle. It has a vent that allows air to flow in and out of the tank as fuel is added. At the bottom of the filler neck is a restrictor, which is a small hole that limits the amount of fuel that can enter the tank at any one time. This restrictor ensures that the tank cannot be overfilled, protecting the vehicle and its fuel system from potential damage. The other components listed are not directly responsible for limiting the maximum level of gasoline in the tank.
[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-24T18:06:32.270583\nNo direct related knowledge found. Proceeding with general reasoning.\n'}, {'role': 'user', 'content': '"question": "What is the primary job of the baffles inside the gas tank?”,       "options": [         "To increase fuel

Llama.generate: 31 prefix-match hit, remaining 96 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    96 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   229 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10786.36 ms /   325 tokens


Assistant: The primary job of the baffles inside the gas tank is to prevent fuel from sloshing back and forth. Baffles are strategically placed inside the fuel tank to create compartments that restrict the movement of fuel, which prevents fuel from sloshing and spilling during vehicle acceleration, braking, or turning. This helps maintain the fuel level at a stable position and ensures a consistent fuel supply to the engine.

The other options listed are not the primary job of baffles:

1. Increasing fuel capacity: The baffles do not contribute to increasing the fuel capacity of the tank. In fact, they can slightly reduce the effective fuel storage space by creating additional compartments.

2. Filtering impurities from fuel: While fuel filters are used to remove impurities from the fuel, the baffles in the fuel tank do not play a significant role in this process. Fuel filters are typically located in the fuel lines between the fuel tank and the engine.

3. Cooling the fuel: Baffles ar

Llama.generate: 30 prefix-match hit, remaining 86 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    86 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   305 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   14256.99 ms /   391 tokens


Assistant: The question provided does not directly relate to the given options. However, if the question is related to a scenario involving a tank and atmospheric pressure, the most likely correct answer would be:

"When the tank is pressurized above atmospheric pressure."

This answer makes sense in the context of fluid dynamics and atmospheric pressure. Here's a breakdown:

1. "When the tank is pressurized above atmospheric pressure": This suggests that the tank's pressure is greater than the surrounding atmospheric pressure. When this happens, the fluid inside the tank can be forced out through a valve or opening, regardless of the atmospheric pressure on the other side.

2. "As long as that side is at atmospheric pressure": This implies that if one side of the tank is at atmospheric pressure, fluid can flow from the pressurized side to the atmospheric side. This is true, as fluid will flow from a region of high pressure to a region of low pressure.

3. "When the engine is running":

Llama.generate: 31 prefix-match hit, remaining 97 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    97 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   258 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   11907.38 ms /   355 tokens


Assistant: The function of the check valves in the EVAP (Evaporative Emission Control System) charcoal canister is to "Allow air in but block air out." This is necessary for maintaining the proper operation of the system.

In the EVAP system, the charcoal canister stores gasoline vapors that are emitted from the fuel tank. The system's goal is to prevent these vapors from escaping into the atmosphere. To achieve this, the system uses a series of valves and sensors.

The check valves are crucial in this process. When the engine is running, the intake manifold vacuum draws the vapor stored in the charcoal canister into the intake manifold and then into the engine for combustion. This is facilitated by check valves that allow air to enter the canister but prevent vapor from escaping back into the atmosphere.

When the engine is not running, the check valves block the air flow, keeping the vapors trapped in the canister. This prevents them from escaping into the atmosphere until the next t

Llama.generate: 31 prefix-match hit, remaining 97 prompt tokens to eval
llama_perf_context_print:        load time =    1613.15 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    97 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   296 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   13676.78 ms /   393 tokens


Assistant: The function of the check valves in the EVAP (Evaporative Emission Control System) charcoal canister is to "Allow air in but block air out". This is the correct option among the given choices.

The EVAP system is designed to capture gasoline vapors that may escape from the fuel tank and fuel lines. It does this by using a charcoal canister to store the vapors. The canister is connected to the fuel tank and the engine intake manifold through a series of valves, including check valves.

The purpose of the check valves is to allow a one-way flow of air and vapors. Specifically, they allow air to flow from the engine intake manifold into the charcoal canister to purge the stored vapors, but they prevent air from flowing directly from the atmosphere into the fuel tank. This is important because it maintains a vacuum in the fuel tank, which helps to keep the gasoline vapors contained.

The other options listed are not correct for the check valves in the EVAP system:

- "Allow air 

: 

## EVALUTION PIPELINE

In [4]:
import json
import os
import pickle
from datetime import datetime
from llama_cpp import Llama
import threading
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2'):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        self.llm = LlamaSingleton().llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)


    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        if self.index is None or len(self.knowledge_data) == 0:
            return []
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        results = []
        for idx in indices[0]:
            if idx == -1:
                continue
            results.append(self.knowledge_data[idx])
        return results

    def generate_response(self, user_message):
        knowledge_matches = self.search_knowledge(user_message, top_k=5)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Answer based on retrieved knowledge, but only if it relates to the question:\n"
            for t in knowledge_matches:
                system_message += f"- {t['subject']} {t['predicate']} {t['object']} (Videotimestamps: start: {t['start']}, end: {t['end']})\n"
            
        else:
            system_message += "\n"
        enriched_history = [{"role": "system", "content": f"You are a helpful assistent; {system_message}"}] 
        enriched_history.append({"role": "user", "content": user_message})
        print(enriched_history)
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.5,
        )['choices'][0]['message']['content']
        return response

    def chat(self):
        print("Chatbot is ready! Type 'exit' to end the conversation.")
        while True:
            user_message = input("You: ")
            if user_message.lower().strip() in ['exit', 'quit']:
                print("Chatbot: Goodbye!")
                break
            assistant_response = self.generate_response(user_message)
            print(f"Assistant: {assistant_response}")

if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.chat()

llama_model_load_from_file_impl: using device Metal (Apple M3 Max) - 26578 MiB free
llama_model_loader: loaded meta data with 41 key-value pairs and 339 tensors from models/Qwen2.5-7B-Instruct.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 7B Instruct Uncensored
llama_model_loader: - kv   3:                           general.finetune str              = Instruct-Uncensored
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 7B
llama_model_loader: - kv   6:               

Chatbot is ready! Type 'exit' to end the conversation.


  current_time = datetime.utcnow().isoformat()


[{'role': 'system', 'content': 'You are a helpful assistent; Current date and time: 2025-02-25T18:49:12.495904\nAnswer based on retrieved knowledge:\n- valve is itself (Videotimestamps: start: 386.586, end: 387.967)\n- thing expected_to_snap soon (Videotimestamps: start: 35.679, end: 41.803)\n- spring is present in here (Videotimestamps: start: 378.841, end: 385.745)\n- check valve is present in here (Videotimestamps: start: 378.841, end: 385.745)\n- filter is present in here (Videotimestamps: start: 378.841, end: 385.745)\n'}, {'role': 'user', 'content': 'damn'}]


llama_perf_context_print:        load time =    2169.32 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   232 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   210 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   10846.34 ms /   442 tokens


Assistant: Based on the timestamps provided, it seems like the context is related to a mechanical or engineering system, possibly a hydraulic or pneumatic system. Here's a summary of the key points:

1. A valve is mentioned, and it's being described as itself (likely referring to a specific valve type or component).

2. Something expected to snap soon is mentioned, which could refer to a part like a spring or a valve component that's nearing the end of its lifespan.

3. A spring is present, which could be part of a valve mechanism or another component that operates by spring tension.

4. Check valves, filters, and other components are also present, suggesting that the system being described is complex and has multiple parts with specific functions.

Given this information, it seems like the system might be experiencing some wear and tear, with a part (possibly a spring) expected to fail soon. The presence of check valves, filters, and other components suggests that the system is design

Llama.generate: 33 prefix-match hit, remaining 222 prompt tokens to eval
llama_perf_context_print:        load time =    2169.32 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   222 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    79 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5195.39 ms /   301 tokens


Assistant: Based on the provided timestamps from the video, it seems that the vapors enter from one side and exit from the other side of the EVAP canister. This is indicated in the video segment starting at 554.708 and ending at 579.948. Therefore, the exit point for the vapors is the opposite side from where they enter.
Chatbot: Goodbye!


## EVALUATIE SCRIPT

In [23]:
import json
import os
import pickle
import threading
import sys
from datetime import datetime
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from json import JSONDecodeError

# -------------------------------
# The Chatbot and LlamaSingleton
# -------------------------------

class LlamaSingleton:
    _instance = None
    _lock = threading.Lock()

    def __new__(cls, model_path="models/Qwen2.5-7B-Instruct.Q4_K_M.gguf", chat_format="chatml"):
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlamaSingleton, cls).__new__(cls)
                cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
            return cls._instance

class Chatbot:
    def __init__(self, 
                 messages_file='messages.json', 
                 knowledge_file='knowledge.json', 
                 faiss_index_file='faiss_index.pkl',
                 model_name='all-MiniLM-L6-v2',
                 llm_model_path="models/Qwen2.5-7B-Instruct.Q4_K_M.gguf"):
        self.messages_file = messages_file
        self.knowledge_file = knowledge_file
        self.faiss_index_file = faiss_index_file
        # Instantiate LlamaSingleton with the provided model path
        self.llm = LlamaSingleton(model_path=llm_model_path).llm
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.knowledge_data = []
        self.initialize_files()
        self.load_faiss_index()

    def initialize_files(self):
        for file in [self.messages_file, self.knowledge_file]:
            if not os.path.exists(file):
                with open(file, 'w') as f:
                    json.dump([], f)

    def load_json_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def load_faiss_index(self):
        if os.path.exists(self.faiss_index_file):
            with open(self.faiss_index_file, 'rb') as f:
                self.index, self.knowledge_data = pickle.load(f)
        else:
            self.index = None
            self.knowledge_data = []

    def search_knowledge(self, query, top_k=5):
        if self.index is None or len(self.knowledge_data) == 0:
            return []
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(np.array(query_embedding, dtype=np.float32), top_k)
        results = []
        for idx in indices[0]:
            if idx == -1:
                continue
            results.append(self.knowledge_data[idx])
        return results

    def generate_response_with_kb(self, user_message):
        knowledge_matches = self.search_knowledge(user_message, top_k=5)
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        if knowledge_matches:
            system_message += "Choose one answer based on retrieved knowledge, if it relates to the question:\n"
            for t in knowledge_matches:
                system_message += f"- {t['subject']} {t['predicate']} {t['object']} (Videotimestamps: start: {t['start']}, end: {t['end']})\n"
        else:
            system_message += "\n"
        enriched_history = [{"role": "system", "content": f"You are a helpful assistent; {system_message}"}]
        enriched_history.append({"role": "user", "content": user_message})
        print(knowledge_matches)
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.5,
        )['choices'][0]['message']['content']
        return response

    def generate_response_without_kb(self, user_message):
        current_time = datetime.utcnow().isoformat()
        system_message = f"Current date and time: {current_time}\n"
        enriched_history = [{"role": "system", "content": f"You are a helpful assistent. Choose one answer; {system_message}"}]
        enriched_history.append({"role": "user", "content": user_message})
        response = self.llm.create_chat_completion(
            messages=enriched_history,
            temperature=0.5,
        )['choices'][0]['message']['content']
        return response

# -------------------------------
# Evaluation: Looping Over Questions and Generating Responses
# -------------------------------

def run_evaluation(llm_model_path, questions_path):
    # Load the questions from the provided JSON file
    with open(questions_path, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    # Instantiate Chatbot using the provided model path
    chatbot = Chatbot(llm_model_path=llm_model_path)

    # Loop over all questions
    for question in questions:
        # Compose a prompt that includes the question text and options.
        prompt = f"Question: {question['question']}\nOptions:\n"
        for idx, opt in enumerate(question['options']):
            prompt += f"{idx+1}. {opt}\n"

        # Generate responses using the two methods.
        response_with_kb = chatbot.generate_response_with_kb(prompt)
        response_without_kb = chatbot.generate_response_without_kb(prompt)

        # Add the responses to the question JSON object.
        question["llm_answer_with_kb"] = response_with_kb
        question["llm_answer_without_kb"] = response_without_kb

    # Derive the LLM name from the model path.
    llm_name = os.path.splitext(os.path.basename(llm_model_path))[0]
    result_filename = f"result_eval_{llm_name}.json"

    # Save the updated questions JSON to the new file.
    with open(result_filename, 'w', encoding='utf-8') as f:
        json.dump(questions, f, indent=4)

    print(f"Evaluation complete. Results saved to {result_filename}")

if __name__ == "__main__": 
    # Check command-line arguments
    model_paths = ["models/Qwen2.5-0.5B-Instruct-f16.gguf",
                   "models/Llama-3.2-1B-Instruct-f16.gguf",
                   "models/gemma-2-2b-it.F16.gguf",
                   "models/Llama-3.2-3B-Instruct-f16.gguf",
                   "models/Qwen2.5-7B-Instruct-Q4_K_M.gguf",
                   "models/Llama-3.2-11B-Vision-Instruct.Q4_K_M.gguf",
                   "models/gemma-2-9b-it-Q4_K_M.gguf",
                   "models/phi-4-14b-Q4_K_M.gguf",
                   ]
    
    for model_path in model_paths:
        questions_path = "questions.json"
        run_evaluation(model_path, questions_path)
    
    # model_path = "models/Llama-3.2-3B-Instruct-Q4_K_M.gguf"
    # questions_path = "questions.json"
    # run_evaluation(model_path, questions_path)

llama_model_load_from_file_impl: using device Metal (Apple M3 Max) - 26310 MiB free
llama_model_loader: loaded meta data with 34 key-value pairs and 290 tensors from models/Qwen2.5-0.5B-Instruct-f16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 0.5B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 0.5B
llama_model_loader: - kv   6:                            genera

[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   314 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    15 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     432.15 ms /   329 tokens
  current_time = datetime.utcnow().isoformat()
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     8 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     267.60 ms /   108 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    37 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     625.46 ms /   336 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    22 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     368.21 ms /   116 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    25 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     506.28 ms /   317 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    20 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     347.77 ms /   115 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    38 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     628.20 ms /   344 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    65 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     784.62 ms /   166 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    34 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     587.28 ms /   317 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    17 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     374.08 ms /    96 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    20 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     449.83 ms /   307 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   203 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2148.19 ms /   295 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     369.76 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    90 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1024.13 ms /   190 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    18 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     471.10 ms /   324 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    73 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     876.40 ms /   176 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    62 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     865.03 ms /   349 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    90 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1068.34 ms /   189 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    14 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     424.85 ms /   318 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    91 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1055.95 ms /   183 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     388.51 ms /   305 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    41 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     854.62 ms /   139 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    73 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     949.22 ms /   393 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   111 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1403.21 ms /   221 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    23 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     618.55 ms /   345 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     713.11 ms /   142 tokens


Evaluation complete. Results saved to result_eval_Qwen2.5-0.5B-Instruct-f16.json
[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


Llama.generate: 9 prefix-match hit, remaining 305 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   305 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     889.52 ms /   344 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    73 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     931.19 ms /   173 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    13 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     391.97 ms /   312 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    42 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     539.24 ms /   136 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    25 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     521.21 ms /   317 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     351.22 ms /   114 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    40 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     630.00 ms /   346 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     230.95 ms /   110 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    20 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     451.01 ms /   303 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    30 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     458.21 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   110 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1423.04 ms /   397 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    70 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1117.20 ms /   162 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     316.98 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    74 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     877.69 ms /   174 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    18 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     597.84 ms /   324 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    12 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     388.52 ms /   115 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    54 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     880.83 ms /   341 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    10 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     486.60 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    18 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     459.02 ms /   322 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    99 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1120.70 ms /   191 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     326.12 ms /   305 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    22 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     393.27 ms /   120 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    68 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     919.12 ms /   388 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    85 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1512.50 ms /   195 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     906.84 ms /   361 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1046.85 ms /   142 tokens


Evaluation complete. Results saved to result_eval_Llama-3.2-1B-Instruct-f16.json
[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


Llama.generate: 9 prefix-match hit, remaining 305 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   305 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    45 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1458.80 ms /   350 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     6 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     702.09 ms /   106 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     558.77 ms /   318 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    66 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     851.91 ms /   160 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     600.83 ms /   311 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     343.03 ms /   114 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    79 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1143.04 ms /   385 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    52 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     685.90 ms /   153 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    16 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     378.92 ms /   299 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    28 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     406.90 ms /   107 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   159 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1759.81 ms /   446 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   234 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    3437.77 ms /   326 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     360.86 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     6 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     341.93 ms /   106 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    18 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     409.88 ms /   324 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   104 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1161.09 ms /   207 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    73 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     937.35 ms /   360 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    82 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     962.94 ms /   181 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    15 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     637.07 ms /   319 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    29 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     453.59 ms /   121 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     366.24 ms /   305 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    25 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     384.07 ms /   123 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    74 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1008.85 ms /   394 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    88 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1065.46 ms /   198 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    23 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     504.13 ms /   345 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     661.23 ms /   142 tokens


Evaluation complete. Results saved to result_eval_gemma-2-2b-it.F16.json
[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


Llama.generate: 9 prefix-match hit, remaining 305 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   305 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     807.55 ms /   344 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     8 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     257.88 ms /   108 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    36 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     703.10 ms /   335 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    13 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     318.07 ms /   107 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    25 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     504.81 ms /   317 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     451.22 ms /   114 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    45 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     785.27 ms /   351 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    57 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     741.24 ms /   158 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     587.47 ms /   322 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    31 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     434.11 ms /   110 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   123 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1387.58 ms /   410 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   234 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2462.28 ms /   326 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     342.26 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     337.73 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    20 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     638.83 ms /   326 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    69 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     918.58 ms /   172 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    66 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     958.83 ms /   353 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    10 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     487.90 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    15 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     410.01 ms /   319 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    87 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1010.11 ms /   179 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    55 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     869.51 ms /   349 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     374.99 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    67 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1171.17 ms /   387 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    84 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1063.31 ms /   194 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    29 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     623.58 ms /   351 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     559.45 ms /   142 tokens


Evaluation complete. Results saved to result_eval_Llama-3.2-3B-Instruct-f16.json
[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


Llama.generate: 9 prefix-match hit, remaining 305 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   305 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    70 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     978.20 ms /   375 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     412.65 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    13 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     454.82 ms /   312 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    46 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     733.62 ms /   140 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    47 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     804.63 ms /   339 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     471.10 ms /   114 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    38 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     750.80 ms /   344 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     243.34 ms /   110 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    67 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     922.97 ms /   350 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    32 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     511.95 ms /   111 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    26 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     478.99 ms /   313 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    72 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     841.10 ms /   164 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     360.83 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    79 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     931.98 ms /   179 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    12 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     430.37 ms /   318 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    12 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     895.77 ms /   115 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    41 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     647.56 ms /   328 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    54 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     796.96 ms /   153 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    15 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     398.08 ms /   319 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    98 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1247.89 ms /   190 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     374.31 ms /   305 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    41 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     548.13 ms /   139 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    68 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1037.37 ms /   388 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   101 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1220.86 ms /   211 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     680.68 ms /   361 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    74 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     874.06 ms /   177 tokens


Evaluation complete. Results saved to result_eval_Qwen2.5-7B-Instruct-Q4_K_M.json
[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


Llama.generate: 9 prefix-match hit, remaining 305 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   305 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    55 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2319.07 ms /   360 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     7 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     847.29 ms /   107 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    20 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     552.09 ms /   319 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    58 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     722.89 ms /   152 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     428.29 ms /   311 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     338.06 ms /   114 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    29 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     521.89 ms /   335 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     343.35 ms /   120 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     223.97 ms /   284 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    27 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     467.21 ms /   106 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    51 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     742.46 ms /   338 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   422 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4406.37 ms /   514 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     355.17 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     243.60 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     424.78 ms /   325 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    61 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     991.63 ms /   164 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    43 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     651.02 ms /   330 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    10 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     254.11 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    15 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     445.80 ms /   319 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    78 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     927.17 ms /   170 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     385.36 ms /   305 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    41 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     585.48 ms /   139 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    78 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     975.65 ms /   398 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   102 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1131.57 ms /   212 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    23 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     494.07 ms /   345 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    35 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     490.29 ms /   138 tokens


Evaluation complete. Results saved to result_eval_Llama-3.2-11B-Vision-Instruct.Q4_K_M.json
[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


Llama.generate: 9 prefix-match hit, remaining 305 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   305 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    47 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     739.10 ms /   352 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    98 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1131.31 ms /   198 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    48 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     814.51 ms /   347 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    26 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     466.76 ms /   120 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    25 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     596.25 ms /   317 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     343.71 ms /   114 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    42 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     643.34 ms /   348 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    67 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     808.03 ms /   168 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    59 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2910.67 ms /   342 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    42 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     580.89 ms /   121 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    79 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1042.23 ms /   366 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   397 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4390.30 ms /   489 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     371.30 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     6 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     234.54 ms /   106 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    56 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     860.22 ms /   362 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    12 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     261.47 ms /   115 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    58 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     771.12 ms /   345 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    71 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1002.92 ms /   170 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    61 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     835.61 ms /   365 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    86 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     991.02 ms /   178 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    23 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     536.42 ms /   317 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    64 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     783.65 ms /   162 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    67 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     911.31 ms /   387 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    75 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     878.93 ms /   185 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    44 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     660.33 ms /   366 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    33 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     473.03 ms /   136 tokens


Evaluation complete. Results saved to result_eval_gemma-2-9b-it-Q4_K_M.json
[{'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'EVAP line', 'predicate': 'is held on by', 'object': 'four pieces of rust', 'start': 185.836, 'end': 194.882}, {'subject': 'canister', 'predicate': 'is_held_in_by', 'object': 'two 12mm pieces of rust', 'start': 15.2, 'end': 28.755}, {'subject': 'evap canister', 'predicate': 'has', 'object': 'two lines', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP_canister', 'predicate': 'can_be_removed', 'object': 'from above the subframe', 'start': 15.2, 'end': 28.755}]


Llama.generate: 9 prefix-match hit, remaining 305 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   305 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    56 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     786.26 ms /   361 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     6 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     229.92 ms /   106 tokens
Llama.generate: 9 prefix-match hit, remaining 299 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'EVAP canister', 'predicate': 'location', 'object': 'underneath the vehicle on the right side', 'start': 1.448, 'end': 15.18}, {'subject': 'gas tank', 'predicate': 'will be opened', 'object': "to see what's inside", 'start': 281.796, 'end': 303.949}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   299 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    36 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     583.43 ms /   335 tokens
Llama.generate: 9 prefix-match hit, remaining 94 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    94 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    35 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     508.55 ms /   129 tokens
Llama.generate: 9 prefix-match hit, remaining 292 prompt tokens to eval


[{'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'gas tank', 'predicate': 'fixing', 'object': 'two straps', 'start': 29.356, 'end': 35.579}, {'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'location', 'object': 'underneath the rear seat of the vehicle', 'start': 29.356, 'end': 35.579}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   292 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    36 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     571.17 ms /   328 tokens
Llama.generate: 9 prefix-match hit, remaining 95 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    95 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    32 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     439.91 ms /   127 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'fuel filler neck', 'predicate': 'held in by', 'object': '14mm bolt', 'start': 43.564, 'end': 148.419}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}, {'subject': 'fuel filler neck', 'predicate': 'contains', 'object': 'a one-way check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'two_14mm_bolts', 'predicate': 'hold', 'object': 'strap_to_gas_tank', 'start': 35.679, 'end': 41.803}, {'subject': 'Filler neck', 'predicate': 'contains', 'object': 'hollow tube', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    65 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     847.36 ms /   371 tokens
Llama.generate: 9 prefix-match hit, remaining 101 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   101 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     231.95 ms /   110 tokens
Llama.generate: 9 prefix-match hit, remaining 283 prompt tokens to eval


[{'subject': 'tank side', 'predicate': 'disconnect', 'object': 'two ventilation hoses', 'start': 180.282, 'end': 183.305}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'vacuum_hoses', 'predicate': 'have_been_disconnected', 'object': 'from this side of the EVAP canister', 'start': 15.2, 'end': 28.755}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'Fuel tank vapor pressure sensor', 'predicate': 'has', 'object': '3 terminals', 'start': 390.148, 'end': 420.44}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   283 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    26 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     527.38 ms /   309 tokens
Llama.generate: 9 prefix-match hit, remaining 79 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    79 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    31 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     429.30 ms /   110 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'needs removal', 'object': 'six pieces of rust', 'start': 196.161, 'end': 222.294}, {'subject': 'pulling out', 'predicate': 'action', 'object': 'check valves', 'start': 489.851, 'end': 494.159}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}, {'subject': 'Ball check valve', 'predicate': 'is part of', 'object': 'the check valve', 'start': 496.587, 'end': 534.03}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   174 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1987.93 ms /   461 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   241 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2632.70 ms /   333 tokens
Llama.generate: 9 prefix-match hit, remaining 311 prompt tokens to eval


[{'subject': 'fill check valve', 'predicate': 'opens valve', 'object': 'inside canister', 'start': 224.015, 'end': 264.245}, {'subject': 'fill check valve', 'predicate': 'allows', 'object': 'excess vapor to pass into the canister', 'start': 148.639, 'end': 180.042}, {'subject': 'fill check valve', 'predicate': 'allows escape of vapor', 'object': 'when gas tank is rapidly filling up with vapor', 'start': 224.015, 'end': 264.245}, {'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'check valve', 'predicate': 'is present in', 'object': 'here', 'start': 378.841, 'end': 385.745}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   311 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     9 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     426.32 ms /   320 tokens
Llama.generate: 9 prefix-match hit, remaining 100 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   100 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     6 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     259.01 ms /   106 tokens
Llama.generate: 9 prefix-match hit, remaining 306 prompt tokens to eval


[{'subject': 'evap float', 'predicate': 'prevents', 'object': 'liquid gasoline', 'start': 196.161, 'end': 222.294}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'float', 'predicate': 'prevents', 'object': 'liquid from going inside your charcoal canister', 'start': 266.787, 'end': 280.936}, {'subject': 'fuel tank', 'predicate': 'has', 'object': 'evap canister', 'start': 43.564, 'end': 148.419}, {'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    20 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     581.23 ms /   326 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    12 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     278.55 ms /   115 tokens
Llama.generate: 9 prefix-match hit, remaining 287 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'construction material', 'object': 'sheet metal', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'number of parts', 'object': 'two halves', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'material', 'object': 'big steel', 'start': 29.356, 'end': 35.579}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}, {'subject': 'gas tank', 'predicate': 'joining method', 'object': 'welding', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   287 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    30 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     527.99 ms /   317 tokens
Llama.generate: 9 prefix-match hit, remaining 99 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    99 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    83 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     958.00 ms /   182 tokens
Llama.generate: 9 prefix-match hit, remaining 304 prompt tokens to eval


[{'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}, {'subject': 'fuel pump', 'predicate': 'is located', 'object': 'at the lowest part of the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'gas filler neck', 'predicate': 'is part of', 'object': 'fuel system', 'start': 367.711, 'end': 378.521}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   304 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    15 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     376.65 ms /   319 tokens
Llama.generate: 9 prefix-match hit, remaining 92 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    92 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    64 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     772.51 ms /   156 tokens
Llama.generate: 9 prefix-match hit, remaining 294 prompt tokens to eval


[{'subject': 'gas tank', 'predicate': 'internal feature', 'object': 'baffles', 'start': 308.171, 'end': 325.335}, {'subject': 'baffle', 'predicate': 'prevents', 'object': 'fuel sloshing', 'start': 325.575, 'end': 365.911}, {'subject': 'baffles', 'predicate': 'location', 'object': 'perimeter of the tank', 'start': 308.171, 'end': 325.335}, {'subject': 'remaining tank area', 'predicate': 'is used for', 'object': 'fuel vapor accumulation', 'start': 325.575, 'end': 365.911}, {'subject': 'gas tank', 'predicate': 'construction method', 'object': 'stamping', 'start': 308.171, 'end': 325.335}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   294 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     348.95 ms /   305 tokens
Llama.generate: 9 prefix-match hit, remaining 98 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    98 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    11 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     391.40 ms /   109 tokens
Llama.generate: 9 prefix-match hit, remaining 320 prompt tokens to eval


[{'subject': 'check valve', 'predicate': 'allows fluid entry', 'object': 'into the tank', 'start': 325.575, 'end': 365.911}, {'subject': 'diaphragm', 'predicate': 'can move', 'object': 'up and down', 'start': 281.796, 'end': 303.949}, {'subject': 'Check valve', 'predicate': 'acts as', 'object': 'a valve that releases air when a certain pressure is reached', 'start': 496.587, 'end': 534.03}, {'subject': 'fuel check valve', 'predicate': 'contains', 'object': 'lid', 'start': 281.796, 'end': 303.949}, {'subject': 'maximum fuel level', 'predicate': 'is limited by', 'object': 'the height of the fuel fill check valve', 'start': 325.575, 'end': 365.911}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   320 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    56 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     896.46 ms /   376 tokens
Llama.generate: 9 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    25 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     466.62 ms /   135 tokens
Llama.generate: 9 prefix-match hit, remaining 322 prompt tokens to eval


[{'subject': 'EVAP charcoal canister', 'predicate': 'action', 'object': 'absorb gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'EVAP charcoal canister', 'predicate': 'responsibility', 'object': 'hold gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'check valves', 'predicate': 'function', 'object': 'control flow of gasoline vapors', 'start': 469.037, 'end': 482.005}, {'subject': 'vacuum switching valve', 'predicate': 'allows', 'object': 'air flow from gas tank to charcoal canister', 'start': 435.808, 'end': 468.877}, {'subject': 'vacuum switching valve', 'predicate': 'functions as', 'object': 'redirecting air flow between gas tank and charcoal canister', 'start': 435.808, 'end': 468.877}]


llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   322 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    23 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     453.61 ms /   345 tokens
Llama.generate: 9 prefix-match hit, remaining 103 prompt tokens to eval
llama_perf_context_print:        load time =     286.82 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   103 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    94 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1065.37 ms /   197 tokens


Evaluation complete. Results saved to result_eval_phi-4-14b-Q4_K_M.json


## OVERIG: STAGE KNOWLEDGE EXTRACTION MET EXTRA VERIFICATION STEP (NOT IN FINAL BUILD)

In [2]:
# import json
# import os
# import threading
# from datetime import datetime
# from json import JSONDecodeError

# from chonkie import SentenceChunker
# from llama_cpp import Llama
# from sentence_transformers import SentenceTransformer

# # -------------------------------
# # The Chatbot and LlamaSingleton
# # -------------------------------

# class LlamaSingleton:
#     _instance = None
#     _lock = threading.Lock()

#     def __new__(cls, model_path="models/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf", chat_format="chatml"):
#         with cls._lock:
#             if cls._instance is None:
#                 cls._instance = super(LlamaSingleton, cls).__new__(cls)
#                 cls._instance.llm = Llama(model_path=model_path, chat_format=chat_format, n_ctx=2048)
#             return cls._instance

# class Chatbot:
#     def __init__(self, 
#                  messages_file='messages.json', 
#                  knowledge_file='knowledge.json', 
#                  model_name='all-MiniLM-L6-v2'):
#         self.messages_file = messages_file
#         self.knowledge_file = knowledge_file
#         self.llm = LlamaSingleton().llm
#         self.model = SentenceTransformer(model_name)
#         self.knowledge_data = []
#         self.initialize_files()
#         self.load_knowledge()

#     def initialize_files(self):
#         for file in [self.messages_file, self.knowledge_file]:
#             if not os.path.exists(file):
#                 with open(file, 'w') as f:
#                     json.dump([], f)

#     def load_json_data(self, file_path):
#         with open(file_path, 'r') as f:
#             return json.load(f)

#     def save_json_data(self, file_path, data):
#         with open(file_path, 'w') as f:
#             json.dump(data, f, indent=4)

#     def load_knowledge(self):
#         self.knowledge_data = self.load_json_data(self.knowledge_file)

#     def extract_valuable_knowledge(self, text_chunk):
#         """
#         Use a two-stage chain-of-thought prompting strategy:
#           1. Extract entities, contextual metadata, and any compound/n-ary relations from the text.
#           2. Reason about the relationships among the extracted entities, self-critique the result, and refine the output.
#         """
#         # Stage 1: Extraction of entities and context
#         stage1_prompt = (
#             "You are an advanced information extractor. Given the text below, extract all relevant entities, "
#             "contextual metadata (e.g., document context, sentiment, source reliability), and any preliminary relationships "
#             "or compound relations that may exist. Return ONLY JSON following this schema:\n\n"
#             "{\n"
#             '  "entities": [{"name": "EntityName", "type": "EntityType"}],\n'
#             '  "prelim_relations": [\n'
#             "      { \"entities\": [\"EntityName1\", \"EntityName2\"], \"relation_hint\": \"...\" }\n"
#             "  ],\n"
#             '  "context": "Brief context extracted from the document",\n'
#             '  "sentiment": "positive/negative/neutral",\n'
#             '  "source_reliability": "high/medium/low"\n'
#             "}\n\n"
#             "If no relevant information is found, return empty values (e.g., empty arrays or empty strings).\n\n"
#             "Text:\n"
#             f"{text_chunk}"
#         )
#         stage1_response = self.llm.create_chat_completion(
#             messages=[
#                 {"role": "system", "content": "You are a precise and methodical information extractor."},
#                 {"role": "user", "content": stage1_prompt},
#             ],
#             temperature=0.5,
#         )

#         try:
#             stage1_data = json.loads(stage1_response['choices'][0]['message']['content'])
#         except (JSONDecodeError, KeyError):
#             stage1_data = {
#                 "entities": [],
#                 "prelim_relations": [],
#                 "context": "",
#                 "sentiment": "neutral",
#                 "source_reliability": "medium"
#             }

#         # Stage 2: Reasoning about relationships
#         stage2_prompt = (
#             "Based on the following extracted information:\n\n"
#             f"Entities: {json.dumps(stage1_data.get('entities', []))}\n"
#             f"Preliminary Relations: {json.dumps(stage1_data.get('prelim_relations', []))}\n"
#             f"Context: {stage1_data.get('context', '')}\n"
#             f"Sentiment: {stage1_data.get('sentiment', 'neutral')}\n"
#             f"Source Reliability: {stage1_data.get('source_reliability', 'medium')}\n\n"
#             "Now, reason about the relationships among these entities. Identify any compound or n-ary relations, "
#             "and provide refined relations with a confidence score (between 0 and 1). Use a chain-of-thought process "
#             "to self-critique and refine your answer. Return ONLY JSON with the following schema:\n\n"
#             "{\n"
#             '  "knowledge": [\n'
#             "      {\n"
#             '          "entities": [{"name": "EntityName", "type": "EntityType"}],\n'
#             '          "relations": [\n'
#             "              { \"entities\": [\"EntityName1\", \"EntityName2\", ...], \"predicate\": \"...\", \"confidence\": 0.95 }\n"
#             "          ],\n"
#             '          "context": "The document context",\n'
#             '          "sentiment": "positive/negative/neutral",\n'
#             '          "source_reliability": "high/medium/low",\n'
#             '          "timestamp": "ISO8601 formatted timestamp"\n'
#             "      }\n"
#             "  ]\n"
#             "}\n\n"
#             "If no refined relations can be determined, return:\n"
#             '{"knowledge": []}'
#         )

#         stage2_response = self.llm.create_chat_completion(
#             messages=[
#                 {"role": "system", "content": "You are a reflective and expert knowledge aggregator."},
#                 {"role": "user", "content": stage2_prompt},
#             ],
#             temperature=0.5,
#         )

#         try:
#             knowledge_data = json.loads(stage2_response['choices'][0]['message']['content'])
#             if "knowledge" not in knowledge_data:
#                 knowledge_data["knowledge"] = []
#         except (JSONDecodeError, KeyError):
#             knowledge_data = {"knowledge": []}

#         print("Extracted and refined knowledge from a chunk:", knowledge_data)
#         return knowledge_data["knowledge"]

#     def save_knowledge(self, new_knowledge_items):
#         """
#         Merge new knowledge items with existing ones. Use simple ontology-based reasoning to merge
#         items that refer to the same entities or concepts.
#         """
#         if not new_knowledge_items:
#             return

#         existing_knowledge = self.load_json_data(self.knowledge_file)
#         merged_knowledge = existing_knowledge.copy()

#         # A simple merging strategy: if two knowledge items share the same context and similar entities,
#         # merge their relations. (In practice, you might use a more sophisticated ontology matching algorithm.)
#         for new_item in new_knowledge_items:
#             new_item['timestamp'] = datetime.utcnow().isoformat()
#             merged = False
#             for existing_item in merged_knowledge:
#                 if existing_item.get("context") == new_item.get("context"):
#                     # Compare entity sets (by names) for overlap
#                     existing_entities = {e["name"] for e in existing_item.get("entities", [])}
#                     new_entities = {e["name"] for e in new_item.get("entities", [])}
#                     if existing_entities & new_entities:
#                         # Merge relations (avoiding duplicates based on predicate and involved entities)
#                         existing_relations = existing_item.get("relations", [])
#                         for rel in new_item.get("relations", []):
#                             if rel not in existing_relations:
#                                 existing_relations.append(rel)
#                         existing_item["relations"] = existing_relations
#                         merged = True
#                         break
#             if not merged:
#                 merged_knowledge.append(new_item)

#         self.save_json_data(self.knowledge_file, merged_knowledge)
#         print("Knowledge saved. Total items:", len(merged_knowledge))

# # -------------------------------
# # Main Processing: Chunk and Extract Knowledge
# # -------------------------------

# def main():
#     # ----------------------------
#     # Part 1: Chunk the input file
#     # ----------------------------
#     # Initialize the SentenceChunker with desired parameters
#     chunker = SentenceChunker(
#         chunk_size=512,
#         chunk_overlap=128,
#         min_sentences_per_chunk=40  # adjust as needed for text length
#     )

#     # Read the contents of input.txt
#     with open('input.txt', 'r', encoding='utf-8') as file:
#         text = file.read()

#     # Get the list of chunks from the input text
#     chunks = chunker.chunk(text)
#     print(f"Total chunks created: {len(chunks)}")

#     # ----------------------------
#     # Part 2: Extract Knowledge from Each Chunk
#     # ----------------------------
#     chatbot = Chatbot()

#     # Process each chunk
#     for i, chunk in enumerate(chunks, start=1):
#         print(f"\nProcessing chunk {i} (Token count: {chunk.token_count}, Sentences: {len(chunk.sentences)})")
#         extracted_knowledge = chatbot.extract_valuable_knowledge(chunk.text)
#         if extracted_knowledge:
#             chatbot.save_knowledge(extracted_knowledge)

#     print("\nKnowledge extraction complete.")
#     print("Please check 'knowledge.json' for the extracted valuable knowledge.")

# if __name__ == "__main__":
#     main()

llama_model_load_from_file_impl: using device Metal (Apple M3 Max) - 26602 MiB free
llama_model_loader: loaded meta data with 27 key-value pairs and 339 tensors from models/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = DeepSeek R1 Distill Qwen 1.5B
llama_model_loader: - kv   3:                       general.organization str              = Deepseek Ai
llama_model_loader: - kv   4:                           general.basename str              = DeepSeek-R1-Distill-Qwen
llama_model_loader: - kv   5:                         general.size_label str              = 1.5B
llama_model_loader: - kv  

Total chunks created: 55


load_tensors: layer  23 assigned to device CPU
load_tensors: layer  24 assigned to device CPU
load_tensors: layer  25 assigned to device CPU
load_tensors: layer  26 assigned to device CPU
load_tensors: layer  27 assigned to device CPU
load_tensors: layer  28 assigned to device CPU
load_tensors: tensor 'token_embd.weight' (q8_0) (and 338 others) cannot be used with preferred buffer type CPU_AARCH64, using CPU instead
load_tensors: offloading 0 repeating layers to GPU
load_tensors: offloaded 0/29 layers to GPU
load_tensors:   CPU_Mapped model buffer size =  1801.09 MiB
llama_init_from_model: n_seq_max     = 1
llama_init_from_model: n_ctx         = 2048
llama_init_from_model: n_ctx_per_seq = 2048
llama_init_from_model: n_batch       = 512
llama_init_from_model: n_ubatch      = 512
llama_init_from_model: flash_attn    = 0
llama_init_from_model: freq_base     = 10000.0
llama_init_from_model: freq_scale    = 1
llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full cap


Processing chunk 1 (Token count: 510, Sentences: 46)


llama_perf_context_print:        load time =    1838.37 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   651 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1396 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   26720.63 ms /  2047 tokens
Llama.generate: 12 prefix-match hit, remaining 254 prompt tokens to eval
llama_perf_context_print:        load time =    1838.37 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   254 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1781 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   32068.14 ms /  2035 tokens
Llama.generate: 12 prefix-match hit, remaining 628 prompt tokens to eval


Extracted and refined knowledge from a chunk: {'knowledge': []}

Processing chunk 2 (Token count: 502, Sentences: 45)


llama_perf_context_print:        load time =    1838.37 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   628 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1407 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   26439.43 ms /  2035 tokens
Llama.generate: 12 prefix-match hit, remaining 254 prompt tokens to eval
llama_perf_context_print:        load time =    1838.37 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   254 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1781 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   31369.40 ms /  2035 tokens
Llama.generate: 12 prefix-match hit, remaining 646 prompt tokens to eval


Extracted and refined knowledge from a chunk: {'knowledge': []}

Processing chunk 3 (Token count: 499, Sentences: 44)


llama_perf_context_print:        load time =    1838.37 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   646 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1389 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   25853.42 ms /  2035 tokens
Llama.generate: 12 prefix-match hit, remaining 254 prompt tokens to eval
llama_perf_context_print:        load time =    1838.37 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   254 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1781 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   33184.86 ms /  2035 tokens
Llama.generate: 12 prefix-match hit, remaining 647 prompt tokens to eval


Extracted and refined knowledge from a chunk: {'knowledge': []}

Processing chunk 4 (Token count: 496, Sentences: 42)


KeyboardInterrupt: 

: 