In [None]:
# -*- coding: utf-8 -*-
"""Dynamic_Embedding_Storage_System.ipynb"""

import getpass
import json
import os

import numpy as np
import pandas as pd  # For visualization
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModel

In [None]:
class EmbeddingCalculator:
    """
    Compute one embedding vector per text from a Hugging Face model's last hidden layer.
    """

    # Supported strategies for collapsing per-token hidden states into one vector.
    _POOLING_MODES = ("cls", "mean", "last")

    def __init__(self, model_name, hf_token):
        """
        Initialize the embedding calculator with a specific model.

        Args:
            model_name: Model identifier passed to `from_pretrained`.
            hf_token: Hugging Face access token forwarded to `from_pretrained`.
        """
        # `token` replaces the deprecated `use_auth_token` keyword.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
        self.model = AutoModel.from_pretrained(
            model_name,
            token=hf_token,
            device_map="auto",  # Automatically distribute model layers across devices
            torch_dtype=torch.float16  # Load the model in half precision to save memory
        )

    def calculate_embedding(self, text, pooling="cls"):
        """
        Calculate the embedding for a given text.

        Args:
            text: Input passed to the tokenizer; it is truncated to 512 tokens.
            pooling: How token states of the last hidden layer are combined:
                "cls"  - state at position 0 (default, the original behavior);
                "mean" - attention-mask-weighted average over all tokens;
                "last" - state of the last attended token.

        Returns:
            NumPy array holding the pooled state after `squeeze()`.

        Raises:
            ValueError: If `pooling` is not one of the supported modes.

        NOTE(review): for decoder-only (causal) models such as the Llama
        checkpoint used in this notebook, position 0 is presumably a BOS token
        that cannot attend to the rest of the text, so "cls" may produce the
        same vector for every input. Confirm, and consider "last" or "mean".
        """
        if pooling not in self._POOLING_MODES:
            raise ValueError(
                f"Unknown pooling mode {pooling!r}; expected one of {self._POOLING_MODES}."
            )

        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        # Move inputs to the same device as the model
        inputs = {key: value.to(self.model.device) for key, value in inputs.items()}
        with torch.no_grad():
            outputs = self.model(**inputs, output_hidden_states=True)
        last_hidden = outputs.hidden_states[-1]

        if pooling == "cls":
            pooled = last_hidden[:, 0, :]
        else:
            mask = inputs.get("attention_mask")
            if mask is None:
                # Tokenizer returned no mask: treat every position as a real token.
                mask = torch.ones(last_hidden.shape[:2], dtype=torch.long)
            mask = mask.to(last_hidden.device)

            if pooling == "mean":
                # Accumulate in float32 so half-precision sums cannot overflow.
                weights = mask.unsqueeze(-1).to(torch.float32)
                summed = (last_hidden.to(torch.float32) * weights).sum(dim=1)
                counts = weights.sum(dim=1).clamp(min=1.0)
                pooled = (summed / counts).to(last_hidden.dtype)
            else:  # pooling == "last"
                # Highest attended position per row; works for left or right padding.
                positions = torch.arange(mask.shape[1], device=mask.device)
                last_index = (mask.to(torch.long) * positions).argmax(dim=1)
                batch_index = torch.arange(last_hidden.shape[0], device=last_hidden.device)
                pooled = last_hidden[batch_index, last_index]

        return pooled.squeeze().cpu().numpy()

In [None]:
class DynamicMemory:
    """
    Dictionary-backed store that maps a key to its embedding.
    """

    def __init__(self):
        """
        Create an empty store.
        """
        self.memory = dict()  # key -> embedding, for fast lookup by key

    def add_embedding(self, key, embedding):
        """
        Store `embedding` under `key`, replacing any previous entry for that key.
        """
        self.memory[key] = embedding

    def get_embedding(self, key):
        """
        Return the embedding stored under `key`, or None when the key is absent.
        """
        return self.memory.get(key)

    def visualize_memory(self):
        """
        Print a table with one row per stored key and the shape of its embedding.
        """
        if self.memory:
            # One row per entry: the label and the `.shape` of the stored object
            rows = []
            for label, vector in self.memory.items():
                rows.append({"Key": label, "Embedding Shape": vector.shape})
            print(pd.DataFrame(rows))
        else:
            print("Dynamic memory is empty.")

In [None]:
def save_single_embedding(embedding, filename):
    """
    Save a single embedding to a NumPy file.

    Args:
        embedding: Array-like object handed to `np.save`.
        filename: Destination path; missing parent directories are created.
            `np.save` appends ".npy" when the name does not already end with it.
    """
    directory = os.path.dirname(filename)
    if directory:
        # exist_ok=True avoids the check-then-create race of testing
        # os.path.exists() first and then calling makedirs().
        os.makedirs(directory, exist_ok=True)
    np.save(filename, embedding)


In [None]:
def load_or_calculate_embedding(calculator, text, filename):
    """
    Load a precomputed embedding from a .npy file or calculate and save it if the file does not exist.

    Args:
        calculator: Object exposing `calculate_embedding(text)`; only used on a cache miss.
        text: Text to embed on a cache miss.
        filename: Cache file path. Because `np.save` appends ".npy" to names
            lacking that suffix, the suffixed path is also checked so a file
            written by an earlier call is found again.

    Returns:
        The embedding, either loaded with `np.load` or freshly calculated.
    """
    candidates = [filename]
    name = os.fspath(filename)
    if isinstance(name, str) and not name.endswith(".npy"):
        # np.save(name, ...) would have written name + ".npy".
        candidates.append(name + ".npy")

    for candidate in candidates:
        if os.path.exists(candidate):
            print(f"Loading precomputed embedding from {candidate}")
            return np.load(candidate)

    print(f"Calculating embedding for: {text[:50]}...")  # Show a preview of the text
    embedding = calculator.calculate_embedding(text)
    save_single_embedding(embedding, filename)
    return embedding


In [None]:
def _read_hf_token():
    """Return the Hugging Face token from the HF_TOKEN env var, else from a hidden prompt."""
    token = os.environ.get("HF_TOKEN", "").strip()
    if not token:
        # getpass does not echo the secret, so it cannot end up in saved cell output.
        token = getpass.getpass("Enter your Hugging Face token: ").strip()
    return token


def _load_qna_json():
    """Load the QnA JSON via a Colab upload when available, else from a typed path."""
    try:
        from google.colab import files
    except ImportError:
        json_file = input("Enter the path to the JSON file: ").strip()
    else:
        print("Please upload the JSON file containing the original query and related questions.")
        uploaded = files.upload()  # Returns a dictionary of uploaded files
        if not uploaded:
            raise FileNotFoundError("No JSON file was uploaded.")
        json_file = next(iter(uploaded))  # Get the first uploaded file

    with open(json_file, "r", encoding="utf-8") as f:
        return json.load(f)


def main():
    """
    Run the full pipeline: authenticate, load the model, read the QnA JSON,
    load or calculate one embedding per text, and show the resulting memory.

    Embeddings are cached as .npy files under "dynamic_embeddings/" and also
    kept in a DynamicMemory instance keyed by "original_query",
    "original_answer", "question_<n>" and "answer_<n>".
    """
    embeddings_dir = "dynamic_embeddings"

    # Authenticate with Hugging Face using a single, non-echoed token prompt
    print("Authenticating with Hugging Face...")
    hf_token = _read_hf_token()
    if not hf_token:
        print("No Hugging Face token provided.")
        return
    login(token=hf_token)

    # Specify the model name
    model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with your model

    # Initialize the embedding calculator and dynamic memory
    calculator = EmbeddingCalculator(model_name, hf_token)
    memory = DynamicMemory()

    # Load JSON file containing the original query and QnA pairs
    data = _load_qna_json()

    # Extract original query, its answer, and related questions
    if isinstance(data, dict):
        original_query = data.get("original_query")
        original_answer = data.get("original_answer")
        qna_pairs = data.get("qna_pairs")  # List of question-answer pairs
    else:
        original_query = original_answer = qna_pairs = None
    if not original_query or not original_answer or not qna_pairs:
        print("Invalid JSON format. Ensure the file contains 'original_query', 'original_answer', and 'qna_pairs' keys.")
        return

    print(f"Original query: {original_query}")
    print(f"Original answer: {original_answer}")
    print(f"Number of QnA pairs: {len(qna_pairs)}")

    # Load or calculate embeddings for the original query
    original_query_embedding = load_or_calculate_embedding(
        calculator, original_query, f"{embeddings_dir}/original_query.npy"
    )
    memory.add_embedding("original_query", original_query_embedding)

    # Load or calculate embeddings for the original answer
    original_answer_embedding = load_or_calculate_embedding(
        calculator, original_answer, f"{embeddings_dir}/original_answer.npy"
    )
    memory.add_embedding("original_answer", original_answer_embedding)

    # Process and save embeddings dynamically for each QnA pair
    print("Processing QnA pairs...")
    for i, pair in enumerate(qna_pairs):
        question = pair.get("question") if isinstance(pair, dict) else None
        answer = pair.get("answer") if isinstance(pair, dict) else None

        # A pair without both texts cannot be embedded; skip it instead of crashing.
        if not question or not answer:
            print(f"Skipping QnA Pair {i+1}: missing 'question' or 'answer'.")
            continue

        print(f"Processing QnA Pair {i+1}:")
        print(f"Question: {question}")
        print(f"Answer: {answer}")

        # Load or calculate embeddings for the question and answer
        question_embedding = load_or_calculate_embedding(
            calculator, question, f"{embeddings_dir}/question_{i+1}.npy"
        )
        answer_embedding = load_or_calculate_embedding(
            calculator, answer, f"{embeddings_dir}/answer_{i+1}.npy"
        )

        # Add embeddings to dynamic memory
        memory.add_embedding(f"question_{i+1}", question_embedding)
        memory.add_embedding(f"answer_{i+1}", answer_embedding)

    print("All embeddings for questions and answers are processed and saved.")

    # Visualize dynamic memory
    print("\nVisualizing dynamic memory:")
    memory.visualize_memory()

    # Retrieval example
    print("\nRetrieving first question embedding from memory:")
    retrieved_embedding = memory.get_embedding("question_1")
    if retrieved_embedding is not None:
        print("Retrieved embedding shape:", retrieved_embedding.shape)
    else:
        print("Embedding not found.")

if __name__ == "__main__":
    main()

Authenticating with Hugging Face...


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Enter your Hugging Face token: [REDACTED]




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Please upload the JSON file containing the original query and related questions.


Saving questions_answers(1).json to questions_answers(1) (1).json
Original query: what is life?
Original answer: what is life? - a philosophical question
Philosophers have been pondering the question of what life is for thousands of years. It is a question that has sparked debate, discussion, and introspection. Here are some of the key philosophical perspectives on what life is:

1. **Biological perspective**: From a biological standpoint, life is a complex process of metabolism, growth, reproduction, and response to stimuli. This perspective views life as a set of physical and chemical processes that sustain living organisms.
2. **Teleological perspective**: This perspective posits that life has a purpose or direction, and that living things strive to achieve certain goals or fulfill certain functions. For example, the purpose of life might be to survive, reproduce, and perpetuate the species.
3. **Existentialist perspective**: Existentialists argue that life has no inherent meaning o