In [1]:
pip install langchain torch transformers sentence-transformers datasets tiktoken upstash-vector


Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
from datasets import load_dataset
from upstash_vector import Index
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM

# Step 1: Initialize Upstash Vector
UPSTASH_VECTOR_REST_URL = UPSTASH_VECTOR_REST_URL
UPSTASH_VECTOR_REST_TOKEN = UPSTASH_VECTOR_REST_TOKEN


# Initialize Upstash Index client
vector_client = Index(UPSTASH_VECTOR_REST_URL, UPSTASH_VECTOR_REST_TOKEN)

# Step 2: Initialize Embedding Model (MiniLM)
def initialize_embedding_model():
    print("Initializing embedding model (MiniLM)...")
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

embedding_model = initialize_embedding_model()


Initializing embedding model (MiniLM)...


In [None]:
# Step 3: Initialize Generative Model (Gemma)
def initialize_generative_model():
    print("Initializing generative model (Gemma)...")
    #put HF token
    token= HF_TOKEN
    model_name = "google/gemma-2b"
    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=token)
    tokenizer = AutoTokenizer.from_pretrained(model_name, padding=True, truncation=True, max_length=512, use_auth_token=token)
    return TextGenerationPipeline(model=model, tokenizer=tokenizer)

generative_model = initialize_generative_model()


Initializing generative model (Gemma)...


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
# Step 4: Load Khan Academy Dataset
def load_khan_academy_dataset():
    print("Loading Khan Academy dataset...")
    dataset = load_dataset("HuggingFaceTB/cosmopedia", "khanacademy", split="train")
    return dataset

khan_dataset = load_khan_academy_dataset()

Loading Khan Academy dataset...


Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

In [5]:
# Step 5: Add Dataset to Upstash Vector
def add_dataset_to_upstash(dataset, vector_client, embedding_model):
    print("Adding dataset to Upstash Vector...")
    for idx, record in enumerate(dataset):
        # Generate a unique ID for each document
        document_id = f"doc_{idx}"

        # Extract the document text
        text = record['text']  # Ensure the key matches the dataset structure

        # Generate embedding
        embedding = embedding_model.encode(text).tolist()  # Convert to list for JSON serialization

        # Use `upsert` method to add to the index
        vector_client.upsert(
            vector=embedding,  # The dense vector
            metadata={"text": text, "id": document_id}  # Metadata containing text and document ID
        )

        # Log progress
        if idx % 100 == 0:
            print(f"Added {idx} records to Upstash Vector.")

In [6]:
# Step 6: Query Upstash Vector
def search_upstash(query, top_k=5):
    print(f"Searching Upstash Vector for query: {query}")

    # Generate the query embedding
    query_embedding = embedding_model.encode(query).tolist()

    # Query the Upstash Vector index
    results = vector_client.query(
        vector=query_embedding,  # Query vector
        top_k=top_k,  # Number of top results to retrieve
        include_vectors=False,  # We only need metadata, not the vectors
        include_metadata=True   # Include metadata in the results
    )

    # Extract the document text from the results' metadata
    retrieved_documents = [result.metadata["text"] for result in results]

    return retrieved_documents



In [7]:
def generate_answer(query, retrieved_docs, generative_model):
    print("Generating answer with generative model...")

    # Combine retrieved documents into a single context
    context = "\n".join(retrieved_docs)
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

    # Generate the answer
    response = generative_model(
        prompt,
        max_length=512,  # Allow for longer inputs
        max_new_tokens=100,  # Restrict the length of the generated response
        num_return_sequences=1
    )[0]['generated_text']

    print("\n--- Query Results ---")
    display(pd.DataFrame(retrieved_docs).rename(columns={"text": "Retrieved Context", "score": "Similarity Score"}))
    print("\n--- Generated Answer ---")
    print(response)
    return response


In [8]:
# Step 8: Full RAG Pipeline
def rag_pipeline(query, vector_client, embedding_model, generative_model, top_k=5):
    # Step 8.1: Retrieve relevant documents
    retrieved_docs = search_upstash(query, top_k)
    # Step 8.2: Generate an answer
    answer = generate_answer(query, retrieved_docs, generative_model)
    return answer

In [9]:
if __name__ == "__main__":
    query = "What is the importance of gravity in physics?"
    response = rag_pipeline(query, vector_client, embedding_model, generative_model, top_k=5)
    print(f"Query: {query}\nAnswer: {response}")


Searching Upstash Vector for query: What is the importance of gravity in physics?


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=100) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Generating answer with generative model...

--- Query Results ---


Unnamed: 0,0
0,Mass is a measure of the amount of matter in a...
1,It is important to note that weight is differe...
2,Mass is a fundamental physical property of an ...
3,One important aspect of the metric system is i...
4,Or suppose you work in a laboratory setting an...



--- Generated Answer ---
Context:
Mass is a measure of the amount of matter in an object. It is different from weight, which depends on gravity. An astronaut weighs less on the moon than on Earth because of the lower gravitational pull, but their mass remains constant no matter where they are in the universe.

The basic unit of mass in the metric system is the gram. A gram is defined as the mass of one cubic centimeter of water at four degrees Celsius. This definition was chosen because water has a relatively consistent density, making it a useful standard for measuring mass.

It can be helpful to think of grams as a small unit of measurement, similar to inches or cents. Just as we might measure the length of a pencil in inches or the cost of a candy bar in cents, we can measure the mass of small objects in grams. For example, a paperclip typically has a mass of about 1 gram, and a penny has a mass of approximately 2.5 grams.
It is important to note that weight is different from mass.

gpt-2 

In [1]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Set the model in evaluation mode
model.eval()

# Function to generate a response based on a prompt
def generate_response(prompt, max_length=512, temperature=0.7, top_k=50):
    # Tokenize the input prompt
    inputs = tokenizer.encode(prompt, return_tensors="pt")

    # Generate text
    outputs = model.generate(
        inputs,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        do_sample=True,
        num_return_sequences=1,
    )

    # Decode the output to get the generated text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Example prompt
prompt = "What is the importance of gravity in physics?"

# Generate and print the response
response = generate_response(prompt)
print("Generated Response:")
print(response)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Response:
What is the importance of gravity in physics? In an interview with Science Daily last year, physicist Richard Feynman said: "The problem is that science has to deal with the vacuum of time. We have to figure out what it was like for a particular time, and how that time came about. We have to give the right answer, and then we have to come up with another answer."

"We are not trying to say that gravity is the answer, and we're not trying to say that it isn't. But it is the answer," he continued. "We have to keep trying to figure out how it came about. We are not trying to say that gravity is the answer, and we're not trying to say that it isn't. But it is the answer."

The idea behind gravity is to give us the right answer, and we can't do that, but we can try to explain it.

What's the difference between gravity and gravity theory?

In general, gravity theory is the theory of the laws of physics. But some of the most popular papers and books on the subject, like 'Q

gemma-2b

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = 'google/gemma-2b'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

model.eval()

# Function to generate a response
def generate_response(prompt, max_length=512, temperature=0.7):
    inputs = tokenizer.encode(prompt, return_tensors="pt")

    outputs = model.generate(
        inputs,
        max_length=max_length,
        temperature=temperature,
        do_sample=True,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example prompt
prompt = "What is the importance of gravity in physics?"

# Generate and print response
response = generate_response(prompt)
print("Generated Response:")
print(response)


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Generated Response:
What is the importance of gravity in physics?

Which of the following statements about the primary structure of a protein is not correct? (a) The amino acid sequence determines the structure of a protein. (b) Hydrogen bonds, ionic bonds, and hydrophobic interactions are all energy-rich connections between amino acids. $(c)$ The primary structure of a protein contains covalent bonds between amino acids. (d) Modifications in the secondary structure of a protein are related to conformational changes.

A $4.0$-m-diameter playground merry-go-round, with a moment of inertia of $400 \mathrm{~kg} \cdot \mathrm{m}^2$, is freely rotating with an angular velocity of $2.0 \mathrm{rad} / \mathrm{s}$. Ryan, whose mass is $80 \mathrm{~kg}$, runs on the ground the outside of the merry-go-round. He runs at a speed of $5.0 \mathrm{~m} / \mathrm{s}$ relative to the ground and in a direction tangential to the outside of the merry-go-round. In this direction, how much work has Ryan's ki

In [None]:
help(vector_client.query)


Help on method query in module upstash_vector.core.index_operations:

query(vector: Union[List[float], upstash_vector.types.SupportsToList, NoneType] = None, top_k: int = 10, include_vectors: bool = False, include_metadata: bool = False, filter: str = '', data: Optional[str] = None, namespace: str = '', include_data: bool = False) -> List[upstash_vector.types.QueryResult] method of upstash_vector.client.Index instance
    Query `top_k` many similar vectors.
    Requires either `data` or `vector` paramter.
    Raises exception if both `data` and `vector` parameters are used.
    
    :param vector: The vector value to query.
    :param top_k: How many vectors will be returned as the query result.
    :param include_vectors: Whether the resulting `top_k` vectors will have their vector values or not.
    :param include_metadata: Whether the resulting `top_k` vectors will have their metadata or not.
    :param filter: Filter expression to narrow down the query results.
    :param data: Dat

Retrieved Context: The content of the document retrieved from the vector database.


Similarity Score: A numeric score indicating how closely the document matches the query (higher values are better).