# LLM Practice

In [1]:
from transformers import AutoTokenizer, AutoModel
import faiss
import numpy as np

  _torch_pytree._register_pytree_node(


In [2]:
# Load a pre-trained embedding model (e.g., SentenceTransformers)
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

def generate_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    outputs = model(**inputs)
    # Use the mean of the token embeddings as the sentence embedding
    embedding = outputs.last_hidden_state.mean(dim=1).detach().numpy()
    return embedding

# Example documents
documents = ["What is a vector database?", "Explain large language models.", "How does FAISS work?"]

# Generate embeddings for documents
embeddings = np.vstack([generate_embedding(doc) for doc in documents])

Downloading tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

  _torch_pytree._register_pytree_node(


Downloading model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

In [3]:
# Create a FAISS index (IndexFlatL2 for cosine similarity)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # L2 is Euclidean distance; use IndexFlatIP for cosine similarity
index.add(embeddings)  # Add document embeddings to the index

In [5]:
# Query
query = "How are embeddings used in AI?"
query_embedding = generate_embedding(query)

# Search the FAISS index
k = 2  # Number of nearest neighbors
distances, indices = index.search(query_embedding, k)

# Retrieve the matching documents
matched_docs = [documents[i] for i in indices[0]]
print("Top matches:", matched_docs)


Top matches: ['What is a vector database?', 'Explain large language models.']


In [6]:
from transformers import pipeline

# Load a pre-trained LLM (e.g., GPT-style model)
qa_pipeline = pipeline("text-generation", model="gpt2")

# Combine matched documents as context
context = " ".join(matched_docs)
prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"

# Generate an answer
response = qa_pipeline(prompt, max_length=100, num_return_sequences=1)
print(response[0]['generated_text'])


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Downloading config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

  _torch_pytree._register_pytree_node(


Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Context: What is a vector database? Explain large language models.

Question: How are embeddings used in AI?
Answer: The same thing that you mentioned is the "interpreter"? All of the things that can be done in a programming language can be done from an embeddable vector or array. This is similar to other types of embeddings, but this is in different steps. There are several different types of embeddings, from simple embeddings like
