In [5]:
!pip install faiss-cpu ollama numpy transformers torch




In [1]:
import ollama
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face checkpoint shared by the tokenizer and the encoder; naming it
# once keeps the two from drifting apart.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)

# Name of the Ollama model passed to ollama.generate for answer generation.
o_model = 'deepseek-r1:1.5b'

In [3]:
# Function to get embeddings
def get_embedding(text):
    """Embed text with the module-level ``tokenizer`` and ``model``.

    Token vectors from the last hidden layer are averaged over real tokens
    only: padding positions are masked out via the attention mask, so a batch
    of texts with different lengths is pooled correctly. For a single string
    (no padding) this equals a plain mean over tokens.

    Args:
        text: A string, or a list of strings, to embed.

    Returns:
        A 2-D NumPy array with one row per input text.
    """
    import torch  # local import: the notebook's import cell does not bring torch in

    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
    hidden = outputs.last_hidden_state
    # Broadcast the mask over the hidden axis so padded tokens contribute zero.
    mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
    # clamp guards against division by zero for an all-padding row.
    pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
    return pooled.detach().numpy()


In [4]:
# Toy knowledge base: each string is one document that retrieval can return.
documents = [
    "SJTU had 9133 class, Lao Hu is very active in the class",
    "Bird Man studied in 33174 class, he is always a deep thinker when he was in SJTU or graduated from the university.",
    "The cat sat on the mat.",
    "Dogs are great companions.",
    "The sun rises in the east.",
    "Python is a popular programming language.",
    "The Eiffel Tower is in Paris."
]

In [5]:
# Read the embedding width from the encoder's config instead of hard-coding it
# (384 for all-MiniLM-L6-v2), so the index stays valid if the model changes.
dimension = model.config.hidden_size  # Dimension of the embeddings
# Flat (exhaustive) index that ranks by L2 distance.
index = faiss.IndexFlatL2(dimension)


In [6]:
# Embed every document and stack the per-document rows into one matrix.
document_embeddings = np.vstack([get_embedding(doc) for doc in documents])
# Clear vectors left from a previous run of this cell; otherwise re-running it
# appends duplicates and FAISS ids stop lining up with positions in `documents`.
index.reset()
index.add(document_embeddings)

In [7]:
def retrieve_document(query, k=1):
    """Return up to ``k`` documents nearest to ``query`` in the FAISS index.

    Args:
        query: Text to search for; embedded with ``get_embedding``.
        k: Maximum number of documents to return.

    Returns:
        A list of strings from ``documents``, in the order FAISS returned
        them. It holds fewer than ``k`` items when the index has fewer than
        ``k`` vectors.
    """
    query_embedding = get_embedding(query)
    _distances, indices = index.search(query_embedding, k)
    # FAISS pads missing results with -1; without this filter a -1 would wrap
    # around and silently return the last document.
    return [documents[i] for i in indices[0] if i >= 0]


In [8]:
def generate_response(query):
    """Answer ``query`` with the Ollama model, using retrieved documents as context.

    The retrieved documents are joined with single spaces into a context
    string, the assembled prompt is printed, and the raw result of
    ``ollama.generate`` is returned.
    """
    context = " ".join(retrieve_document(query))
    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    print(prompt)
    return ollama.generate(model=o_model, prompt=prompt)


In [9]:
# Sample question used to exercise the retrieval + generation pipeline.
query = "Who is the smart guy in 33174?"

In [10]:
# Run the pipeline; generate_response also prints the prompt it builds.
response = generate_response(query)

Context: Bird Man studied in 33174 class, he is always a deep thinker when he was in SJTU or graduated from the university.

Question: Who is the smart guy in 33174?

Answer:


In [11]:
# Show the value stored under the 'response' key of the result.
print("Response:", response['response'])

Response: <think>
Okay, so I'm trying to figure out who the smart guy in 33174 is based on the given context. Let me read through it again carefully. 

The context says that Bird Man studied at 33174 class and he's always a deep thinker when he was in SJTU or graduated from university. The question is asking who the smart guy there is, so I'm guessing it's about identifying someone else, not Bird Man.

Hmm, maybe there are other students or staff members in that class or department. Since 33174 seems to be a specific code for a building or department, perhaps 33174 class refers to something like the third floor of building 3 or number 71 (since 33+17 is 50). But I'm not sure if that's relevant.

The context mentions that Bird Man is deep thinking there when in SJTU or university. So maybe he's a student who's particularly sharp and insightful. On the other hand, since the question is about finding another smart guy, perhaps it's referring to a faculty member or a teaching assistant?

W

In [12]:
# Inspect the stacked document embedding matrix.
document_embeddings


array([[-0.31338495,  0.04425209, -0.3133549 , ..., -0.14250495,
        -0.665298  ,  0.43502602],
       [ 0.13533844, -0.08379436, -0.13755773, ..., -0.00178358,
        -0.44546187,  0.11593696],
       [ 0.78422046, -0.09497552, -0.22108857, ...,  0.30985013,
         0.01786342,  0.31611723],
       ...,
       [ 0.00909124,  0.4990011 ,  0.48946318, ..., -0.02088955,
        -0.3211978 ,  0.20304598],
       [-0.19325323, -0.08373971, -0.05751083, ...,  0.75563073,
         1.0413365 ,  0.16750695],
       [ 0.40764534,  0.21286911,  0.18326026, ...,  0.11061604,
         0.37101394,  0.3125213 ]], dtype=float32)

In [13]:
# Number of rows in the embedding matrix (one per document).
len(document_embeddings)

7

In [15]:
# Length of a single embedding vector.
len(document_embeddings[0])


384

In [17]:
# Embedding row for the first document.
document_embeddings[0]

array([-3.13384950e-01,  4.42520902e-02, -3.13354909e-01, -3.35043706e-02,
        2.88317859e-01,  3.01725835e-01, -2.28521660e-01,  9.12582129e-02,
       -5.23138881e-01, -1.92603737e-01,  4.27228540e-01,  3.73506546e-02,
       -1.56339005e-01, -3.72278720e-01,  1.12856187e-01, -1.34598151e-01,
        1.29387677e-01,  7.89749324e-02, -4.90241311e-02, -6.71905577e-01,
        9.52798799e-02, -8.42582807e-02,  2.67494380e-01, -1.78499185e-02,
        1.56675637e-01, -6.94557801e-02,  8.40093195e-02, -1.56203061e-01,
        9.57569927e-02, -4.31807399e-01, -5.61613441e-02,  3.73505473e-01,
        1.55442640e-01,  2.01965153e-01, -2.26853266e-01,  3.34672660e-01,
       -5.81312515e-02,  6.08717054e-02,  1.60044268e-01,  2.14835778e-01,
       -4.89433073e-02, -2.04534084e-01,  5.34454107e-01, -4.20967847e-01,
        2.58447915e-01, -4.38286811e-01, -3.72478604e-01, -5.79464555e-01,
       -3.76888774e-02, -3.56309950e-01,  1.98429660e-03, -3.78072634e-02,
        2.49954732e-03, -

In [18]:
# Embedding row for the second document.
document_embeddings[1]

array([ 1.35338441e-01, -8.37943554e-02, -1.37557730e-01,  3.67650948e-02,
       -2.68033415e-01, -6.65589198e-02, -7.36890687e-03, -1.11448318e-01,
       -7.12174922e-02,  6.73540458e-02, -2.48500258e-01, -3.55330203e-03,
       -2.65568614e-01,  5.72488271e-03, -7.76972622e-02, -6.31485805e-02,
       -4.15043607e-02,  1.31824904e-03,  8.52839276e-02, -2.81401724e-01,
       -2.69379765e-01,  2.29214638e-01,  2.13231713e-01, -6.32483810e-02,
        1.39867291e-01, -9.47968476e-03,  4.28013504e-02,  1.40822225e-03,
        6.06070459e-02,  7.72481263e-02,  1.40402541e-01,  3.63425434e-01,
       -7.44939223e-03,  1.75124168e-01, -9.08588618e-02,  9.40840468e-02,
        3.80521901e-02,  3.33124906e-01,  1.26568064e-01, -1.31464496e-01,
       -7.90566504e-02,  6.02593347e-02,  2.39496157e-01,  2.20698640e-01,
       -7.65588600e-03, -1.50140479e-01, -2.70879507e-01, -1.40907586e-01,
        1.93624362e-01, -5.83928078e-02, -4.47703153e-01, -1.47962704e-01,
       -7.30731785e-02, -

In [19]:
# Embedding of the query; the result is 2-D with a single row.
get_embedding(query)

array([[-2.27441296e-01,  3.33631128e-01, -4.53439802e-01,
         1.83711514e-01, -6.56847715e-01, -3.72543067e-01,
         4.27268058e-01,  1.84258059e-01, -5.27532578e-01,
         2.10130945e-01,  8.76623467e-02, -2.79604048e-01,
         2.44342849e-01,  5.39644510e-02, -5.42566359e-01,
         1.85140565e-01,  8.74144509e-02, -3.93703789e-01,
         1.41154483e-01, -3.38641644e-01, -9.80650559e-02,
        -2.06188094e-02,  1.52575880e-01, -6.18588924e-02,
         3.01663727e-01,  3.73396516e-01,  3.99404280e-02,
        -2.20732555e-01, -9.26253796e-02, -2.04763412e-01,
         2.91062623e-01,  1.23271644e-01,  4.62712973e-01,
         1.44287497e-02, -4.65575844e-01, -1.33453265e-01,
         4.63331550e-01,  4.68827516e-01,  4.47146624e-01,
        -3.83123875e-01, -3.40430230e-01, -4.12777305e-01,
         1.27799556e-01,  4.62433964e-01, -2.42699444e-01,
        -2.28748694e-01, -6.63773492e-02, -5.69859594e-02,
         6.26965910e-02,  7.74808004e-02, -5.02596557e-0