In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

In [2]:
# Checkpoint shared by the causal LM and its tokenizer. Keeping the id in one
# place prevents the two from drifting apart when the model is swapped.
LLM_CHECKPOINT = "bigscience/bloom-560m"
# Sentence-embedding checkpoint used to vectorise documents and queries.
EMBEDDING_CHECKPOINT = "all-MiniLM-L6-v2"

# NOTE: each call below fetches weights on first use and loads them into memory.
model = AutoModelForCausalLM.from_pretrained(LLM_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(LLM_CHECKPOINT)
embedding_model = SentenceTransformer(EMBEDDING_CHECKPOINT)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [25]:
# Knowledge base for retrieval: one short "Field: value" fact per string.
# Each entry is embedded and indexed on its own, and the index returns positions
# into this list, so the order here must stay in sync with the built index
# (rebuild the index after editing this list).
documents = [
    "Name: Muhammad Hassan",
    "Profession: Artificial Intelligence Engineer",
    "Education: Engineering",
    "Institute: IBM",
    "Total Experience: 2 years",
    "Primary Programming Language: Python",
    "Career started with Learning Python programming",
    "Specializations: Artificial Intelligence",
    "Skills: ChatGPT Prompt Engineering, Machine Learning, Deep Learning, Artificial Intelligence, Large Language Models, Sequential Models, Building AI Applications",
    "Daily Work Routine: 2 hours per day",
    "Projects: AI-integrated Chatbots, Web Applications, AI Applications",
    "Freelance Work Includes: AI Integrated Chatbots, Python Programming, Model Training, Custom Model Development, AI Agents, Workflow Automation",
    "Freelancing Work Nature: Work from home",
    "Freelance Payment: Minimum $120, Maximum $5000",
    "Additional Reinforcements available for $50",
    "Accepted Payment Methods: Bank Transfer, Visa Card"
]

In [26]:
# Embed every document and build an exact (brute-force) FAISS index over them.
doc_embeddings = embedding_model.encode(documents)
# FAISS expects a C-contiguous float32 matrix of shape (n_docs, dim).
# atleast_2d promotes a single 1-D embedding to one row, and the cast guards
# against encoders that hand back float64 or non-contiguous arrays.
doc_embeddings = np.ascontiguousarray(np.atleast_2d(doc_embeddings), dtype=np.float32)
dim = doc_embeddings.shape[1]
# Spell out the L2 metric instead of relying on the IndexFlat default.
index = faiss.IndexFlatL2(dim)
index.add(doc_embeddings)
# Row i of the index must correspond to documents[i]; retrieval relies on it.
assert index.ntotal == len(documents), "index/document count mismatch"

In [None]:
def retrieve(query, k=2):
    """Return the documents whose embeddings are nearest to ``query``.

    Args:
        query: Free-text query string to embed and look up.
        k: Maximum number of documents to return. Must be at least 1.

    Returns:
        A list of up to ``k`` entries from ``documents``, in the order the
        index reports them (closest first under the index's metric). Fewer
        than ``k`` entries are returned when the index holds fewer vectors.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")

    # Never ask for more neighbours than the index holds: FAISS pads missing
    # hits with id -1, and documents[-1] would silently return the last entry.
    k = min(k, index.ntotal)
    if k == 0:
        return []

    # Same float32, contiguous layout the index expects for stored vectors.
    query_embedding = np.ascontiguousarray(
        embedding_model.encode([query]), dtype=np.float32
    )
    _distances, indices = index.search(query_embedding, k)
    # Defensive filter: drop any -1 padding that still slips through.
    return [documents[idx] for idx in indices[0] if idx >= 0]

In [30]:
# Smoke-test retrieval with a query unrelated to the documents. retrieve() has
# no relevance cutoff, so it still returns the nearest entries even when
# nothing in the knowledge base actually answers the question.
result = retrieve(query="Who is dumb")
print(result)

['Profession: Artificial Intelligence Engineer', 'Specializations: Artificial Intelligence']
