In [1]:
from langchain_ollama import OllamaLLM

# Build the LangChain Ollama LLM wrapper for the model named "deepseekmini".
# NOTE(review): the ConnectError recorded under the next cell suggests no Ollama
# server was reachable when it was last run — presumably one must be running first.
model = OllamaLLM(model="deepseekmini")

In [None]:
# Send one prompt to `model` and print whatever invoke() returns.
print(model.invoke("Come up with 10 names for a song about parrots"))

ConnectError: [WinError 10061] No connection could be made because the target machine actively refused it

In [1]:
import torch
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and encoder for the 'facebook/contriever-msmarco' checkpoint.
# NOTE: this rebinds `model`, which the first cell bound to the OllamaLLM instance.
tokenizer = AutoTokenizer.from_pretrained('facebook/contriever-msmarco')
model = AutoModel.from_pretrained('facebook/contriever-msmarco')

# One question followed by two candidate passages.
sentences = [
    "Where was Marie Curie born?",
    "Maria Sklodowska, later known as Marie Curie, was born on November 7, 1867.",
    "Born in Paris on 15 May 1859, Pierre Curie was the son of Eugène Curie, a doctor of French Catholic origin from Alsace."
]

# Apply tokenizer: pad to a common length and return PyTorch tensors.
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Compute token embeddings. This is inference only, so disable autograd to avoid
# building a computation graph (and keeping its activations alive) for nothing.
with torch.no_grad():
    outputs = model(**inputs)

# Mean pooling
def mean_pooling(token_embeddings, mask):
    """Average token embeddings over the positions selected by ``mask``.

    Args:
        token_embeddings: Tensor whose dim 1 is the token axis and whose last
            dim is the feature axis.
        mask: Tensor with one entry per token (same leading dims as
            ``token_embeddings`` without the feature axis). Entries that are
            zero/False are excluded from the average.

    Returns:
        Tensor with the token axis reduced away: one pooled vector per row.
        A row whose mask is entirely zero yields a zero vector rather than NaN.
    """
    keep = mask[..., None].bool()
    # Zero out masked positions so they contribute nothing to the sum.
    summed = token_embeddings.masked_fill(~keep, 0.).sum(dim=1)
    # Clamp the token count to at least 1: a fully masked row would otherwise
    # compute 0 / 0 and poison the result with NaN.
    counts = mask.sum(dim=1)[..., None].clamp(min=1)
    return summed / counts
# Pool the per-token vectors into one embedding per sentence, using the
# tokenizer's attention mask to decide which positions count.
# outputs[0] is passed as the token embeddings — presumably the encoder's
# last hidden state; TODO confirm.
embeddings = mean_pooling(outputs[0], inputs['attention_mask'])

  from scipy.sparse import csr_matrix, issparse


In [2]:
print(embeddings)  # Prints the embeddings tensor itself (its values, not its shape)

tensor([[ 0.0161,  0.0055,  0.0199,  ...,  0.0372, -0.0831, -0.0112],
        [ 0.0037,  0.0346, -0.0131,  ...,  0.0247, -0.1021, -0.0303],
        [-0.0146, -0.0235, -0.0338,  ...,  0.0277, -0.0025, -0.0092]],
       grad_fn=<DivBackward0>)


In [6]:
from langchain_ollama import OllamaEmbeddings

# Embedding client for the Ollama model "nomic-embed-text:latest"; it is handed
# to the vector store as `embedding=` in a later cell.
# NOTE: this rebinds `embeddings`, which an earlier cell bound to the pooled
# Contriever tensor.
embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")

In [None]:
import ollama

# Pull the embedding model up front so that later calls using it do not fail
# with a ResponseError (presumably raised when the model is not available locally).
# NOTE(review): the untagged name here presumably refers to the same model as
# "nomic-embed-text:latest" used for OllamaEmbeddings — confirm.
ollama.pull('nomic-embed-text')

EmbeddingsResponse(embedding=[0.5976036190986633, 0.4045855700969696, -3.2977986335754395, -0.5292254090309143, 0.7531763911247253, 1.517987608909607, -0.12513232231140137, 0.4000648856163025, 0.07044126838445663, -1.1075001955032349, 0.6895996928215027, 1.2771917581558228, 1.1497957706451416, 1.0925021171569824, 0.24731364846229553, 0.2944515347480774, 0.14713799953460693, -0.6368029117584229, -0.20454668998718262, -0.19820916652679443, -1.7970716953277588, -0.6304977536201477, 0.035287044942379, -0.6740528345108032, 1.2647573947906494, 1.2760339975357056, -0.16538764536380768, -0.002988673746585846, -0.2957589030265808, -0.48256978392601013, 1.204719066619873, -0.6365360617637634, -0.5416136384010315, -1.0356149673461914, 0.6262243390083313, -1.2044473886489868, 0.6863617300987244, -0.05992253124713898, -0.19760572910308838, 0.12687329947948456, -0.014740394428372383, -0.5532193779945374, 0.3526389002799988, 0.04843376576900482, 0.6000370383262634, -0.9533551931381226, 0.507830500602

In [28]:
from uuid import uuid4

from langchain_core.documents import Document

def _make_document(text, source):
    """Wrap one text snippet in a Document tagged with where it came from."""
    return Document(page_content=text, metadata={"source": source})


# Ten sample snippets; the "source" metadata is one of tweet / news / website.
document_1 = _make_document(
    "I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    "tweet",
)
document_2 = _make_document(
    "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
    "news",
)
document_3 = _make_document(
    "Building an exciting new project with LangChain - come check it out!",
    "tweet",
)
document_4 = _make_document(
    "Robbers broke into the city bank and stole $1 million in cash.",
    "news",
)
document_5 = _make_document(
    "Wow! That was an amazing movie. I can't wait to see it again.",
    "tweet",
)
document_6 = _make_document(
    "Is the new iPhone worth the price? Read this review to find out.",
    "website",
)
document_7 = _make_document(
    "The top 10 soccer players in the world right now.",
    "website",
)
document_8 = _make_document(
    "LangGraph is the best framework for building stateful, agentic applications!",
    "tweet",
)
document_9 = _make_document(
    "The stock market is down 500 points today due to fears of a recession.",
    "news",
)
document_10 = _make_document(
    "I have a bad feeling I am going to get deleted :(",
    "tweet",
)

documents = [
    document_1, document_2, document_3, document_4, document_5,
    document_6, document_7, document_8, document_9, document_10,
]

# One freshly generated random UUID string per document, in the same order.
uuids = [str(uuid4()) for _ in documents]

In [30]:
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

COLLECTION_NAME = "my_documents"

# Local-mode Qdrant client storing its data under the given filesystem path.
client = QdrantClient(path="/tmp/my_documents")

# Only create the collection when it is missing: creating one that already
# exists raises, which would break this cell on any run after the first
# against the same on-disk path (e.g. Restart Kernel -> Run All).
if not client.collection_exists(COLLECTION_NAME):
    client.create_collection(
        collection_name=COLLECTION_NAME,
        # `size` must match the dimensionality of the vectors that `embeddings`
        # produces; cosine distance is used for similarity.
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )

# Dense-vector store over the collection, embedding text with `embeddings`.
qdrant = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
    retrieval_mode=RetrievalMode.DENSE,
)

# Seed only an empty collection. `uuids` are regenerated whenever their cell is
# re-run, so adding unconditionally to an already-populated collection would
# store the same documents again under new IDs.
if client.count(collection_name=COLLECTION_NAME).count == 0:
    qdrant.add_documents(documents=documents, ids=uuids)

# Retrieve the documents most similar to the query and display them.
query = "How much money did the robbers steal?"
found_docs = qdrant.similarity_search(query)
found_docs

[Document(metadata={'source': 'news', '_id': '0cf40ed8-c454-404e-872b-01ff53327b03', '_collection_name': 'my_documents'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'news', '_id': '9b5d0c6e-5da2-4dbe-a7bc-828a25672468', '_collection_name': 'my_documents'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={'source': 'tweet', '_id': '5aad140e-7efb-4523-8c47-0d43a5741b80', '_collection_name': 'my_documents'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website', '_id': '9b5def4f-cff3-4945-8be0-756f85ea1337', '_collection_name': 'my_documents'}, page_content='The top 10 soccer players in the world right now.')]