In [15]:
COLLECTION_NAME = 'movies_vecdb'
DIMENSION = 384
URI = 'http://localhost:19530'

In [1]:
!python --version

Python 3.10.12


In [2]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores.milvus import Milvus
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [17]:
import os

docs = []

for file in os.listdir('./movie_text_data/'):
    loader = TextLoader(f'./movie_text_data/{file}')
    docs.extend(loader.load())

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)  # DA OTTIMIZZARE
all_splits = text_splitter.split_documents(docs)

In [18]:
import torch

device = None
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

model_name = "sentence-transformers/all-MiniLM-L12-v2"
model_kwargs = {'device': device}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

In [7]:
vector_db = Milvus(
    embedding_function=embeddings,
    connection_args={'host':'127.0.0.1', 'port':'19530'},
    drop_old=False
)

In [19]:
vector_db = Milvus.from_documents(
    all_splits,
    embeddings,
    connection_args={'host':'127.0.0.1', 'port':'19530'},
)

In [20]:
query = "Which are the Crime movies directed by Martin Scorsese?"
result = vector_db.similarity_search(query, k=3)

In [21]:
for n, doc in enumerate(result):
    print(f"DOCUMENT {n+1}:")
    print(doc.page_content)
    print()

DOCUMENT 1:
Title: Dos crímenes
Director: Roberto Sneider
Genres: Comedy, Crime, Drama
Brief Plot: After being falsely accused of a murder, Marcos (Alcazar) runs away to the small and quiet town of Cuevano, where he has relatives. There he finds that the family matters are more complicate...
Year: 1994
Stars: Damián Alcázar, José Carlos Ruiz, Pedro Armendáriz Jr.

DOCUMENT 2:
Title: Dos crímenes
Director: Roberto Sneider
Genres: Comedy, Crime, Drama
Brief Plot: After being falsely accused of a murder, Marcos (Alcazar) runs away to the small and quiet town of Cuevano, where he has relatives. There he finds that the family matters are more complicate...
Year: 1994
Stars: Damián Alcázar, José Carlos Ruiz, Pedro Armendáriz Jr.

DOCUMENT 3:
Title: Casino
Director: Martin Scorsese
Genres: Crime, Drama
Brief Plot: In Las Vegas, two best friends - a casino executive and a mafia enforcer - compete for a gambling empire and a fast-living, fast-loving socialite.
Year: 1995
Stars: Robert De Niro, 

In [22]:
print(f"Default collection name - {vector_db.collection_name}")
print(f"Default search params - {vector_db.search_params}")
print(f"Default index params - {vector_db.index_params}")

Default collection name - LangChainCollection
Default search params - {'metric_type': 'L2', 'params': {'ef': 10}}
Default index params - None


In [23]:
vector_db.col

<Collection>:
-------------
<name>: LangChainCollection
<description>: 
<schema>: {'auto_id': True, 'description': '', 'fields': [{'name': 'source', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 65535}}, {'name': 'text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 65535}}, {'name': 'pk', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}], 'enable_dynamic_field': False}

In [24]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

In [27]:
llm = LlamaCpp(
        model_path="./models/llama-2-7b-chat.Q4_K_M.gguf",
        n_gpu_layers=-1,
        n_batch=512,
        temperature=0.1,
        n_threads=8,
        max_tokens=16384,
        # f16_kv=True,
        # callback_manager=CallbackManager(StreamingStdOutCallbackHandler()),
        verbose=True
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ./models/llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32       

In [28]:
from langchain_core.prompts import PromptTemplate

template = """You are an helpful AI assistant, which is expert about the field of Movies and Cinema.
Answer to the question in a precise way: use the search results to build the answer, if the asked informations
don't exist within the search results, DON'T INVENT THEM AND JUST ANSWER 'I don't know how to answer to this question'. 

For each movie, report these informations following this structure:
- Title
- Year
- Genre
- Director
- Brief description of the plot
- Main actors

Generate a maximum of 5 movies.
At the end of your whole answer, you must generate the special token '[END]'. Do not generate other tokens after this one.

Question: {question}

Answer: """

prompt = PromptTemplate.from_template(template)

In [29]:
from langchain_core.runnables import RunnablePassthrough
retriever = vector_db.as_retriever()
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
)

In [30]:
resp = rag_chain.invoke("Report me one Crime movie or with Johnny Depp")
print(str(resp))


llama_print_timings:        load time =    9664.64 ms
llama_print_timings:      sample time =     221.79 ms /   127 runs   (    1.75 ms per token,   572.61 tokens per second)
llama_print_timings: prompt eval time =    9664.10 ms /   182 tokens (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:        eval time =   19856.82 ms /   126 runs   (  157.59 ms per token,     6.35 tokens per second)
llama_print_timings:       total time =   36089.24 ms /   308 tokens




- Title: The Departed (2006)
- Year: 2006
- Genre: Crime, Thriller
- Director: Martin Scorsese
- Brief description of the plot: The Departed is a crime drama film that follows an undercover cop (Leonardo DiCaprio) who infiltrates a Boston crime syndicate, while a mole within the police department (Matt Damon) works to uncover his identity.
- Main actors: Leonardo DiCaprio, Matt Damon, Jack Nicholson
[END]
