In [1]:
from langchain.document_loaders import CSVLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

In [2]:
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [3]:
loader = CSVLoader("../data/processed/TMDB.csv",encoding="utf-8")
documents = loader.load()

In [4]:
import os

persist_directory = "../models/chroma_db"

# Check if the directory for the Chroma database exists
if os.path.exists(persist_directory):
    # Load the existing Chroma database
    db = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
    print("Existing Chroma database loaded.")
else:
    # If the database doesn't exist, create a new one
    db = Chroma.from_documents(documents, embedding_function, persist_directory=persist_directory)
    # Persist the new database
    db.persist()
    print("New Chroma database created and persisted.")


New Chroma database created and persisted.


  db.persist()


In [5]:
query = "I want to watch a sci-fi movie."
docs = db.similarity_search(query)
print(docs[0].page_content)

index: 6258
title: Starship Troopers 2: Hero of the Federation
vote_average: 4.279
vote_count: 517
status: Released
release_date: 2004-04-24
runtime: 92
overview: In the sequel to Paul Verhoeven's loved/reviled sci-fi film, a group of troopers taking refuge in an abandoned outpost after fighting alien bugs, failing to realize that more danger lays in wait.
poster_path: /9iqrqOjLaFcGNnSQlJxYLfgrhqn.jpg
genres: Adventure, Horror, Action, Science Fiction
backdrop_path: /kSEc72AYeZEgwPBHle8bGjmRnAC.jpg


In [6]:
from langchain.prompts import ChatPromptTemplate,PromptTemplate
from langchain_community.chat_models import ChatLlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks import StreamingStdOutCallbackHandler

In [7]:
template = """You are a movie recommendation system. Based on the context provided, recommend a movie:

Context: {context}

Question: {question}
Just give the answer in 1 or 2 sentences. Don't give spoilers.
Answer (brief and direct):
"""
prompt = ChatPromptTemplate.from_template(template)

In [25]:
"""
Phi-3.5-mini-instruct-Q5_K_L.gguf
tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
Phi-3-mini-4k-instruct-q4.gguf
"""

# Initialize Callback manager
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = ChatLlamaCpp(
    model_path="../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    n_ctx=1024,  # Optimize context size based on your needs
    verbose=True,  # Keep this only if necessary
    use_mlock=True,  # Ensure enough memory is available
    n_gpu_layers=16,  # Try increasing if you have enough GPU memory
    n_threads=8,  # Try increasing this based on CPU cores
    n_batch=1024,  # Increase batch size for higher throughput
    callback_manager=callback_manager
)


llama_model_loader: loaded meta data with 23 key-value pairs and 201 tensors from ../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = tinyllama_tinyllama-1.1b-chat-v1.0
llama_model_loader: - kv   2:                       llama.context_length u32              = 2048
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 2048
llama_model_loader: - kv   4:                          llama.block_count u32              = 22
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 5632
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 64
llama_model_loader: - kv   7:           

llama_model_loader: - kv  13:                      tokenizer.ggml.tokens arr[str,32000]   = ["<unk>", "<s>", "</s>", "<0x00>", "<...
llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,32000]   = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,32000]   = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,61249]   = ["▁ t", "e r", "i n", "▁ a", "e n...
llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 1
llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 2
llama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 0
llama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 2
llama_model_loader: - kv  21:                    tokenizer.chat_template str              = {% f

In [26]:
from langchain.chains import RetrievalQA
from langchain.schema import retriever

In [27]:
prompt = PromptTemplate(
    template=template, 
    input_variables=[
        'context', 
        'question',
    ]
)

# Initialise RetrievalQA Chain
chain = RetrievalQA.from_chain_type(
    llm,
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=False,
    chain_type_kwargs={"prompt": prompt}
)

In [28]:
response = chain.invoke({"query": "Recommend a sci-fi movies like Star Wars"})

Based on the context provided, the movie recommendation system suggests a sci-fi movie that follows the journey of Princess Leia, the main character, and her attempts to save her people from the tyranny of the Empire. The film's title is "Star Wars: Episodes II - Attack of the Cloones," and it stars Carrie Fisher as Princess Leia. It has a vote average of 8.204 out of 19,155 votes in its respective year, making it one of the highest-rated movies on the system's recommendation database.

llama_perf_context_print:        load time =   13600.90 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   732 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   125 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   19931.81 ms /   857 tokens


In [29]:
response = chain.invoke({"query": "Recommend an commedy movie. But it should include rommence theme also"})

Llama.generate: 30 prefix-match hit, remaining 726 prompt tokens to eval


Based on the context provided, "Last Action Hero" is a movie with a romantic thematic element. It should include scenes depicting characters falling in love and interacting with each other in a fun and lighthearted manner while also highlighting the action-packed aspects of the film's plot. Additionally, it should have elements of humor and adventure that bring the characters together as they fight against villainous antagonists. The movie's runtime is 131 minutes, with a vote average of 6.432 out of 7.03, a rating of "Relaysed," and an average review score of 8.3/10 from Rotten Tomatoes.

llama_perf_context_print:        load time =   13600.90 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   726 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   148 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   21350.45 ms /   874 tokens


In [30]:
response = chain.invoke({"query": "Recommend a movie with a theme of friendship"})

Llama.generate: 30 prefix-match hit, remaining 629 prompt tokens to eval


Based on the context provided, "Friends with Benefits" is a movie recommended by the author for its theme of friendship. The movie involves two individuals, Dylan and Jamie, who are good friends, but decide to explore their sexual desires after one night of unprotected sex. Although the movie does have elements of a Hollywood romantic comedy, it explores the complexities of friendship as well as the challenges that come with trying something new. The title is a play on the popular phrase "friends for life," which suggests a deep connection between two individuals regardless of whether or not they have been together long-term. Overall, this movie provides a valuable insight into the power and importance of friendship in a wide range of situations, from romantic encounters to personal relationships.

llama_perf_context_print:        load time =   13600.90 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   629 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   164 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   20774.15 ms /   793 tokens


In [31]:
response = chain.invoke({"query": "I want to watch a superhero movie with a female main star"})

Llama.generate: 30 prefix-match hit, remaining 700 prompt tokens to eval


For a superhero movie with a female main star, the context provided suggests "Supergirl," which stars the titular character as an alien cousin of Superman. While Karla (Kara), as she is known in the story, faces a powerful enemy wicked witch to retrieve an orb lost by her cousin and find herself up against the witch, the movie's overview includes being set on Earth during a galactic war between two alien races. The release dates of the movie also suggest it is scheduled for 2019-2023 and the movie features the Justice League x RWBY: Super Heroes & Huntsmen Part One, with the release date indicating the movie will air in April 2023 on various streaming platforms. Overall, based on this information, "Supergirl" would likely fit the requirements of a superhero movie with a female main star.

llama_perf_context_print:        load time =   13600.90 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   700 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   193 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   23155.90 ms /   893 tokens


In [32]:
response = chain.invoke({"query": "I want you to recommend movies that are guaranteed to make you cry."})

Llama.generate: 30 prefix-match hit, remaining 699 prompt tokens to eval


To recommend a movie that is guaranteed to make you cry, I would suggest the classic rom-com "The Notebook" (2004). This film follows two teenage lovers who navigate the trials of life in a small Southern town during the Great Depression. The emotional intensity of this film comes from its beautiful and poignant portrayal of love and loss, making it an ideal choice for fans of heartwarming tales with heartbreaking endings.

llama_perf_context_print:        load time =   13600.90 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   699 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   101 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   17481.74 ms /   800 tokens
