In [39]:
import os
from getpass import getpass

import google.generativeai as genai
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.document_loaders.csv_loader import CSVLoader

In [40]:
# Make sure the Gemini API key is available as an environment variable.
# The key is never written into the notebook source: an already-exported
# GEMINI_API_KEY is left untouched, and the user is prompted (input hidden)
# only when the variable is missing or empty.
if not os.environ.get('GEMINI_API_KEY'):
    os.environ['GEMINI_API_KEY'] = getpass('Enter your Gemini API key: ')

In [13]:
def read_data(file_name):
    """Load a CSV file and return the text of every loaded document.

    Args:
        file_name: Path of the CSV file; handed to ``CSVLoader`` as
            ``file_path``.

    Returns:
        list: The ``page_content`` of each document returned by
        ``CSVLoader.load()``, in the order the loader produced them.
    """
    documents = CSVLoader(file_path=file_name).load()
    return [document.page_content for document in documents]

In [14]:
# Load the IMDb CSV as a list of text snippets (one string per loaded document).
data = read_data('imdb_top_1000.csv')

In [17]:
def embed_and_load(content, model_name='sentence-transformers/all-mpnet-base-v2'):
    """Embed the given texts and store them in a Chroma vector store.

    Args:
        content: Texts to embed; passed as-is to ``Chroma.from_texts``.
        model_name: Name of the sentence-transformers model used to compute
            the embeddings. Defaults to the model this function previously
            hard-coded, so existing callers are unaffected.

    Returns:
        The vector store object returned by ``Chroma.from_texts``.
    """
    embeddings = SentenceTransformerEmbeddings(model_name=model_name)
    return Chroma.from_texts(content, embeddings)

In [18]:
# Embed the loaded texts and build the vector store that later cells query.
db = embed_and_load(data)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [21]:
def get_relevant_data(query, db, k=4):
  """Return the stored passages most similar to the query.

  Args:
      query: Natural-language question to search for.
      db: Vector store exposing ``similarity_search(query=..., k=...)``.
      k: Maximum number of passages to retrieve. The default of 4 matches
          the vector store's own default, so calls that omit ``k`` behave
          as before.

  Returns:
      Whatever ``db.similarity_search`` returns (for the Chroma store built
      in this notebook, a list of document objects).
  """
  return db.similarity_search(query=query, k=k)

In [37]:
def _passage_to_text(passage):
  """Flatten a retrieved passage (string, document, or list of them) to text."""
  if isinstance(passage, str):
    return passage
  if hasattr(passage, "page_content"):
    # Document-like object: keep only its text, not its repr/metadata.
    return passage.page_content
  if isinstance(passage, (list, tuple)):
    return "\n".join(_passage_to_text(item) for item in passage)
  return str(passage)


def make_rag_prompt(query, relevant_passage):
  """Build the prompt sent to the language model.

  Args:
      query: The user's question; inserted after ``QUESTION:``.
      relevant_passage: Reference text for the answer. May be a plain string,
          an object with a ``page_content`` attribute, or a list/tuple of
          either. Non-string input is flattened to newline-separated text so
          the prompt holds the passage text itself rather than the Python
          repr of the retrieved objects.

  Returns:
      str: The instruction template with the question and passage filled in.
  """
  passage_text = _passage_to_text(relevant_passage)
  prompt = ("""You are a helpful and informative bot that gives information about movies using text from the reference passage included below. \
  Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
  strike a friendly and converstional tone. \
  If the passage is irrelevant to the answer, you may ignore it.
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

  ANSWER:
  """).format(query = query, relevant_passage = passage_text)

  return prompt

In [23]:
def generate_answer(prompt, model_name='gemini-pro'):
    """Send a prompt to a Gemini model and return the generated text.

    Args:
        prompt: Prompt passed to the model's ``generate_content``.
        model_name: Name of the Gemini model to use. Defaults to
            ``'gemini-pro'``, the model this function previously hard-coded.

    Returns:
        The ``text`` attribute of the response returned by
        ``generate_content``.

    Raises:
        ValueError: If the ``GEMINI_API_KEY`` environment variable is unset
            or empty.
    """
    gemini_api_key = os.getenv("GEMINI_API_KEY")
    # Fail before touching the client so a missing key gives a clear message.
    if not gemini_api_key:
        raise ValueError("Gemini API Key not provided. Please provide GEMINI_API_KEY as an environment variable")
    genai.configure(api_key=gemini_api_key)
    model = genai.GenerativeModel(model_name)
    answer = model.generate_content(prompt)
    return answer.text

In [26]:
def final_answer(db, query):
    """Answer a question using retrieval-augmented generation.

    Retrieves passages related to ``query`` from ``db``, builds the prompt
    from them, and returns the model's reply.

    Args:
        db: Vector store handed to ``get_relevant_data``.
        query: The user's question.

    Returns:
        The value returned by ``generate_answer`` for the built prompt.
    """
    context = get_relevant_data(query, db)
    rag_prompt = make_rag_prompt(query, relevant_passage=context)
    return generate_answer(rag_prompt)

In [43]:
# End-to-end run: retrieve context for the question, build the prompt,
# query the model, and print its reply.
answer = final_answer(db = db, query = "Who is the director of interstellar")
print(answer)

The director of Interstellar is Christopher Nolan.
