In [1]:
import os

# Keep the API key out of the notebook source: reuse GEMINI_API_KEY when it is
# already set in the environment, otherwise ask for it without echoing the input.
if not os.environ.get('GEMINI_API_KEY'):
    from getpass import getpass

    os.environ['GEMINI_API_KEY'] = getpass('GEMINI_API_KEY: ')

In [2]:
import json

def read_data(file_name):
    """Collect every paragraph context stored in a JSON dataset file.

    The file must decode to an object with a top-level ``data`` list; each
    entry of that list must have a ``paragraphs`` list whose items each hold
    a ``context`` value.

    Args:
        file_name: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A flat list of the ``context`` values, in the order they appear.
    """
    with open(file_name, 'r', encoding='utf-8') as handle:
        dataset = json.load(handle)

    # Flatten data -> paragraphs -> context in a single pass.
    return [
        para['context']
        for article in dataset['data']
        for para in article['paragraphs']
    ]

In [3]:
# Flat list of every paragraph `context` value found in the dataset file.
content = read_data('dev-v2.0.json')

In [4]:
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

def embed_and_load(content, model_name = 'sentence-transformers/all-mpnet-base-v2'):
    """Embed ``content`` and load it into a Chroma vector store.

    Args:
        content: The texts to index; passed unchanged to ``Chroma.from_texts``.
        model_name: Name of the sentence-transformers model used to embed the
            texts. Defaults to the model this function previously hard-coded,
            so existing one-argument calls behave the same.

    Returns:
        The store object returned by ``Chroma.from_texts``.
    """
    embeddings = SentenceTransformerEmbeddings(model_name = model_name)
    db = Chroma.from_texts(content, embeddings)
    return db

In [5]:
# Embed every context string and load the vectors into a Chroma store for retrieval.
db = embed_and_load(content)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [6]:
def get_relevant_data(query, db):
    """Look up the entries of ``db`` that are most similar to ``query``.

    Args:
        query: The search text.
        db: A store exposing ``similarity_search(query=...)``.

    Returns:
        Whatever ``db.similarity_search`` returns for the query (for a
        LangChain vector store this is presumably a list of documents).
    """
    return db.similarity_search(query=query)

In [7]:
def make_rag_prompt(query, relevant_passage):
    """Build the grounded question-answering prompt sent to the language model.

    Args:
        query: The user's question; inserted after ``QUESTION:``.
        relevant_passage: Reference text inserted after ``PASSAGE:``. A
            non-string value is rendered with ``str()`` by ``str.format``.

    Returns:
        The formatted prompt string, ending with an ``ANSWER:`` cue line.
    """
    # Adjacent string literals are used instead of a backslash-continued
    # triple-quoted string so the source indentation does not leak into the
    # prompt as stray runs of spaces in the middle of sentences.
    template = (
        "You are a helpful and informative bot that answers questions using "
        "text from the reference passage included below. "
        "Be sure to respond in a complete sentence, being comprehensive, "
        "including all relevant background information. "
        "However, you are talking to a non-technical audience, so be sure to "
        "break down complicated concepts and "
        "strike a friendly and conversational tone. "
        "If the passage is irrelevant to the answer, you may ignore it.\n"
        "QUESTION: '{query}'\n"
        "PASSAGE: '{relevant_passage}'\n"
        "\n"
        "ANSWER:\n"
    )
    # Only the template is parsed by format(); braces inside the substituted
    # values are inserted literally.
    return template.format(query=query, relevant_passage=relevant_passage)

In [8]:
import google.generativeai as genai

def generate_answer(prompt, model_name = 'gemini-pro'):
    """Send ``prompt`` to a Gemini model and return the text of its reply.

    Args:
        prompt: The prompt passed to ``generate_content``.
        model_name: Identifier handed to ``genai.GenerativeModel``. Defaults to
            ``'gemini-pro'``, the value this function previously hard-coded,
            so existing one-argument calls behave the same.

    Returns:
        The ``text`` attribute of the response returned by ``generate_content``.

    Raises:
        ValueError: If the ``GEMINI_API_KEY`` environment variable is unset or
            empty.
    """
    gemini_api_key = os.getenv("GEMINI_API_KEY")
    if not gemini_api_key:
        raise ValueError("Gemini API Key not provided. Please provide GEMINI_API_KEY as an environment variable")
    genai.configure(api_key = gemini_api_key)
    model = genai.GenerativeModel(model_name)
    answer = model.generate_content(prompt)
    return answer.text

In [9]:
def final_answer(db,query):
    """Answer ``query`` using passages retrieved from ``db``.

    Retrieves the passages most similar to the query, builds the prompt from
    their text, and returns the model's reply.

    Args:
        db: Vector store handed to ``get_relevant_data``.
        query: The user's question.

    Returns:
        The answer text returned by ``generate_answer``.
    """
    retrieved = get_relevant_data(query, db)

    # The retriever hands back document objects, not plain text. Formatting the
    # list directly would put its repr() (class names, metadata, escaped
    # newlines) into the prompt, so pull out each document's text instead.
    if isinstance(retrieved, str):
        relevant_text = retrieved
    else:
        relevant_text = "\n\n".join(
            item if isinstance(item, str) else getattr(item, "page_content", str(item))
            for item in retrieved
        )

    prompt = make_rag_prompt(query, relevant_passage = relevant_text)
    answer = generate_answer(prompt)

    return answer

In [12]:
# End-to-end run: retrieve passages similar to the question from `db`, build the
# prompt from them, and print the text of the model's reply.
answer = final_answer(db = db,query = "What is the scientific model of a general computing machine?")
print(answer)

A Turing machine is a theoretical mathematical model that broadly represents any computing machine - even including a human with pencil and paper! They are used in computer science to study and solve problems that might arise in any type of computation scenario.
