In [86]:
from langchain.chains import ConversationalRetrievalChain
from langchain.schema.runnable import Runnable
import google.generativeai as genai
import os
import faiss
import numpy as np
import pandas as pd
import os


In [29]:
path = os.getcwd()

# Loading embedings
faiss_index = path + "/faiss_index"

# Loading all the data files 
data_source = path + "/data/data.txt"
pdf_source = path + "/data/resume.pdf"

google_api_key = os.getenv("GEMINI_API_KEY")

In [33]:
df = pd.read_csv('C:/Users/saura/Desktop/AI project/data/embeddings.csv')

In [25]:
api_key = os.environ.get('GEMINI_API_KEY')

In [26]:
genai.configure(api_key=api_key)

In [170]:
!pip install markdown

Collecting markdown
  Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Downloading Markdown-3.7-py3-none-any.whl (106 kB)
Installing collected packages: markdown
Successfully installed markdown-3.7


In [88]:

class GenerativeModelWrapper(Runnable):
    def __init__(self, model: genai.GenerativeModel):
        self.model = model

    def _run(self, inputs):
        # Assuming inputs is a dictionary with keys like 'context' and 'question'
        prompt = f"Context: {inputs['context']}\nQuestion: {inputs['question']}"
        response = self.model.generate(prompt)
        return response

    async def _arun(self, inputs):
        # If you want to make it asynchronous
        pass

    def invoke(self, inputs):
        return self._run(inputs)

# Initialize the Gemini model
gemini_model = genai.GenerativeModel(model_name="gemini-1.5-flash",
                                     generation_config=genai.GenerationConfig(temperature=0.5))

# Wrap the Gemini model in the Runnable
wrapped_model = GenerativeModelWrapper(gemini_model)


In [271]:
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain.vectorstores.base import VectorStoreRetriever
from pydantic import BaseModel, Field
import numpy as np
import faiss
from IPython.display import Markdown

def faiss_retriever(index, query_vector, df, k=5):
    query_vector = np.array(query_vector).astype('float32').reshape(1, -1)
    
    # Perform search
    distances, indices = index.search(query_vector, k)
    
    # Filter by score_threshold
    results = []
    for dist, idx in zip(distances[0], indices[0]):   
        result = {
            "sentence_chunk": df.iloc[idx]["sentence_chunk"],
            "page_number": df.iloc[idx]["page_number"],
            "distance": dist
        }
        results.append(result)

    return results

class GenerativeModelWrapper(Runnable):
    def __init__(self, model: genai.GenerativeModel):
        self.model = model

    def _run(self, inputs):
        # Assuming inputs is a dictionary with keys like 'context' and 'question'
        #prompt = f"Context: {inputs['context']}\nQuestion: {inputs['question']}"
        response = self.model.generate_content(inputs['prompt'])
        return response

    async def _arun(self, inputs):
        # If you want to make it asynchronous
        pass

    def invoke(self, inputs):
        return self._run(inputs)

# Initialize the Gemini model
gemini_model = genai.GenerativeModel(model_name="gemini-1.5-flash",
                                     generation_config=genai.GenerationConfig(temperature=0.6))

# Wrap the Gemini model in the Runnable
wrapped_model = GenerativeModelWrapper(gemini_model)

class CustomFaissRetriever( BaseModel):
    index: any = Field(...)
    df: any = Field(...)
    #vectorstore: any = Field(...)  # Add this line

    class Config:
        arbitrary_types_allowed = True  # Allow FAISS index and DataFrame types

    def _get_relevant_documents(self, query: str):
        # Get query embedding using Gemini API
        query_embedding = genai.embed_content(
            model="models/text-embedding-004",
            content=query,
            task_type="retrieval_document",
            title="profile"
        )["embedding"]

        # Retrieve relevant documents using FAISS retriever
        results = faiss_retriever(self.index, query_embedding, self.df)

        # Convert FAISS results to LangChain Document objects
        docs = [
            Document(
                page_content=res["sentence_chunk"],
                metadata={"page_number": res["page_number"], "distance": res["distance"]}
            )
            for res in results
        ]
        return docs

faiss_index = faiss.read_index('C:/Users/saura/Desktop/AI project/faiss_index')
# Initialize the custom FAISS retriever
retriever = CustomFaissRetriever(index=faiss_index, df=df)  # Add vectorstore argument
# Step 4: Create a Prompt Template
prompt = """"System: You are Saurav Sharad Mestry and pretend as Saurav Mestry is talking when you ask anything, a comprehensive, interactive resource for exploring Saurav's background, skills, and expertise. Be polite and provide answers based on the provided context only as I. Use only the provided data and not prior knowledge. \n Human: Take a deep breath and do the following step by step these 4 steps: \n 1. Read the context below \n 2. Answer the question using only the provided Help Centre information \n 3. Make sure to nicely format the output so it is easy to read on a small screen. \n4. Provide 3 examples of questions user can ask about me (Saurav Mestry) based on the questions from context. Context : \n ~~~ {context} ~~~ \n User Question: --- {question} --- \n \n If a question is directed at you, clarify that you are Saurav and proceed to answer as if the question were addressed to Saurav Mestry and answer as I. If you lack the necessary information to respond, simply state that you don't know; do not fabricate an answer. If a query isn't related to Saurav Mestry's background, politely indicate that you're programmed to answer questions solely about his experience, education, training, and aspirations. Offer three sample questions users could ask about Saurav Mestry for further clarity. When responding, aim for detail but limit your answer to a maximum of 150 words. Ensure your response is formatted for easy reading. Your output should be in a json format with 3 keys: answered - type boolean, response - markdown of your answer, questions - list of 3 suggested questions. Ensure your response is formatted for easy reading and please use only context to answer the question - my job depends on it. \n\n ```json"""

prompt_template = PromptTemplate(
    input_variables=["context", "question"], 
    template=prompt
)

query = "he has worked full time ever in his life? "

docs = retriever._get_relevant_documents(query)

if not docs:
    print("No relevant documents found.")
    # Handle the error or provide a default response
else:
    context = "\n".join([doc.page_content for doc in docs])


# Now using prompt_template to generate the prompt
prompt = prompt_template.format(context=context, question=query)

# Now using Gemini model to generate the response to the prompt
response = wrapped_model.invoke({'prompt': prompt})


In [272]:
print(context)


I belong to a humble middle class family. I was born in Mumbai, India. My father owned a business and my mother is a homemaker. I have an elder sister who is also a software engineer. I did most of my primary, secondary schooling from Mumbai and since then, I’ve been staying far from home due to education or work. I’m currently 26 years old studying in the United states with my second home in Sunnyvale, California. I currently have F1 -OPT work authorization to work for any employer in the United States until my visa expires by 2028. I’m open to H1-B sponsorship in future but won’t be needing any kind of sponsorship to start working. I am an Asian hetrosexual man with a minor disability and I won’t require any kind of special accommodations and can work better than a regular person. I do not have any political background and I do not have any military experience nor any relationships involved in government whatsoever.
what is his educational background| tell me about his education| whe

In [273]:
Markdown(response.candidates[0].content.parts[0].text) #ignore this line

```json
{
 "answered": true,
 "response": "Hi, I'm Saurav!  The context doesn't explicitly state whether I've worked full-time my entire life. It does mention that I'm currently pursuing a Master's degree and that I have experience working at Cortland Las Casas. This suggests that I may have had other jobs or internships prior to my current studies.",
 "questions": [
  "What are your career aspirations?",
  "Can you tell me more about your experience at Cortland Las Casas?",
  "What are your primary technical skills?"
 ]
}
```

In [214]:

import json

# Extract the text content from the response
response_text = response.candidates[0].content.parts[0].text

# Remove the JSON code block markers
json_string = response_text.replace("```json", "").replace("```", "").strip()

# Parse the JSON string
response_json = json.loads(json_string)


In [221]:

response_json.get('response')

"Hi, I'm Saurav. I'm a seasoned software developer with almost three years of experience. I've worked with cross-functional teams to bring digital products to life. I've also been involved in management and leadership roles, leading backend teams and consulting teams. My leadership style is servant leadership, where I focus on empowering my team by removing obstacles and offering guidance. I'm passionate about using technology to solve customer problems and creating a more equitable and sustainable world."