In [109]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.embeddings import SentenceTransformerEmbeddings
from transformers import pipeline
from datetime import datetime
import csv
import os
import google.generativeai as genai

In [68]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m37.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [62]:
file_path = 'benori_text.txt'
with open(file_path, 'r', encoding='utf-8') as f:
    text = f.read()
    # Split into smaller chunks for better retrieval without headlines
    chunks = text.split('\n\n\n')  # Reduced to 300
    # <question>_q_ <answer>
    qa_pairs = []
    for chunk in chunks:
      question, answer = chunk.split('_q_', 1)  # Split at the first '_q_' only
      qa_pairs.append({'question': question.strip(), 'answer': answer.strip()})
      print(question)
    df = pd.DataFrame(qa_pairs)
df

About Benori

The Origin Of Our Name

Our Values

Industries we work in


Unnamed: 0,question,answer
0,About Benori,"Powered by digital agility, deep research capa..."
1,The Origin Of Our Name,Benori is a composite of two words – Bene (Lat...
2,Our Values,1) Excellence in client delivery.\n2) Agility ...
3,Industries we work in,Professional Services: Consulting and Advisory...


In [63]:
embedder = SentenceTransformer('all-MiniLM-L6-v2')

df['q_emb'] = df.question.apply(lambda x: embedder.encode(x))
df['a_emb'] = df['answer'].apply(lambda x: embedder.encode(x))
df

Unnamed: 0,question,answer,q_emb,a_emb
0,About Benori,"Powered by digital agility, deep research capa...","[-0.01970333, 0.025244396, -0.07787554, -0.014...","[0.006307535, -0.05050508, -0.055253927, -0.04..."
1,The Origin Of Our Name,Benori is a composite of two words – Bene (Lat...,"[-0.082969956, 0.04446866, -0.043638352, 0.039...","[0.02205597, -0.002585406, -0.036837827, 0.004..."
2,Our Values,1) Excellence in client delivery.\n2) Agility ...,"[-0.016780507, 0.10757069, -0.086472414, -0.01...","[0.012495399, -0.0035959575, -0.0080137495, -0..."
3,Industries we work in,Professional Services: Consulting and Advisory...,"[0.0053044516, -0.06823793, -0.0014293456, 0.0...","[0.013488348, -0.13495222, -0.0060564876, 0.01..."


In [72]:
# Create documents for FAISS
documents = [Document(page_content=question, metadata={'answer': answer})
             for question, answer in zip(df.question, df.answer)]


embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_documents(documents=documents, embedding=embeddings)
db.save_local('faiss_index')

In [84]:
query = "what does it do?"
docs_and_scores = db.similarity_search_with_score(query, k=3)  # Get top 3 results

q_res = []
a_res = []
for doc, score in docs_and_scores:
  q_res.append(doc.page_content)
  a_res.append(doc.metadata['answer'])
  print(f"Question: {doc.page_content}, Answer: {doc.metadata['answer']}, Score: {score}")

Question: Industries we work in, Answer: Professional Services: Consulting and Advisory Firms, Law Firms, Industry Associations

Financial Services: Banking, Insurance, Private Equity and Venture Capital, Wealth and Asset Management, Other Financial Institutions

Technology & Internet: Software and Services, Telecom, Digital Businesses, E-Commerce

Consumer & Retail: Food & Beverage, Beauty & Personal Care, Home Care, Consumer Durables, Retail & Lifestyle

Industrials & Manufacturing: Chemicals, Pharmaceuticals, Automotive, Hi-Tech, Logistics

Global Capability Centers (GCCs): Parent Firms, GCCs, Partner firms, Score: 1.6327980756759644
Question: Our Values, Answer: 1) Excellence in client delivery.
2) Agility and continuous innovation.
3) Integrity and highest standards of ethical behaviour.
4) Collaboration, trust and transparency.
5) People first., Score: 1.7530332803726196
Question: About Benori, Answer: Powered by digital agility, deep research capabilities and a highly experience

In [80]:
GEMINI_API = userdata.get('gemini-ai')
genai.configure(api_key=GEMINI_API)

In [98]:
model = genai.GenerativeModel()
response = model.generate_content('hi, how are you')
print(response)

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "I'm doing well, thank you for asking!  How are you today?\n"
              }
            ],
            "role": "model"
          },
          "finish_reason": "STOP",
          "avg_logprobs": -0.06681840287314521
        }
      ],
      "usage_metadata": {
        "prompt_token_count": 5,
        "candidates_token_count": 18,
        "total_token_count": 23
      },
      "model_version": "gemini-1.5-flash-002"
    }),
)


In [87]:
BOILER_PROMPT = f'given the query: {query}, here are some relevant question answers taken: {q_res[:3]}-{a_res[:3]}, your task is to provide relevant information from these answers and generate response.'

### Inference

In [100]:
response = model.generate_content(BOILER_PROMPT)
print(response.text)

Benori is a research firm that provides multilingual research services to global clients across various industries.  Their services utilize a 360-degree approach combining primary and secondary research with data modeling to offer insights into market trends, competitive landscapes, regulatory changes, and technological advancements.  They work with clients in sectors including professional services (consulting, law, associations), financial services (banking, insurance, etc.), technology, consumer & retail, industrials & manufacturing, and global capability centers.  Their values emphasize excellence in client delivery, agility, innovation, integrity, collaboration, trust, and transparency, prioritizing their people.



### Sending Email

In [131]:
import smtplib
from email.mime.text import MIMEText

from google.colab import userdata

In [132]:
sender_id, sender_pass = (userdata.get('email_id'), userdata.get('email_id_pass'))
receiver_id = 'aryan.k.shishodia@gmail.com'
smtp_server = 'smtp.gmail.com'
smtp_port = 587

In [134]:
mail_body = f'This is an automated mail.\n Given query: {query};\n Answer: {response.text}'
msg = MIMEText(mail_body)
msg['Subject'] = 'Benori LLM Query'
msg['From'] = sender_id
msg['To'] = receiver_id

server = smtplib.SMTP(smtp_server, smtp_port)
server.starttls()
server.login(sender_id, sender_pass)
server.sendmail(sender_id, receiver_id, msg.as_string())
print("Email sent successfully.")

Email sent successfully.
