In [111]:
# Used to securely store your API key
import os
import textwrap

import google.generativeai as genai
import numpy as np
import pandas as pd
from IPython.display import Markdown

In [112]:
# Read the API key from the GOOGLE_API_KEY environment variable rather than
# embedding it in the notebook, where it would leak through version control
# and shared copies. Raises KeyError if the variable is not set.
# NOTE(review): the key that was previously hardcoded here should be treated
# as compromised and revoked.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

In [113]:
# Print the name of every available model that supports the 'embedContent'
# generation method.
for m in genai.list_models():
    if 'embedContent' not in m.supported_generation_methods:
        continue
    print(m.name)

models/embedding-001
models/text-embedding-004


In [114]:
import json

# Load the scraped pages. `data` is a list in which each item is a dictionary;
# its key-value pairs correspond to a page element and that element's content
# (the following cells read the 'title' and 'texts' keys).
# The encoding is given explicitly because the scraped text contains non-ASCII
# characters (Sinhala/Tamil); without it, decoding depends on the platform's
# default locale encoding.
with open('../scraper/scraped_data/data_requests.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [115]:
# Index the scraped pages by their title and lowercase every text fragment.
# The page dictionaries are updated in place, so `data` sees the same change.
cleaned_data = {}

for item in data:
    # A page without a 'title' key is stored under the empty string.
    cleaned_title = item.get('title', '')
    item['texts'] = [text.lower() for text in item['texts']]
    cleaned_data[cleaned_title] = item

In [116]:
import string

# Translation table that deletes every ASCII punctuation character; built once
# and reused for all text fragments.
punctuation_table = str.maketrans('', '', string.punctuation)

# Strip punctuation from every text fragment of every page (in place).
for key, item in cleaned_data.items():
    item['texts'] = [text.translate(punctuation_table) for text in item['texts']]

In [117]:
# Boilerplate removed from the start of a page's joined text when present.
# It appears to be the site's navigation menu (already lowercased and with
# punctuation stripped), repeated once for each side of the language switcher.
NAV_MENU_PREFIX = 'about us overview our team organisation structure contributions services downloads gallery image gallery video gallery news  events donate us vacancy faqs contact us inquiry contact details sitemap සිංහල தமிழ் about us overview our team organisation structure contributions services downloads gallery image gallery video gallery news  events donate us vacancy faqs contact us inquiry contact details sitemap '

# One record per page: its title and all of its text fragments joined into a
# single space-separated string with the boilerplate prefix removed.
training_data = []

# The loop variable is deliberately not called `data`: that name holds the list
# loaded from the JSON file, and rebinding it here would break a re-run of the
# earlier cleaning cell.
for page_title, page in cleaned_data.items():
    training_data.append({
        'title': page_title,
        'texts': ' '.join(page['texts']).removeprefix(NAV_MENU_PREFIX),
    })

In [118]:
# Build the DataFrame of page records and discard the row labelled 10.
# NOTE(review): why row 10 is excluded is not visible here - TODO document it.
training_df = pd.DataFrame(training_data).drop(index=[10])
training_df

Unnamed: 0,title,texts
0,Shrama Vasana Fund - Home,health clinics eye clinics eye clinic empower...
1,Shrama Vasana Fund - Overview,home about us overview overview vision contend...
2,Shrama Vasana Fund - Contributions,home contributions contributions services supp...
3,Shrama Vasana Fund - Services,home services services promotion of the welfar...
4,Shrama Vasana Fund - Downloads,home downloads downloads acts circulars acts ...
5,Shrama Vasana Fund - Image Gallery,home gallery image gallery image gallery shram...
6,Shrama Vasana Fund - Video Gallery,home gallery video gallery video gallery falan...
7,Shrama Vasana Fund - News & Events,home news events there are no articles in thi...
8,Shrama Vasana Fund - Donate Us,home donate us donate us services supported by...
9,Shrama Vasana Fund - Vacancy,home vacancy vacancy filters display 5 10 15 ...


In [119]:
model = 'models/embedding-001'

def embed_fn(title, text):
  """Embed one document for retrieval and return its embedding.

  Calls `genai.embed_content` with task type "retrieval_document", using the
  module-level `model` name as it is at call time.

  Args:
    title: Title passed to the embedding request.
    text: Document content to embed.

  Returns:
    The value stored under the "embedding" key of the API response.
  """
  response = genai.embed_content(
      model=model,
      content=text,
      task_type="retrieval_document",
      title=title,
  )
  return response["embedding"]

In [120]:
# Embed every page (one API call per row) and store the result in a new
# 'Embeddings' column.
training_df['Embeddings'] = training_df.apply(
    lambda page: embed_fn(page['title'], page['texts']),
    axis='columns',
)
training_df

Unnamed: 0,title,texts,Embeddings
0,Shrama Vasana Fund - Home,health clinics eye clinics eye clinic empower...,"[0.060573, -0.015612332, -0.06532606, -0.00336..."
1,Shrama Vasana Fund - Overview,home about us overview overview vision contend...,"[0.04076333, -0.025704147, -0.073924504, -0.01..."
2,Shrama Vasana Fund - Contributions,home contributions contributions services supp...,"[0.03980498, -0.01397844, -0.03992281, -0.0097..."
3,Shrama Vasana Fund - Services,home services services promotion of the welfar...,"[0.047610294, -0.016107777, -0.021704009, 0.00..."
4,Shrama Vasana Fund - Downloads,home downloads downloads acts circulars acts ...,"[0.029383786, -0.0035986549, -0.02318381, -0.0..."
5,Shrama Vasana Fund - Image Gallery,home gallery image gallery image gallery shram...,"[0.05432693, 0.013698386, -0.021377414, -0.005..."
6,Shrama Vasana Fund - Video Gallery,home gallery video gallery video gallery falan...,"[0.058101248, 0.021284133, -0.04773684, 0.0179..."
7,Shrama Vasana Fund - News & Events,home news events there are no articles in thi...,"[0.048141394, -0.004948371, -0.04932717, 0.010..."
8,Shrama Vasana Fund - Donate Us,home donate us donate us services supported by...,"[0.061574273, 0.0057453113, -0.049205527, 0.00..."
9,Shrama Vasana Fund - Vacancy,home vacancy vacancy filters display 5 10 15 ...,"[0.045427363, -0.00027735182, -0.04403984, -0...."


In [121]:
# Example question, and the embedding model used to encode it.
query = "what does the Shrama Vasana Fund do?"
model = 'models/embedding-001'

# Embed the question with the "retrieval_query" task type.
request = genai.embed_content(
    model=model,
    content=query,
    task_type="retrieval_query",
)

In [122]:
def find_best_passage(query, dataframe):
  """Return the 'texts' value of the row whose embedding best matches `query`.

  The query is embedded with the module-level `model` (task type
  "retrieval_query"); each row is scored by the dot product between its
  'Embeddings' entry and the query embedding, and the highest-scoring row wins.

  Args:
    query: Question text to embed.
    dataframe: DataFrame with 'Embeddings' and 'texts' columns.

  Returns:
    The 'texts' entry of the best-scoring row.
  """
  response = genai.embed_content(
      model=model,
      content=query,
      task_type="retrieval_query",
  )
  # Stack the per-row embeddings into one 2-D array so a single dot product
  # scores every row at once.
  document_matrix = np.stack(dataframe['Embeddings'])
  scores = np.dot(document_matrix, response["embedding"])
  best_position = np.argmax(scores)
  # The argmax is a position, not an index label, hence iloc.
  return dataframe.iloc[best_position]['texts']

In [123]:
# Retrieve the page text that best matches the question and display it.
passage = find_best_passage(query=query, dataframe=training_df)
passage

'home contributions contributions services supported by your donations granting scholarships to the value of rs 60000 to the children of those who die in service or are permanently disabled during servicecontributing to the development of a healthy workforce by conducting health clinics and eye clinics etc conduct empowerment programs for workers in low income groups in order to improve their economic conditions grant legal aid to the value of rs 10000 and assist the employees whose services have been terminated in order that they may seek redress through legal action in coasts of law when the place of employment has been closed down without prior notice provide facilities for acquiring professional qualifications including provision of other connected assistance to the workers in order that they may secure employment in an alternative place of work provide required assistance to workers when they encounter natural disasters such as floods earthquakes land slides fire outbreaks render 

In [124]:
def make_prompt(query, relevant_passage):
  """Build the text prompt that asks the model to answer `query` from a passage.

  Single quotes, double quotes and newlines are removed from the passage
  (newlines become spaces) so it sits cleanly inside the quoted PASSAGE line.
  The prompt is assembled from explicit lines, so no source-code indentation
  or doubled spaces end up in the text sent to the model.

  Args:
    query: The user's question; inserted verbatim.
    relevant_passage: Reference text the answer should be based on.

  Returns:
    The prompt string, ending with an "ANSWER:" line.
  """
  escaped = relevant_passage.replace("'", "").replace('"', "").replace("\n", " ")
  instructions = (
      "You are a helpful and informative bot that answers questions using text "
      "from the reference passage included below. "
      "Be sure to respond in a complete sentence, being comprehensive, "
      "including all relevant background information. "
      "However, you are talking to a non-technical audience, so be sure to "
      "break down complicated concepts and "
      "strike a friendly and conversational tone. "
      "If the passage is irrelevant to the answer, you may ignore it."
  )
  prompt = (
      f"{instructions}\n"
      f"QUESTION: '{query}'\n"
      f"PASSAGE: '{escaped}'\n"
      "\n"
      "ANSWER:\n"
  )

  return prompt

In [125]:
# Assemble the prompt for the retrieved passage and print it for inspection.
prompt = make_prompt(query=query, relevant_passage=passage)
print(prompt)

You are a helpful and informative bot that answers questions using text from the reference passage included below.   Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.   However, you are talking to a non-technical audience, so be sure to break down complicated concepts and   strike a friendly and converstional tone.   If the passage is irrelevant to the answer, you may ignore it.
  QUESTION: 'what does the Shrama Vasana Fund do?'
  PASSAGE: 'home contributions contributions services supported by your donations granting scholarships to the value of rs 60000 to the children of those who die in service or are permanently disabled during servicecontributing to the development of a healthy workforce by conducting health clinics and eye clinics etc conduct empowerment programs for workers in low income groups in order to improve their economic conditions grant legal aid to the value of rs 10000 and assist the employees whose service

In [126]:
# Create the generative model, send it the prompt, and render the reply text
# as Markdown.
gemini_model = genai.GenerativeModel(model_name='gemini-1.5-pro-latest')
answer = gemini_model.generate_content(prompt)
Markdown(answer.text)

The Shrama Vasana Fund is an organization in Sri Lanka that helps workers in a variety of ways, such as providing scholarships for children of workers who died or were disabled on the job, running health clinics, offering legal and financial aid, and assisting workers who have lost their jobs due to workplace closures or natural disasters. 


In [127]:
# Second question: retrieve the best passage, build the prompt, generate the
# answer and render it as Markdown.
query = 'What are the services that the Shrama Vasana Fund offers?'
passage = find_best_passage(query=query, dataframe=training_df)
prompt = make_prompt(query=query, relevant_passage=passage)
answer = gemini_model.generate_content(prompt)
Markdown(answer.text)

The Shrama Vasana Fund offers a variety of services to support workers in Sri Lanka. For example, they promote worker well-being through health and eye clinics, along with awareness programs about important topics like industrial safety and hygiene. They also offer financial aid to workers during emergencies, such as workplace closures or natural disasters. Additionally, they provide educational opportunities, like professional courses, to help workers find new employment. Finally, they recognize and appreciate the contributions of individuals who champion worker welfare. 


In [128]:
# Third question: same retrieve -> prompt -> generate -> render pipeline.
query = 'Does the fund have any vacancy at the momment?'
passage = find_best_passage(query=query, dataframe=training_df)
prompt = make_prompt(query=query, relevant_passage=passage)
answer = gemini_model.generate_content(prompt)
Markdown(answer.text)

Yes, it seems the fund has some job openings!  The passage mentions that there are "vacancies extended till 20240426".  While the exact date format is a little unclear, it looks like they are accepting applications for a while longer. 👍 
