In [64]:
import google.generativeai as genai
from dotenv import load_dotenv
import os
load_dotenv()

True

In [2]:
class GeminiModel:
    def __init__(self, gemini_api , model_name):
        # Configure the API with the provided key
        genai.configure(api_key= os.getenv("GEMINI_API_KEY"))
        
        # Default configuration settings; can be customized further if needed
        generation_config = {
            "temperature": 0,
            "top_p": 1,
            "top_k": 1,
            "max_output_tokens": 30720,
        }
        
        safety_settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_ONLY_HIGH"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_ONLY_HIGH"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_ONLY_HIGH"},
        ]
        
        # Set up the model with the provided model name
        self.model = genai.GenerativeModel(model_name="gemini-1.5-flash",
                                           generation_config=generation_config,
                                           safety_settings=safety_settings)

    def generate_content(self, prompt):
        # Generate content based on the provided prompts
        response = self.model.generate_content([prompt])
        return response.text

convert { User_Query ===> Re-structured query }

In [5]:
from pathlib import Path

GM = GeminiModel(os.getenv("GEMINI_API_KEY"), "gemini-1.5-flash")

translator_prompt_template = Path(r"D:\projects\Serious_Banarasia\prompts\translator\translator_prompt.txt")
if not translator_prompt_template.is_file():
    raise FileNotFoundError(f"Prompt file not found at {translator_prompt_template}")

with open(translator_prompt_template, "r", encoding="utf-8") as f:
    translator_prompt_template = f.read()
    formatted_translator_prompt_template = translator_prompt_template.format(
        user_query='Show me pictures of some famous ghats in varanasi'
    )

english_refined_result = GM.generate_content(formatted_translator_prompt_template)
print(english_refined_result)

Show me pictures of some famous ghats in Varanasi.



Query Keyword Seggregation

In [10]:
# Load and format the query keywords segregator template

import json

query_keywords_seggregator_template = Path(r"D:\projects\Serious_Banarasia\prompts\query_router\query_keywords_seggregator.txt")
if not query_keywords_seggregator_template.is_file():
    raise FileNotFoundError(f"Prompt file not found at {query_keywords_seggregator_template}")

with open(query_keywords_seggregator_template, "r", encoding="utf-8") as f:
    query_keywords_seggregator_template = f.read()
    formatted_query_keywords_seggregator_template = query_keywords_seggregator_template.format(
        re_structured_query=english_refined_result
    )
    
    
print(formatted_query_keywords_seggregator_template)


keywords_result = {}
keywords_result = GM.generate_content(formatted_query_keywords_seggregator_template)
print(keywords_result)



try:
    start_index = keywords_result.find("{")
    end_index = keywords_result.rfind("}") + 1
    json_content = keywords_result[start_index:end_index]
    keywords_result_dict = json.loads(json_content)
    print("Parsed JSON:", keywords_result_dict)
except json.JSONDecodeError as e:
    print("JSONDecodeError:", e)
    print("Invalid JSON content:", keywords_result)
except Exception as e:
    print("Error:", e)
    print("Generated Content:", keywords_result)



You are given a query: Show me pictures of some famous ghats in Varanasi.
 and are tasked to classify the query into a maximum of 2 classes: "text_api, image_api" if the query demands or requires classification into these API classes.

When converting the query to keywords, ensure that:

Keywords preserve the complete semantic meaning of the original query.
No keyword segmentation should lead to a loss of context or produce irrelevant results when performing a search.
Group words that form meaningful phrases together, avoiding unnecessary splitting.
Return the output in the following structure:

json
{
    "api_needed": 0,
    "api_name": ["keyword_1", "keyword_2", ... "keyword_n"],
    "api_name": ["keyword_1", "keyword_2", ... "keyword_n"]
}

Example:

Query: images of famous Varanasi ghats

Correct Response:
json
{
    "api_needed": 1,
    "image_api": ["famous Varanasi ghats"]
}


Incorrect Response:
json
{
    "api_needed": 1,
    "image_api": ["images", "Varanasi ghats", "famous 

In [11]:
#Serper tool for text_generation
import http.client


def serperquery(query):
    conn = http.client.HTTPSConnection("google.serper.dev")
    payload = json.dumps({
    "q": query
    })
    headers = {
    'X-API-KEY': os.getenv("SERPER_API_KEY"),
    'Content-Type': 'application/json'
    }
    conn.request("POST", "/search", payload, headers)
    res = conn.getresponse()
    data = res.read()
    return(data.decode("utf-8"))


#Serper tool for image generation

# def serper_img_query(keywords):
#   conn = http.client.HTTPSConnection("google.serper.dev")
#   payload = json.dumps({
#     "q": keywords,
#     "gl": "in"
#   })
#   headers = {
#     'X-API-KEY': os.getenv("SERPER_API_KEY"),
#     'Content-Type': 'application/json'
#   }
#   conn.request("POST", "/images", payload, headers)
#   res = conn.getresponse()
#   data = res.read()
#   return(data.decode("utf-8"))


In [12]:
def serper_img_query(keywords):
    conn = http.client.HTTPSConnection("google.serper.dev")
    payload = json.dumps({
        "q": keywords,
        "gl": "in",
        "type": "images",  # Add this to specify image search
        "engine": "google",
        "num": 10         # Number of results to return
    })
    headers = {
        'X-API-KEY': os.getenv("SERPER_API_KEY"),
        'Content-Type': 'application/json'
    }
    conn.request("POST", "/images", payload, headers)
    res = conn.getresponse()
    data = res.read()
    return data.decode("utf-8")

Hit respective serper api's and store the results 

In [13]:
print(type(keywords_result_dict))

<class 'dict'>


In [14]:
def route_keywords(keywords): 
    # Initialize results dictionary with only needed categories
    results = {"text_api": [], "image_api": []}
    
    # Skip api_needed as it's a control parameter, not a category for processing
    categories_to_process = {k: v for k, v in keywords.items() if k != 'api_needed'}
    
    # Iterate through the filtered input dictionary
    for category, keyword_list in categories_to_process.items():
        if category not in results:
            print(f"Warning: Unknown category {category} encountered")
            continue
            
        # Process each keyword in the category's keyword list
        for keyword in keyword_list:
            try:
                if category == "text_api":
                    result = serperquery(keyword)
                    # Parse the JSON string returned by serperquery
                    if isinstance(result, str):
                        result = json.loads(result)
                elif category == "image_api":
                    print(keyword)
                    result = serper_img_query(keyword)
                    # Parse the JSON string returned by serper_img_query
                    if isinstance(result, str):
                        result = json.loads(result)
                
                # Add the result to the appropriate category
                results[category].append({keyword: result})
            except json.JSONDecodeError as je:
                print(f"JSON decode error for {keyword}: {str(je)}")
                continue
            except Exception as e:
                print(f"Error processing {keyword} for {category}: {str(e)}")
                continue
    
    # Save results to JSON file with proper error handling
    try:
        with open("keywords_result_dict.json", "w", encoding='utf-8') as f:
            json.dump(results, f, indent=4, ensure_ascii=False)
        print("Results successfully saved to keywords_result_dict.json")
    except Exception as e:
        print(f"Error saving results to JSON: {str(e)}")
    
    return results
    

In [15]:
results = route_keywords(keywords_result_dict)
print(results)

famous ghats in Varanasi
Results successfully saved to keywords_result_dict.json
{'text_api': [], 'image_api': [{'famous ghats in Varanasi': {'searchParameters': {'q': 'famous ghats in Varanasi', 'gl': 'in', 'type': 'images', 'num': 10, 'engine': 'google'}, 'images': [{'title': '9 Important Ghats in Varanasi that You Must See', 'imageUrl': 'https://www.tripsavvy.com/thmb/tJ6ruJDKISZf3tZn6jx__HeMUMY=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/GettyImages-128253674-592156025f9b58f4c0d79bfd.jpg', 'imageWidth': 1500, 'imageHeight': 997, 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRlJlXNM9slC9luK10U6g6YkVkdEMtaZa3cZBkAgxpHel_q_fJk&s', 'thumbnailWidth': 275, 'thumbnailHeight': 183, 'source': 'TripSavvy', 'domain': 'www.tripsavvy.com', 'link': 'https://www.tripsavvy.com/must-see-ghats-in-varanasi-1539761', 'googleUrl': 'https://www.google.com/imgres?imgurl=https%3A%2F%2Fwww.tripsavvy.com%2Fthmb%2FtJ6ruJDKISZf3tZn6jx__HeMUMY%3D%2F1500x0%2Ffilters%3Ano_

In [16]:
def parse_api_results(json_data):
    """
    Parse JSON data from different APIs and extract organic results
    Args:
        json_data (dict): Raw JSON response from APIs
    Returns:
        dict: Cleaned organic results grouped by API type
    """
    parsed_results = {}
    
    try:
        # Iterate through each API type (text_api, image_api etc)
        for api_type, responses in json_data.items():
            parsed_results[api_type] = []
            
            # Handle each response for the API type
            for response in responses:
                try:
                    # Extract first key-value pair (query and its results)
                    query = next(iter(response))
                    results = response[query]
                    
                    # Extract organic results if they exist
                    if 'organic' in results:
                        cleaned_results = []
                        for result in results['organic']:
                            cleaned_result = {
                                'title': result.get('title', ''),
                                'link': result.get('link', ''),
                                'snippet': result.get('snippet', '')
                            }
                            cleaned_results.append(cleaned_result)
                        
                        parsed_results[api_type].append({
                            'results': cleaned_results
                        })
                    elif 'images' in results:
                        cleaned_results = []
                        for result in results['images']:
                            cleaned_result = {
                                'title': result.get('title', ''),
                                'link': result.get('link', ''),
                                'snippet': result.get('snippet', '')
                            }
                            cleaned_results.append(cleaned_result)
                        
                        parsed_results[api_type].append({
                            'results': cleaned_results
                        })
                        
                except (AttributeError, KeyError, StopIteration) as e:
                    print(f"Error parsing response in {api_type}: {str(e)}")
                    continue
                    
    except Exception as e:
        print(f"Error parsing JSON data: {str(e)}")
        return {}
        
    return parsed_results


In [17]:
with open('keywords_result_dict.json', 'r') as f:
    data = json.load(f)
    results = parse_api_results(data)
    print(data)

{'text_api': [], 'image_api': [{'famous ghats in Varanasi': {'searchParameters': {'q': 'famous ghats in Varanasi', 'gl': 'in', 'type': 'images', 'num': 10, 'engine': 'google'}, 'images': [{'title': '9 Important Ghats in Varanasi that You Must See', 'imageUrl': 'https://www.tripsavvy.com/thmb/tJ6ruJDKISZf3tZn6jx__HeMUMY=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/GettyImages-128253674-592156025f9b58f4c0d79bfd.jpg', 'imageWidth': 1500, 'imageHeight': 997, 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRlJlXNM9slC9luK10U6g6YkVkdEMtaZa3cZBkAgxpHel_q_fJk&s', 'thumbnailWidth': 275, 'thumbnailHeight': 183, 'source': 'TripSavvy', 'domain': 'www.tripsavvy.com', 'link': 'https://www.tripsavvy.com/must-see-ghats-in-varanasi-1539761', 'googleUrl': 'https://www.google.com/imgres?imgurl=https%3A%2F%2Fwww.tripsavvy.com%2Fthmb%2FtJ6ruJDKISZf3tZn6jx__HeMUMY%3D%2F1500x0%2Ffilters%3Ano_upscale()%3Amax_bytes(150000)%3Astrip_icc()%2FGettyImages-128253674-592156025f9b5

In [18]:
print(results)

{'text_api': [], 'image_api': [{'results': [{'title': '9 Important Ghats in Varanasi that You Must See', 'link': 'https://www.tripsavvy.com/must-see-ghats-in-varanasi-1539761', 'snippet': ''}, {'title': 'Ghats in Varanasi - Wikipedia', 'link': 'https://en.wikipedia.org/wiki/Ghats_in_Varanasi', 'snippet': ''}, {'title': 'The 85 Ghats of Banaras – Kevin Standage', 'link': 'https://kevinstandagephotography.wordpress.com/2020/05/22/the-ghats-of-banaras-varanasi-kashi/', 'snippet': ''}, {'title': 'Exploring the beautiful ghats of Varanasi | TimesTravel', 'link': 'https://timesofindia.indiatimes.com/travel/destinations/exploring-the-beautiful-ghats-of-varanasi/articleshow/107570888.cms', 'snippet': ''}, {'title': 'Top 5 Must-Visit Ghats in Varanasi | India.com', 'link': 'https://www.india.com/travel/articles/top-5-must-visit-ghats-in-varanasi-3608454/', 'snippet': ''}, {'title': "Between Heaven and Earth - Exploring Varanasi's Iconic Ghats", 'link': 'https://www.savaari.com/blog/ghats-of-var

In [178]:
final_prompt_template = Path(r"D:\projects\Serious_Banarasia\prompts\final_prompt.txt")
if not final_prompt_template.is_file():
    raise FileNotFoundError(f"Prompt file not found at {final_prompt_template}")

with open(final_prompt_template, "r", encoding="utf-8") as f:
    final_prompt_template = f.read()
    final_prompt_template = final_prompt_template.format(
        results = results,
        query = 'Show me pictures of some famous ghats in varanasi'
        
    )
keywords_result = GM.generate_content(final_prompt_template)
print(keywords_result)

Arre baba!  Aap Varanasi ke ghat dekhna chahte hain?  Wah!  Bahut badiya!  Aapke liye toh main pura Varanasi ghuma dunga!  Dekhiye na, yeh sab websites pe ghaton ki itni saari jankari hai...  (shows a few links from the provided data, perhaps even displaying images if possible through a suitable interface):

*  "9 Important Ghats in Varanasi that You Must See" -  yeh dekhiye, tripsavvy pe!  Sabse important ghat, saare bataye hain.
*  "Ghats in Varanasi - Wikipedia" -  Wikipedia pe bhi hai,  pura itihaas mil jaayega aapko!
*  "Between Heaven and Earth - Exploring Varanasi's Iconic Ghats" - Savaari ke blog pe bhi hai,  bahut achcha likha hai.
*  Aur dekhiye, SOTC, Times of India, The Floating Pebbles, India.com... sabne Varanasi ke ghaton pe likha hai!  Har ek website pe kuch na kuch nayi baat milegi aapko.  

(Shivendra pauses, smiles warmly, and leans in conspiratorially)

Sunaiye, main aapko ek baat bataun?  Yeh ghat sirf pathar ke tukde nahin hain, sahib!  Yeh toh Varanasi ki jaan ha

In [19]:
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
import os

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

completion = client.chat.completions.create(
            model = "chatgpt-4o-latest",
            messages=[
                {"role": "developer", "content": "You are a helpful assistant."},
                {
                    "role": "user",
                    "content": "hi"
                }
            ]
        )

In [20]:
completion.choices[0].message.content

'Hello! How can I assist you today? 😊'

In [21]:
from translator.translator import Translator
from pathlib import Path
import os

In [22]:

path = Path("prompts/translator/translator_prompt.txt")

# tr = Translator(api_key=os.getenv("GEMINI_API_KEY"),model_type="gemini", model_name="gemini-1.5-flash", prompt_template_path=path)
tr = Translator(api_key=os.getenv("OPENAI_API_KEY"),model_type="openai", model_name="gpt-3.5-turbo-1106", prompt_template_path=path)

In [23]:
print(tr.restructure_query("Hi, I want to know about Varanasi"))

Sure, here is the translated query:

Query: Hi, I want to know about Varanasi

Translation: Hi, I want to know about Varanasi


In [24]:
path_sgr = Path("prompts/query_router/query_keywords_seggregator.txt")

In [25]:
from keywords_Segregator.segregator import Segregator

SGR = Segregator(api_key=os.getenv("OPENAI_API_KEY"),model_type="openai", model_name="gpt-3.5-turbo-1106", prompt_template_path=path_sgr)

In [26]:
SGR.load_prompt_template()

'You are given a query: {re_structured_query} and are tasked to classify the query into a maximum of 2 classes: "text_api, image_api" if the query demands or requires classification into these API classes.\n\nWhen converting the query to keywords, ensure that:\n\nKeywords preserve the complete semantic meaning of the original query.\nNo keyword segmentation should lead to a loss of context or produce irrelevant results when performing a search.\nGroup words that form meaningful phrases together, avoiding unnecessary splitting.\nReturn the output in the following structure:\n\njson\n{{\n    "api_needed": 0,\n    "api_name": ["keyword_1", "keyword_2", ... "keyword_n"],\n    "api_name": ["keyword_1", "keyword_2", ... "keyword_n"]\n}}\n\nExample:\n\nQuery: images of famous Varanasi ghats\n\nCorrect Response:\njson\n{{\n    "api_needed": 1,\n    "image_api": ["famous Varanasi ghats"]\n}}\n\n\nIncorrect Response:\njson\n{{\n    "api_needed": 1,\n    "image_api": ["images", "Varanasi ghats", 

In [27]:
keywords =  SGR.keywords_seggregator("I want to know about Varanasi")

In [46]:
print(keywords)

{'api_needed': 1, 'text_api': ['history of Varanasi']}


In [47]:
from query_router.router import QueryRouter
from dotenv import load_dotenv
load_dotenv()
QR = QueryRouter(keywords=keywords, serper_api_key=os.getenv("SERPER_API_KEY"))

In [48]:
categories = {k: v for k, v in keywords.items() if k != 'api_needed'}

In [55]:
print(keywords)

{'api_needed': 1, 'text_api': ['history of Varanasi']}


In [63]:
categories.items()
for i, j in categories.items():
    print("this is i:", i)
    print("this is j:", j)

this is i: text_api
this is j: ['history of Varanasi']


In [53]:
new_result = QR.route_keywords()

Error processing history of Varanasi: expected string or bytes-like object, got 'NoneType'


In [None]:
def route_keywords(Keywords):
        # Filter out control parameters
    results = {'text_api': []}
    categories = {k: v for k, v in keywords.items() if k != 'api_needed'}

    for category, keyword_list in categories.items():
        if category not in results:
            print(f"Unknown category: {category}")
            continue

        for keyword in keyword_list:
            try:
                    
                result = None
                    # Route to appropriate API
                if category in ['text_api', 'search_api']: 
                    result = .serper_client.search_query(keyword)
                    print(result)

In [54]:
print(new_result)

{'text_api': [], 'image_api': [], 'search_api': []}
