In [1]:
# Dependencies: uncomment and run once in a fresh environment.
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install google-genai
# %pip install pymongo

In [2]:
import pandas as pd
import pymysql
import sqlalchemy
import pymongo
from pymongo import MongoClient
import os
from google import genai
import json

## API Setup: Google Gemini LLM API

In [3]:
# Set up API key for Google Gemini LLM API
# Read in the key from json file
def load_config(path="config.json"):
    """Load the notebook's JSON configuration file.

    Args:
        path: Location of the JSON file. Defaults to ``config.json`` in the
            current working directory.

    Returns:
        The parsed JSON content (the notebook expects an object containing
        an ``"API_KEY"`` entry).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; without an explicit encoding Python
    # falls back to the locale default, which can mis-decode the file.
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)

In [4]:
# Read config.json and pull out the Gemini API key.
# A missing "API_KEY" entry raises KeyError here, before any client is created.
config = load_config()
API_KEY = config["API_KEY"]

In [5]:
# Set up the AI model we want to use
class Custom_GenAI:
    """Thin wrapper around the Google Gemini client for free-form questions."""

    # Model used by ask_ai; can be overridden per instance via the constructor.
    model = "gemini-2.0-flash"

    def __init__(self, API_KEY, model=None):
        """Create the Gemini client.

        Args:
            API_KEY: Google Gemini API key.
            model: Optional model name; when omitted the class-level default
                (``gemini-2.0-flash``) is used.
        """
        self.client = genai.Client(api_key=API_KEY)
        if model is not None:
            self.model = model

    def ask_ai(self, question):
        """Send ``question`` to the model and return the reply text.

        Args:
            question: Prompt passed to the model as ``contents``.

        Returns:
            The ``text`` attribute of the generate_content response.
        """
        response = self.client.models.generate_content(
            model=self.model,
            contents=question,
        )
        return response.text

In [6]:
# Build the Gemini wrapper with the key loaded from config.json
ai = Custom_GenAI(API_KEY)

### Test the AI

In [7]:
# Smoke test: send one trivial question and print the raw text reply to
# confirm that the API key and the model are working.
ques = "How many states are in US, give me simple answer"
res = ai.ask_ai(ques)
print(res)

There are 50 states in the US.



## Database Connection

### MySQL & MongoDB Natural Language Interface (NLI)

In [8]:
# Connection strings are read from config.json when it provides "MYSQL_URL" /
# "MONGO_URI"; the literals below are local-development fallbacks only.
# NOTE(review): the fallback MySQL URL embeds the root password. Move it into
# config.json and drop the literal before sharing or committing this notebook.

# MySQL Connection
apt = sqlalchemy.create_engine(
    config.get("MYSQL_URL", "mysql+pymysql://root:Dsci-551@localhost/aptadditional")
)
# MongoDB Connection
client = MongoClient(config.get("MONGO_URI", "mongodb://localhost:27017/"))

In [9]:
class Custom_GenAI:
    """Natural-language interface that turns a question into a database query.

    The prompt describes the MySQL ``aptadditional`` tables and the MongoDB
    ``rental`` collections and asks Gemini to reply with a JSON object holding
    the target ``engine`` ("mysql" or "mongodb") and the ``query`` to run.
    """

    # Model used for query generation; can be overridden via the constructor.
    model = "gemini-2.0-flash"

    def __init__(self, API_KEY, model=None):
        """Create the Gemini client.

        Args:
            API_KEY: Google Gemini API key.
            model: Optional model name; defaults to the class-level ``model``.
        """
        self.client = genai.Client(api_key=API_KEY)
        if model is not None:
            self.model = model

    @staticmethod
    def _strip_code_fence(text):
        """Remove a surrounding Markdown code fence (``` or ```json) from ``text``."""
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            # Drop an optional language tag placed right after the opening fence.
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
        return cleaned.strip()

    def _build_prompt(self, question):
        """Return the full instruction prompt with ``question`` appended."""
        # The connection string shown to the model deliberately carries
        # placeholders instead of real credentials: the prompt is sent to an
        # external service and the model only needs the variable name `apt`.
        return f"""
You are a natural language interface for both MySQL and MongoDB databases.

The user has access to the following databases and their respective tables/collections:

MySQL Database: aptadditional
- amenities (amenity_id, amenity_name)
- property_amenities (id, amenity_id)
- pricing (id, price, currency)
- price_details (id, price_display, price_type)
- pets (id, pets_allowed, fee)
    - `pets_allowed` includes only: 'Cats,Dogs', 'No pets allowed', 'Cats', or 'Dogs'
    - `fee` includes 'Yes' or 'No' for extra fees

MongoDB Database: rental
- general_info (id, title, body)
    - `title` is a short description of the apartment
    - `body` is the long description of the apartment
- location (id, cityname, state, latitude, longitude)
    - `state` uses U.S. state abbreviations like 'CA', 'NY', 'NC'
- property_details (id, square_feet, bedrooms, bathrooms)
- media (id, has_photo)
- sources (id, source, time)

Assume mySQL has initialized as:
apt = sqlalchemy.create_engine("mysql+pymysql://<user>:<password>@localhost/aptadditional")

Assume MongoClient has been initialized as:
client = MongoClient("mongodb://localhost:27017/")

Your task:
- Determine whether the query should run on the MySQL or MongoDB database
- Return a valid JSON object with two keys:
  - "engine": either "mysql" or "mongodb"
  - "query": a single-line SQL or a **single-line valid PyMongo command**

Pymongo Supported commands:
- `.find(filter, projection)` for simple filtering
- `.aggregate([...])` for advanced operations using `$match`, `$group`, `$sort`, `$limit`, `$skip`, `$project`, `$lookup`
- `.insert_one`, `.insert_many`, `.update_one`, `.delete_one` for data modification
- Do **not** use unsupported methods like `.count_documents()`, `.find_one()`, etc.
- If the requested fields are from different collections, use `$lookup` in the aggregation query to join them based on a common key (usually `id`).
- Do not use `.aggregate()` for simple lookups where `.find()` with projection would suffice.

Always:
- Use fully qualified MySQL table names (e.g., `aptadditional.pricing`)
- Format the response as a single-line JSON 
- Use **PyMongo command only** for MongoDB: Use PyMongo syntax with client[...] for all MongoDB collections.
Example: client['rental']['general_info'].aggregate([...])


**DO NOT** explain the query.  
**DO NOT** use natural language.  
**DO NOT** return markdown.

---

Question: {question}
"""

    def ask_ai(self, question):
        """Ask the model for a query answering ``question``.

        Args:
            question: The user's natural-language request.

        Returns:
            The parsed JSON reply; per the prompt, an object with the keys
            ``"engine"`` and ``"query"``.

        Raises:
            json.JSONDecodeError: If the reply (after removing a Markdown code
                fence) is empty or not valid JSON. The raw reply is printed
                before the error is re-raised.
        """
        response = self.client.models.generate_content(
            model=self.model,
            contents=self._build_prompt(question),
        )

        # The reply text may be missing (None) when the model returns no text;
        # treat that as an empty reply so it surfaces as a JSON parse error.
        raw_output = self._strip_code_fence(response.text or "")

        try:
            return json.loads(raw_output)
        except json.JSONDecodeError:
            print("Failed to parse LLM output as JSON:", response.text)
            raise

In [10]:
ai = Custom_GenAI(API_KEY)

# SQL statements that return rows ("desc" also covers DESCRIBE); every other
# statement is treated as a data modification.
_ROW_RETURNING_PREFIXES = ("select", "show", "desc", "explain")


def _run_mysql(query):
    """Run an LLM-generated SQL statement on the `apt` engine and report the outcome."""
    statement = query.strip()

    if statement.lower().startswith(_ROW_RETURNING_PREFIXES):
        # A plain string is handed to the driver as-is, where '%' acts as a
        # parameter marker, so literal percent signs (LIKE '%x%') are doubled.
        result_sql = pd.read_sql(statement.replace("%", "%%"), apt)
        print("\nMySQL Query Result:")
        display(result_sql.head())
        return

    try:
        # sqlalchemy.text() escapes '%' for the driver on its own; doubling it
        # here as well would write '%%' into inserted or updated values.
        with apt.begin() as conn:  # commits on success, rolls back on error
            conn.execute(sqlalchemy.text(statement))
        print("Modification query executed successfully.")
    except Exception as e:
        print("Execution error:", e)


def _print_mongo_documents(documents):
    """Print a list of MongoDB documents; a single document is shown field by field."""
    print("\nMongoDB Query Result:")
    if len(documents) == 1 and isinstance(documents[0], dict):
        # print any key-value result
        for k, v in documents[0].items():
            print(f"{k.replace('_', ' ').capitalize()}: {v}")
    elif documents:
        for doc in documents:
            print(doc)
    else:
        print("No results found.")


def _report_mongo_result(result):
    """Print a summary appropriate for whatever the PyMongo call returned."""
    # Data modification results
    if isinstance(result, pymongo.results.InsertOneResult):
        print("Document inserted.")
    elif isinstance(result, pymongo.results.InsertManyResult):
        print("Documents inserted.")
    elif isinstance(result, pymongo.results.UpdateResult):
        print(f"Documents matched: {result.matched_count}, modified: {result.modified_count}")
    elif isinstance(result, pymongo.results.DeleteResult):
        print(f"Documents deleted: {result.deleted_count}")
    # Read results
    elif result is None:
        _print_mongo_documents([])
    elif isinstance(result, dict):
        _print_mongo_documents([result])
    elif hasattr(result, "__iter__") and not isinstance(result, (str, bytes)):
        # Duck-typed on purpose: find() yields a Cursor but aggregate() yields
        # a CommandCursor, which is not a Cursor subclass and was previously
        # dropped without any output.
        _print_mongo_documents(list(result))
    else:
        # Scalar result (e.g. a count)
        print("\nMongoDB Query Result:")
        print(result)


def _run_mongo(query):
    """Evaluate an LLM-generated PyMongo expression and report the outcome."""
    mongo_query = query.replace("```python", "").replace("```", "").strip()
    try:
        # SECURITY: eval() executes arbitrary Python produced by the LLM with
        # full access to this notebook's namespace. Only use this against
        # local, disposable databases and never expose it to untrusted users.
        result = eval(mongo_query, globals())
        _report_mongo_result(result)
    except Exception as e:
        print("MongoDB query execution error:", e)


print("Welcome to the Unified Database Natural Language Interface!")
print("Type 'exit' to quit.\n")

while True:
    question = input("Enter your question: ").strip()
    if not question:
        # Nothing typed: ask again instead of spending an LLM call on it.
        continue
    if question.lower() == "exit":
        print("Thank you and bye!")
        break

    try:
        response = ai.ask_ai(question)

        print("\nEngine Selected:", response["engine"])
        print("Generated Query:", response["query"])

        # Tolerate case/whitespace variations such as "MySQL" in the reply.
        engine = str(response["engine"]).strip().lower()

        if engine == "mysql":
            _run_mysql(response["query"])
        elif engine == "mongodb":
            _run_mongo(response["query"])
        else:
            print("Unknown engine specified in response.")
    except json.JSONDecodeError:
        print("Error: LLM output could not be parsed as JSON.")
    except Exception as e:
        print("Execution error:", e)

Welcome to the Unified Database Natural Language Interface!
Type 'exit' to quit.



Enter your question:  Can you show me the ID and source for listings 'apt888' and 'apt889'



Engine Selected: mongodb
Generated Query: client['rental']['sources'].find({'id': {'$in': ['apt888', 'apt889']}}, {'id': 1, 'source': 1, '_id': 0})

MongoDB Query Result:
{'id': 'apt888', 'source': 'Yahoo'}
{'id': 'apt889', 'source': 'bing'}


Enter your question:  exit


Thank you and bye!
