In [None]:
# Install the packages this notebook depends on (run once per environment).
# pip install wget
# pip install azure-search-documents 
# pip install azure-identity
# pip install openai
# NOTE(review): the import cell also uses langchain, langchain-core, langchain-openai
# and langchain-community, which are not listed here — confirm and add them
# (ideally with pinned versions).


**Import required libraries**


In [8]:
import os
import json

from langchain_openai import AzureChatOpenAI
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings
from langchain.docstore.document import Document
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate


**Load LangChain config values**


In [2]:
# LangChain tracing configuration.
# NOTE(review): these are plain Python variables; nothing visible in this notebook
# exports them to the process environment — confirm whether tracing is meant to be
# switched off via os.environ instead.
LANGCHAIN_TRACING_V2 = "false"

# Read the key with os.getenv, like the other configuration cells, so that a missing
# variable yields None instead of aborting the notebook with a KeyError while tracing
# is disabled anyway.
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")


**Configure OpenAI settings**


In [3]:

# Azure OpenAI connection settings, read from the environment.
# Each value is None when the corresponding variable is not set.
azure_openai_api_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")
azure_openai_api_key = os.getenv("AZURE_OPENAI_KEY")

# Model names used by this notebook. The embedding name is passed as the
# `azure_deployment` of the embeddings client further down.
azure_openai_deployment_model = "gpt-4-32k"
azure_openai_embedding_model = "text-embedding-ada-002"


**Configure Azure AI Search Vector Store settings**


In [4]:
# Azure AI Search connection settings, taken from the environment.
# Both values are None when the variable is not defined.
search_service_endpoint = os.environ.get("AZURE_AI_SEARCH_ENDPOINT")
search_service_api_key = os.environ.get("AZURE_AI_SEARCH_KEY")


**Create embeddings instance**


In [22]:
# Embeddings client for the Azure OpenAI deployment named in
# `azure_openai_embedding_model`. Its `embed_query` method is what the
# vector stores in this notebook receive as their embedding function.
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    azure_endpoint=azure_openai_api_endpoint,
    api_key=azure_openai_api_key,
    openai_api_version=azure_openai_api_version,
    azure_deployment=azure_openai_embedding_model,
)


**Create vector store instance**


In [23]:
# Vector store over the index that holds the table-level descriptions.
index_name: str = "nl2sql-table_vector-store"
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    azure_search_endpoint=search_service_endpoint,
    azure_search_key=search_service_api_key,
)


**Insert text and embeddings into vector store**


In [24]:
# Table-level metadata documents to be indexed in the vector store.
# Each Document's `page_content` is a JSON string with the keys `dataset_name`,
# `table_name`, `description` and `example_queries`; `metadata` records the source
# file and a running sequence number (`seq_num`).
# NOTE(review): `metadata['source']` is a hardcoded absolute local path — consider
# replacing it with a relative or configurable path before sharing the notebook.
documents_tables = [Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "hotels", "description": "The `hotels` table captures comprehensive information about various hotels that have partnered with our reservation platform. Each record in this table signifies a unique hotel entity, providing details about its name, location, and rating.", "example_queries": ["Show me all 5-star hotels.", "Find hotels near the airport.", "List hotels with a swimming pool.", "Which hotels offer free breakfast?", "Find pet-friendly hotels."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 1}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "rooms", "description": "The `rooms` table stores detailed information about the different types of rooms available in each hotel. Each row stands for a unique room type in a specific hotel.", "example_queries": ["List all available rooms for the weekend.", "Show me rooms with a sea view.", "Find rooms with a jacuzzi.", "Which rooms are wheelchair accessible?", "Show rooms with free Wi-Fi."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 2}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "reservations", "description": "The `reservations` table chronicles all the bookings made by users. Each record stands for a unique reservation.", "example_queries": ["Retrieve all my past reservations.", "Cancel my reservation for tomorrow.", "Show upcoming reservations for this month.", "Find reservations made using discount code XYZ.", "List all cancelled reservations."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 3}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "customers", "description": "The `customers` table holds information about users who have made reservations. This includes customer ID, name, contact details, and preferences.", "example_queries": ["Find the contact details of customer with ID 123.", "Show the preferences of customers who frequently book 5-star hotels.", "List all customers from New York.", "Retrieve the email addresses of customers who have made more than 10 reservations.", "Who are the customers with the highest loyalty points?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 4}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "amenities", "description": "The `amenities` table contains data about the various amenities offered by each hotel. Amenities include things like swimming pools, gyms, and restaurants.", "example_queries": ["List all hotels with a gym facility.", "Which hotels offer both a swimming pool and a spa?", "Find hotels with a business center.", "Show hotels that offer valet parking.", "List hotels with child care services."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 5}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "reviews", "description": "The `reviews` table stores customer feedback for hotels and rooms. Each entry represents a unique review given by a customer.", "example_queries": ["Show all reviews for Hotel XYZ.", "List hotels with an average rating above 4.", "Find reviews by customer ID 123.", "Show the most recent reviews for 5-star hotels.", "Retrieve reviews mentioning \'clean rooms\'."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 6}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "payments", "description": "The `payments` table records all payment transactions related to reservations. Each record stands for a unique payment.", "example_queries": ["List all payments made by customer ID 123.", "Find unsuccessful payment attempts.", "Show payments made via credit card.", "Retrieve all refunds issued last month.", "Find payments exceeding $1000."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 7}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "discounts", "description": "The `discounts` table contains information about various discount codes and promotions available.", "example_queries": ["List all active discount codes.", "Find discounts for military personnel.", "Show discounts applicable to 5-star hotels.", "Retrieve the usage history of discount code XYZ.", "Find discounts that expire this month."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 8}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "staff", "description": "The `staff` table holds information about hotel staff members. This includes their roles, contact details, and work schedules.", "example_queries": ["List all front desk staff.", "Show the work schedule of staff ID 456.", "Find staff trained in first aid.", "Retrieve contact details for the hotel manager.", "Who are the chefs in Hotel ABC?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 9}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "events", "description": "The `events` table provides details about events hosted by hotels, such as weddings, conferences, or shows.", "example_queries": ["Show all upcoming events at Hotel XYZ.", "List events with available tickets.", "Find events suitable for children.", "Which hotels are hosting business conferences?", "Retrieve events happening this weekend."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 10}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "services", "description": "The `services` table lists additional services offered by hotels, such as airport shuttle, laundry, or guided tours.", "example_queries": ["List all hotels offering airport shuttle.", "Which hotels have laundry service?", "Show services available at 5-star hotels.", "Find hotels offering guided tours.", "List hotels with 24-hour room service."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 11}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "inventory", "description": "The `inventory` table tracks the availability and status of rooms in each hotel.", "example_queries": ["Show available rooms for Hotel XYZ for the next week.", "List all rooms currently under maintenance.", "Find rooms that have been vacant for more than 30 days.", "Which hotels are fully booked for Christmas?", "Retrieve the last cleaning date for each room."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 12}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "loyalty_program", "description": "The `loyalty_program` table contains details about the rewards program, including points earned, redemption options, and member tiers.", "example_queries": ["Find customers who have enough points for a free stay.", "Show redemption options for 1000 points.", "List all Platinum members.", "Which customers are close to reaching the next tier?", "Retrieve the expiration dates for loyalty points."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 13}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "food_and_beverage", "description": "The `food_and_beverage` table stores information about dining options in hotels, including restaurants, bars, and room service menus.", "example_queries": ["List all hotels with a Michelin-starred restaurant.", "Show room service menus for Hotel XYZ.", "Find hotels with vegan options.", "Which hotels serve breakfast until 11 AM?", "Retrieve the wine list for Hotel ABC."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 14}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "billing", "description": "The `billing` table contains billing information for each reservation, including itemized costs, taxes, and additional charges.", "example_queries": ["Retrieve the billing details for reservation ID 789.", "List all reservations with pending payments.", "Find reservations with additional charges for late check-out.", "Show the tax breakdown for Hotel XYZ.", "Find reservations with a total cost exceeding $500."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 15}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "housekeeping", "description": "The `housekeeping` table keeps track of cleaning schedules and statuses for each room.", "example_queries": ["List rooms due for cleaning today.", "Find rooms that have not been cleaned for 3 days.", "Show the housekeeping schedule for the week.", "Which staff are assigned to housekeeping?", "Retrieve cleaning history for room 101."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 16}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "complaints", "description": "The `complaints` table logs customer complaints, including details about the issue and resolution status.", "example_queries": ["List all unresolved complaints.", "Show complaints related to room cleanliness.", "Find complaints by customer ID 123.", "Retrieve the resolution status for complaint ID 456.", "Who handled the complaint ID 789?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 17}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "check_ins_outs", "description": "The `check_ins_outs` table records the check-in and check-out times for each reservation.", "example_queries": ["Find all late check-outs for today.", "List customers who have not yet checked in.", "Show the earliest check-in times available.", "Retrieve check-in history for customer ID 123.", "Find reservations with early check-in requests."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 18}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "parking", "description": "The `parking` table contains information about parking facilities at each hotel, including availability and costs.", "example_queries": ["List all hotels with free parking.", "Show available parking slots for Hotel XYZ.", "Find hotels with valet parking.", "Retrieve the parking costs for 5-star hotels.", "Which hotels have electric vehicle charging stations?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 19}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "suppliers", "description": "The `suppliers` table holds information about suppliers providing various goods and services to the hotels.", "example_queries": ["List all food suppliers.", "Find suppliers based in New York.", "Which suppliers provide cleaning products?", "Show the contract terms for supplier ID 789.", "Retrieve all pending orders from suppliers."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 20}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "maintenance", "description": "The `maintenance` table logs maintenance requests and statuses for various hotel facilities.", "example_queries": ["List all pending maintenance requests.", "Find maintenance history for the swimming pool.", "Show maintenance schedules for elevators.", "Which staff are assigned to maintenance?", "Retrieve maintenance costs for last month."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 21}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "email_campaigns", "description": "The `email_campaigns` table contains information about email marketing campaigns, including targets, content, and results.", "example_queries": ["List all active email campaigns.", "Find campaigns targeted at frequent travelers.", "Show the open rate for campaign ID 123.", "Retrieve the content of the last promotional email.", "Which customers clicked on the latest campaign?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 22}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "analytics", "description": "The `analytics` table stores various metrics related to user behavior and website performance.", "example_queries": ["Show the most visited hotel pages.", "List conversion rates for the last quarter.", "Find the average session duration for users.", "Which pages have the highest bounce rate?", "Retrieve traffic sources for the past month."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 23}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "gift_cards", "description": "The `gift_cards` table contains information about gift cards sold or redeemed.", "example_queries": ["List all available gift card options.", "Find gift cards redeemed in the last month.", "Show the remaining balance for gift card ID 123.", "Retrieve the expiration dates for gift cards.", "Which customers frequently purchase gift cards?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 24}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "special_requests", "description": "The `special_requests` table logs any special requests made by customers during the reservation process.", "example_queries": ["List all special requests for today.", "Find special requests related to accessibility.", "Show all honeymoon-related requests.", "Retrieve special requests for Hotel ABC.", "Which rooms have frequent special requests?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 25}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "room_upgrades", "description": "The `room_upgrades` table contains information about room upgrades, either offered or purchased.", "example_queries": ["List available room upgrades for Hotel XYZ.", "Find customers who frequently purchase upgrades.", "Show the cost of upgrading to a suite.", "Retrieve all complimentary upgrades.", "Which upgrades are most popular?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 26}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "local_attractions", "description": "The `local_attractions` table provides information about tourist attractions and activities near each hotel.", "example_queries": ["List attractions near Hotel XYZ.", "Find hotels near national parks.", "Show activities suitable for families.", "Retrieve the distance to the nearest beach from each hotel.", "Which attractions offer discounts to hotel guests?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 27}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "taxes_fees", "description": "The `taxes_fees` table contains details about various taxes and fees applicable to reservations.", "example_queries": ["List all mandatory fees for Hotel ABC.", "Find the tax rate for hotels in New York.", "Show the fee breakdown for reservation ID 123.", "Retrieve all taxes applicable to conference rooms.", "Which fees are commonly waived?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 28}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "transportation", "description": "The `transportation` table provides details about transportation options offered by hotels or nearby, such as shuttles, taxis, or public transit.", "example_queries": ["List hotels with shuttle service to the airport.", "Find nearest subway stations to Hotel XYZ.", "Show transportation options for disabled guests.", "Retrieve the schedule for hotel shuttles.", "Which hotels offer bike rentals?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 29}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "corporate_partnerships", "description": "The `corporate_partnerships` table contains information about partnerships between hotels and corporations for special rates or packages.", "example_queries": ["List all corporate partners.", "Find special rates for employees of Company XYZ.", "Show available corporate packages.", "Retrieve the terms of partnership with Company ABC.", "Which corporations have long-term agreements?"]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 30}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "customers", "description": "The `customers` table contains information about individuals who book flights, including their personal details and when they were added to the database.", "example_queries": ["Retrieve the email addresses of all customers.", "Find all customers born after a certain date.", "Show the most recently added customers.", "Which customers have the same email domain?", "List customers by a specific first or last name."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 31}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "description": "The `flights` table logs details about each flight, including origin, destination, departure and arrival times, the carrier, and the price.", "example_queries": ["Find all flights departing from a specific location.", "Show all flights to a particular destination.", "Retrieve flights with departure or arrival at a specific time frame.", "Which flights are operated by a particular carrier?", "List all flights within a specific price range."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 32}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "description": "The `reservations` table keeps track of all flight bookings, linking them to customers and flights and recording when the reservation was made and its current status.", "example_queries": ["Find all reservations made by a specific customer.", "Show all reservations for a particular flight.", "Retrieve reservations made within a specific time period.", "Which reservations have a particular status (e.g., confirmed, cancelled, etc.)?", "List the reservations in chronological order."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 33}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "transactions", "description": "The `transactions` table documents all payment transactions related to flight reservations, including the reservation they pertain to, the amount paid, and when the transaction took place.", "example_queries": ["Find all transactions associated with a specific reservation.", "Show all transactions above a certain amount.", "Retrieve transactions that took place in a specific time frame.", "Which reservations have associated transactions?", "List all transactions in order of amount."]}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 34}),
]


In [25]:
# Insert the table-description documents into the vector store
# (at this point bound to the "nl2sql-table_vector-store" index).
# NOTE(review): re-running this cell presumably uploads the same documents
# again — confirm whether the index de-duplicates them.
vector_store.add_documents(documents_tables)


In [38]:
# Natural-language request used to look up the most relevant table descriptions.
query = "Provide a list of all flight reservations from October 10th to November 15th, 2023"

# Ask the vector store for 5 matching documents using the "hybrid" search type.
# Alternatives kept for reference: search_type="similarity", or
# vector_store.semantic_hybrid_search_with_score(query, k=5) for hybrid search
# with semantic reranking.
docs = vector_store.similarity_search(query, search_type="hybrid", k=5)

print(docs)


[Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "description": "The `flights` table logs details about each flight, including origin, destination, departure and arrival times, the carrier, and the price.", "example_queries": ["Find all flights departing from a specific location.", "Show all flights to a particular destination.", "Retrieve flights with departure or arrival at a specific time frame.", "Which flights are operated by a particular carrier?", "List all flights within a specific price range."]}', metadata={'source': '/Users/okanyenigun/Desktop/codes/python__general/example.jsonl', 'seq_num': 32}), Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "description": "The `reservations` table keeps track of all flight bookings, linking them to customers and flights and recording when the reservation was made and its current status.", "example_queries": ["Find all reservations made by a specific cust

In [40]:

# Decode the JSON payload of every retrieved document, then build the
# fully qualified "<dataset_name>.<table_name>" identifier for each one.
table_records = [json.loads(doc.page_content) for doc in docs]
matched_tables = [
    f"{record['dataset_name']}.{record['table_name']}"
    for record in table_records
]

print(matched_tables)


['flight_reservations.flights', 'flight_reservations.reservations', 'hotel_reservations.reservations', 'flight_reservations.transactions', 'hotel_reservations.check_ins_outs']


In [41]:
# Vector store over the "nl2sql-column_vector-store" index.
# NOTE(review): this rebinds `index_name` and `vector_store`, which until now
# referred to the "nl2sql-table_vector-store" index; any earlier cell that uses
# `vector_store` will operate on this index if re-run after this cell.
index_name: str = "nl2sql-column_vector-store"
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    azure_search_endpoint=search_service_endpoint,
    azure_search_key=search_service_api_key,
)


In [9]:

documents_columns = [Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "hotels", "column_name": "hotel_id", "description": "A unique identifier assigned to each hotel.", "usage": "This ID helps in maintaining a distinct record for each hotel and acts as a primary key. Its also used for referencing in other tables like Rooms.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 1}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "hotels", "column_name": "hotel_name", "description": "The official name of the hotel.", "usage": "This column provides users with the name of the hotel they are booking or viewing. It aids in branding and recognition.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 2}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "hotels", "column_name": "location", "description": "Represents the city or area where the hotel is situated.", "usage": "This field helps users in filtering hotels based on their preferred destination.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 3}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "hotels", "column_name": "rating", "description": "Represents the average rating of the hotel, based on user reviews.", "usage": "Users often sort or filter hotels based on ratings to ensure they get the best experience. A higher rating usually indicates better customer satisfaction.", "data_type": "FLOAT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 4}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "reservations", "column_name": "reservation_id", "description": "A unique identifier for each reservation made on the platform.", "usage": "This ID ensures that each booking is distinct and can be referenced for any customer queries or modifications.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 5}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "reservations", "column_name": "user_id", "description": "A reference to a user from the Users table who made the reservation.", "usage": "Establishes which user made a specific booking, aiding in personalized user experiences and support.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 6}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "reservations", "column_name": "room_id", "description": "Refers to a specific room type in a hotel from the Rooms table.", "usage": "Ensures that the booking corresponds to a specific type of room in a particular hotel.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 7}),
Document(page_content='{"dataset_name": "hotel_reservations", "table_name": "reservations", "column_name": "start_date", "description": "Indicates the beginning date of the reservation.", "usage": "Helps in determining room availability and the users stay period.", "data_type": "DATE"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 8}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "reservations", "column_name": "end_date", "description": "Marks the termination date of the reservation.", "usage": "Assists in room inventory management and billing.", "data_type": "DATE"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 9}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "rooms", "column_name": "room_id", "description": "A unique identifier for a specific room type in a hotel.", "usage": "This ID ensures that each room type in a hotel has a unique representation. It also plays a role in making reservations.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 10}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "rooms", "column_name": "hotel_id", "description": "An identifier that references a hotel from the Hotels table.", "usage": "This foreign key establishes a link between the room and its respective hotel, ensuring that rooms are correctly mapped to hotels.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 11}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "rooms", "column_name": "room_type", "description": "Categorizes rooms based on their features and amenities, e.g., Deluxe, Suite, etc.", "usage": "Users can choose a room based on their preferences, like a suite for luxurious stays or deluxe for standard ones.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 12}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "rooms", "column_name": "price_per_night", "description": "Indicates the cost of booking the room for one night.", "usage": "Helps users in understanding the pricing and aids in budget planning.", "data_type": "FLOAT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 13}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "rooms", "column_name": "availability", "description": "Specifies the number of such rooms available for booking.", "usage": "Ensures that overbooking doesnt occur and informs users about room scarcity.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 14}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "customers", "column_name": "customer_id", "description": "A unique identifier for each customer.", "usage": "Used for referencing customers in queries and transactions.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 15}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "customers", "column_name": "name", "description": "The full name of the customer.", "usage": "Used for personalizing customer interactions.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 16}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "customers", "column_name": "contact_details", "description": "Contact information of the customer, including phone number and email.", "usage": "Used for communication with the customer.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 17}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "customers", "column_name": "preferences", "description": "Stored preferences of the customer, such as room type and amenities.", "usage": "Used for tailoring recommendations and services to the customer.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 18}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "amenities", "column_name": "hotel_id", "description": "A unique identifier for each hotel.", "usage": "Used for linking amenities to specific hotels.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 19}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "amenities", "column_name": "amenity_type", "description": "The type of amenity offered, such as gym, pool, or spa.", "usage": "Used for filtering and listing amenities.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 20}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "amenities", "column_name": "availability", "description": "Indicates whether the amenity is currently available.", "usage": "Used for real-time amenity status updates.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 21}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "reviews", "column_name": "review_id", "description": "A unique identifier for each review.", "usage": "Used for referencing individual reviews.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 22}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "reviews", "column_name": "customer_id", "description": "The customer who provided the review.", "usage": "Used for linking reviews to customers.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 23}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "reviews", "column_name": "hotel_id", "description": "The hotel that the review pertains to.", "usage": "Used for aggregating reviews by hotel.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 24}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "reviews", "column_name": "rating", "description": "The rating given by the customer, usually on a scale from 1 to 5.", "usage": "Used for calculating the average rating of hotels.", "data_type": "FLOAT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 25}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "reviews", "column_name": "comments", "description": "Textual feedback provided by the customer.", "usage": "Used for qualitative analysis of customer satisfaction.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 26}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "payments", "column_name": "payment_id", "description": "A unique identifier for each payment transaction.", "usage": "Used for tracking and auditing payments.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 27}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "payments", "column_name": "customer_id", "description": "The customer who made the payment.", "usage": "Used for linking payments to customers.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 28}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "payments", "column_name": "amount", "description": "The amount of the payment transaction.", "usage": "Used for financial reporting and auditing.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 29}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "payments", "column_name": "status", "description": "The status of the payment, such as successful, pending, or failed.", "usage": "Used for real-time payment status tracking.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 30}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "payments", "column_name": "payment_method", "description": "The method used for payment, such as credit card or PayPal.", "usage": "Used for analytics and reporting.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 31}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "discounts", "column_name": "discount_code", "description": "A unique code representing each discount.", "usage": "Used for applying discounts during payment.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 32}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "discounts", "column_name": "description", "description": "A brief description of the discount.", "usage": "Used for informing customers about the discount.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 33}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "discounts", "column_name": "eligibility", "description": "Criteria for eligibility, such as military personnel or membership status.", "usage": "Used for verifying discount eligibility.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 34}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "discounts", "column_name": "expiry_date", "description": "The date on which the discount expires.", "usage": "Used for discount lifecycle management.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 35}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "staff", "column_name": "staff_id", "description": "A unique identifier for each staff member.", "usage": "Used for managing staff records.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 36}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "staff", "column_name": "name", "description": "The full name of the staff member.", "usage": "Used for identification and communication.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 37}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "staff", "column_name": "role", "description": "The role or position of the staff member.", "usage": "Used for assigning tasks and responsibilities.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 38}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "staff", "column_name": "contact_details", "description": "Contact information of the staff member.", "usage": "Used for internal communication.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 39}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "staff", "column_name": "work_schedule", "description": "The work schedule or shifts of the staff member.", "usage": "Used for staff management and scheduling.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 40}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "events", "column_name": "event_id", "description": "A unique identifier for each event.", "usage": "Used for event management and ticketing.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 41}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "events", "column_name": "hotel_id", "description": "The hotel where the event is hosted.", "usage": "Used for linking events to specific hotels.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 42}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "events", "column_name": "event_type", "description": "The type of event, such as wedding, conference, or show.", "usage": "Used for categorizing and filtering events.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 43}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "events", "column_name": "availability", "description": "Indicates whether tickets for the event are available.", "usage": "Used for real-time ticketing updates.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 44}),
Document(page_content='{"dataset_name": "hotel_reservations","table_name": "events", "column_name": "date", "description": "The date on which the event is scheduled.", "usage": "Used for event planning and scheduling.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 45}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "customers", "column_name": "customer_id", "description": "A unique identifier assigned to each customer.", "usage": "Ensures each customer is distinct and can be referenced in reservations.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 46}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "customers", "column_name": "first_name", "description": "The first name of the customer.", "usage": "Used to personalize communication and identify the customer.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 47}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "customers", "column_name": "last_name", "description": "The last name of the customer.", "usage": "Used along with the first name to identify and communicate with the customer.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 48}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "customers", "column_name": "email", "description": "The email address of the customer.", "usage": "Primary mode of communication with the customer.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 49}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "customers", "column_name": "date_of_birth", "description": "The birth date of the customer.", "usage": "May be used for age verification and personalized offers.", "data_type": "DATE"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 50}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "customers", "column_name": "created_at", "description": "Timestamp of when the customer data was added to the database.", "usage": "Helps track customer tenure and data age.", "data_type": "DATETIME"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 51}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "flight_id", "description": "A unique identifier for each flight.", "usage": "Used to uniquely identify and manage flight records.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 52}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "origin", "description": "The departure location of the flight.", "usage": "Helps users find flights based on their travel plans.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 53}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "destination", "description": "The arrival location of the flight.", "usage": "Used to find flights and plan journeys.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 54}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "departure_datetime", "description": "The departure time of the flight.", "usage": "Informs users and helps them plan their travel.", "data_type": "DATETIME"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 55}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "arrival_datetime", "description": "The arrival time of the flight.", "usage": "Informs users and helps them plan their travel.", "data_type": "DATETIME"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 56}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "carrier", "description": "The airline operating the flight.", "usage": "Provides users with the choice of airline and informs about the operator.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 57}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "price", "description": "The price of the flight ticket.", "usage": "Informs users and is used during booking transactions.", "data_type": "FLOAT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 58}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "column_name": "reservation_id", "description": "A unique identifier for each reservation.", "usage": "Used to uniquely identify and manage reservation records.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 59}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "column_name": "customer_id", "description": "A unique identifier assigned to each customer.", "usage": "Ensures each customer is distinct and can be referenced in reservations.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 60}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "column_name": "flight_id", "description": "A unique identifier for each flight.", "usage": "Used to uniquely identify and manage flight records.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 61}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "column_name": "reservation_datetime", "description": "Timestamp of when the reservation was made.", "usage": "Helps track reservation history and manage bookings.", "data_type": "DATETIME"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 62}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "column_name": "status", "description": "The status of the reservation (e.g., confirmed, cancelled).", "usage": "Informs users and staff of the current state of the reservation.", "data_type": "STRING"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 63}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "transactions", "column_name": "transaction_id", "description": "A unique identifier for each transaction.", "usage": "Ensures each transaction is distinct and can be tracked separately.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 64}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "transactions", "column_name": "reservation_id", "description": "A unique identifier for each reservation.", "usage": "Used to uniquely identify and manage reservation records.", "data_type": "INT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 65}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "transactions", "column_name": "amount", "description": "The monetary value of the transaction.", "usage": "Used for accounting and financial tracking.", "data_type": "FLOAT64"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 66}),
Document(page_content='{"dataset_name": "flight_reservations", "table_name": "transactions", "column_name": "transaction_datetime", "description": "Timestamp of when the transaction occurred.", "usage": "Used for financial records, reporting, and auditing.", "data_type": "DATETIME"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 67}),
]

# Sanity check: print the first column document of the list built above.
print(documents_columns[0])


page_content='{"dataset_name": "hotel_reservations", "table_name": "hotels", "column_name": "hotel_id", "description": "A unique identifier assigned to each hotel.", "usage": "This ID helps in maintaining a distinct record for each hotel and acts as a primary key. Its also used for referencing in other tables like Rooms.", "data_type": "INT64"}' metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 1}


In [52]:
# Upload the column documents to the vector store; the value displayed below the
# cell is the list of keys returned by add_documents.
# NOTE(review): the returned keys look freshly generated on every call, so re-running
# this cell presumably inserts duplicate documents into the index — confirm before
# executing it a second time.
vector_store.add_documents(documents_columns)


['YzFiYWE0NDEtYzc5Yy00NGI5LThiMTAtYjllMTYyMzdjNjFj',
 'OWM5ZjIxYmEtNDFmOS00NDM5LThkOTAtMjA1NTZkMmZkNmM1',
 'ZDc2OTE5NWUtYzNiZi00NzU3LWE5NjQtN2E1OGY5YTg0ZWI2',
 'N2Y2YjM4N2YtYzg3YS00NmI1LWI3ZDAtZjBmNzI3NTc0OWM5',
 'NTgzMGRmOTctOWEyZS00OTYwLTkxYmMtNGI3YmU4ZjdlM2Yw',
 'NmM3YmI0NDQtNjc4Zi00NGQxLWFiY2UtNDZjNmExODJiYTJh',
 'YmNjMmY1MzUtYWVlYy00MjI4LTllNWEtMTRhYjI2Y2U0YmM1',
 'YmE5YzE3NDQtMTNmNy00YWRlLWI1ZjItODI0YTRjOTRlZGJj',
 'NDBmOGJjMjAtNDY0Yy00NmYwLWE2MzYtOTRkNzhjMzdjYTYy',
 'OGUxZDU1ZTktNTBiNi00OGU5LWJhNjEtNTkyZmVmOWVkM2M2',
 'NjFmNDE2ZTQtMzJkOC00MTBkLTkzYWMtZWRiOGU4Y2Q4NTE5',
 'MDAxNWRjZmItOWQ0Yy00NzE2LWJiMGUtZWM1MjBkMTAyYTE5',
 'MGY1Y2U1MjAtNDcxOC00OGNiLWE0NjktMzk2NDYxYzI2OWNj',
 'ZTNhZTA2MGEtYzQ4Ni00OTc4LTg5YzYtMjk0MzYyYTljOTE3',
 'MWE4MWYzNWEtZGYzOS00NmE2LWFmY2YtOWQ3OTNkMTg5ZTAx',
 'ZGE4OWYzMzAtYjEzNS00ZTI3LWIwNTEtZGI4OGUxYjY5ZjEx',
 'MDQ4NWIzNDMtYzg5OC00NWE2LTg4N2YtMjkxYTU3ZWJhYzQx',
 'YWVmMTRmNGYtMTA0NC00Yjg4LTgzODktZGRhZDhmNmZlNzlk',
 'Y2ZmNTg2NDMtNjFlNC00MGFhLTlkNWItNzk1MzhmMmQ4

In [55]:
# Hybrid retrieval: ask the vector store for the 20 column documents that best
# match the user's question.
# NOTE(review): `query` is not assigned in the surrounding cells — confirm it is
# defined earlier in the notebook so a fresh Restart & Run All still works.
hybrid_search_options = {"k": 20, "search_type": "hybrid"}
matched_columns = vector_store.similarity_search(query, **hybrid_search_options)

# Alternative kept for reference: hybrid search with semantic reranking.
# docs = vector_store.semantic_hybrid_search_with_score(
#     query,
#     k=20,
# )
print(matched_columns)


[Document(page_content='{"dataset_name": "flight_reservations", "table_name": "reservations", "column_name": "reservation_datetime", "description": "Timestamp of when the reservation was made.", "usage": "Helps track reservation history and manage bookings.", "data_type": "DATETIME"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 1}), Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "arrival_datetime", "description": "The arrival time of the flight.", "usage": "Informs users and helps them plan their travel.", "data_type": "DATETIME"}', metadata={'source': '/Users/sarrabelly/Desktop/codes/python__general/example.jsonl', 'seq_num': 1}), Document(page_content='{"dataset_name": "flight_reservations", "table_name": "flights", "column_name": "departure_datetime", "description": "The departure time of the flight.", "usage": "Informs users and helps them plan their travel.", "data_type": "

In [56]:
# Keep only the columns that belong to the dataset we want to query. The filtering
# is done client-side because, per the original author's note, LangChain filters
# did not support multiple values at the time of writing.
target_dataset = 'flight_reservations'

# Each document's page_content is a JSON object; decode it once and keep the dict.
decoded_columns = [json.loads(column.page_content) for column in matched_columns]

matched_columns_filtered = [
    column_info
    for column_info in decoded_columns
    if column_info['dataset_name'] == target_dataset
]


In [57]:
# Render every selected column as one pipe-delimited "key=value" line and join the
# lines with newlines into a single schema string. Building it in one expression
# means `matched_columns_cleaned` is only ever bound to the final string.
matched_columns_cleaned = '\n'.join(
    f"dataset_name={column_info['dataset_name']}"
    f"|table_name={column_info['table_name']}"
    f"|column_name={column_info['column_name']}"
    f"|data_type={column_info['data_type']}"
    for column_info in matched_columns_filtered
)
print(matched_columns_cleaned)


dataset_name=flight_reservations|table_name=reservations|column_name=reservation_datetime|data_type=DATETIME
dataset_name=flight_reservations|table_name=flights|column_name=arrival_datetime|data_type=DATETIME
dataset_name=flight_reservations|table_name=flights|column_name=departure_datetime|data_type=DATETIME
dataset_name=flight_reservations|table_name=reservations|column_name=flight_id|data_type=INT64


In [58]:
# Start from an empty list of prompt message templates. The cells below append to
# this list, so re-run this cell first whenever those cells are re-executed;
# otherwise the same message ends up in the list more than once.
messages = []


In [61]:
# System message: tells the model which role to play and which SQL dialect to write.
template = "You are a SQL master expert capable of writing complex SQL query in MS SQL Server."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# In-place extension of the shared list: running this cell twice adds the message twice.
messages += [system_message_prompt]


In [63]:
# Human message template. `{query}` and `{matched_schema}` are placeholders that are
# filled in when the prompt is formatted; the trailing instructions tell the model to
# use only the columns and tables listed under MATCHED_SCHEMA.
human_template = """Given the following inputs:
USER_QUERY:
--
{query}
--
MATCHED_SCHEMA: 
--
{matched_schema}
--
Please construct a SQL query using the MATCHED_SCHEMA and the USER_QUERY provided above. 


IMPORTANT: Use ONLY the column names (column_name) mentioned in MATCHED_SCHEMA. DO NOT USE any other column names outside of this. 
IMPORTANT: Associate column_name mentioned in MATCHED_SCHEMA only to the table_name specified under MATCHED_SCHEMA.
NOTE: Use SQL 'AS' statement to assign a new name temporarily to a table column or even a table wherever needed. 
"""


In [64]:
# Wrap the human template in a prompt-template object and queue it after the
# messages already collected.
human_message = HumanMessagePromptTemplate.from_template(human_template)

# In-place extension of the shared list: running this cell twice adds the message twice.
messages += [human_message]


In [65]:
# Build the chat prompt from the two templates explicitly (system first, then human)
# instead of from the shared `messages` list: that list is filled by append-only
# cells, so re-running any of them would silently duplicate messages in the prompt.
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message])


In [69]:
# Fill the prompt placeholders with the user question and the matched schema text,
# then turn the formatted prompt into a list of chat messages.
prompt_value = chat_prompt.format_prompt(
    query=query,
    matched_schema=matched_columns_cleaned,
)
request = prompt_value.to_messages()


In [70]:
# Chat model client for the Azure OpenAI deployment configured earlier in the notebook.
# temperature=0 is passed through unchanged from the original configuration.
llm = AzureChatOpenAI(
    azure_deployment=azure_openai_deployment_model,
    azure_endpoint=azure_openai_api_endpoint,
    openai_api_version=azure_openai_api_version,
    openai_api_key=azure_openai_api_key,
    openai_api_type="azure",
    temperature=0,
)


In [71]:
%%time 

response = llm.invoke(request)
sql = '\n'.join(response.content.strip().split('\n')[1:-1])
print(sql)



```sql
SELECT *
FROM flight_reservations.reservations AS r
JOIN flight_reservations.flights AS f
ON r.flight_id = f.flight_id
WHERE r.reservation_datetime BETWEEN '2023-10-10' AND '2023-11-15'
```

CPU times: total: 31.2 ms
Wall time: 4.8 s
