In [1]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

In [2]:
loader=PyPDFDirectoryLoader("D:\Interview\SQL-Assistant-Experiments\data")
docs = loader.load()
docs

[Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250419123113', 'title': 'Healthcare Schema with SQL', 'source': 'D:\\Interview\\SQL-Assistant-Experiments\\data\\Healthcare_Combined_Schema_final.pdf', 'total_pages': 3, 'page': 0, 'page_label': '1'}, page_content='Table: Doctors\nThe Doctors table stores information about healthcare professionals. Each doctor has a unique ID, along with\npersonal details such as first name, last name, date of birth, gender, contact number, and email address.\nCREATE TABLE Doctors (\n    doctor_id INT PRIMARY KEY,\n    first_name VARCHAR(50),\n    last_name VARCHAR(50),\n    dob DATE,\n    gender VARCHAR(10),\n    phone_number VARCHAR(15),\n    email VARCHAR(100)\n);\nTable: Doctor_Licenses\nThe Doctor_Licenses table records licensing information for each doctor. It includes a unique license ID, the\nassociated doctor ID, license number, issue and expiry dates, and the name of the issu

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    is_separator_regex=False,
)

In [4]:
documents=text_splitter.split_documents(docs)

In [5]:
documents

[Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250419123113', 'title': 'Healthcare Schema with SQL', 'source': 'D:\\Interview\\SQL-Assistant-Experiments\\data\\Healthcare_Combined_Schema_final.pdf', 'total_pages': 3, 'page': 0, 'page_label': '1'}, page_content='Table: Doctors\nThe Doctors table stores information about healthcare professionals. Each doctor has a unique ID, along with\npersonal details such as first name, last name, date of birth, gender, contact number, and email address.\nCREATE TABLE Doctors (\n    doctor_id INT PRIMARY KEY,\n    first_name VARCHAR(50),\n    last_name VARCHAR(50),\n    dob DATE,\n    gender VARCHAR(10),\n    phone_number VARCHAR(15),\n    email VARCHAR(100)\n);\nTable: Doctor_Licenses'),
 Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250419123113', 'title': 'Healthcare Schema with SQL', 'source': 'D:\\Intervi

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from langchain_community.vectorstores import FAISS
vectorstore=FAISS.from_documents(documents=documents, embedding=embeddings)
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x24d3ac2b2e0>

In [9]:
retriever=vectorstore.as_retriever()

In [10]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate

In [14]:
prompt=ChatPromptTemplate.from_template(
    '''
You are an expert backend database engineer working in a healthcare IT system. 
Your role is to process service requests (SRs) raised by users who are unable to make changes from the UI or need reports for decision-making.

These requests relate to backend database operations such as:
- Correcting wrong data entries (e.g., date of birth, phone number, name)
- Updating missing or outdated values
- Generating reports or summaries from existing data
- Assigning entities (e.g., doctors to hospitals)
- Reviewing logs of appointments or patient records

You have full knowledge of the database schema, which includes tables like:
- Doctors, Doctor_Licenses, Specializations, Doctor_Specializations
- Hospitals, Departments, Doctor_Hospital_Assignments
- Patients, Appointments

### Your task:
Given a natural language service request, understand the user's intent and generate:
1. The *Action Type* (e.g., Update, Correction, Report)
2. The *Target Table(s)*
3. The *SQL Query* or stored procedure to resolve the issue
4. A short *explanation* of what the query does

Make sure the output is aligned with schema column names and formatted for clarity.

---
{context}
### Input:
{input}

### Output:
*Action Type:*  
*Target Table(s):*  
*SQL Query:*  
```sql
-- SQL query here
'''
)

In [12]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from dotenv import load_dotenv
load_dotenv()
llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash",api_key=os.getenv("GEMINI_API_KEY"))

In [15]:
document_chain=create_stuff_documents_chain(llm=llm,prompt=prompt)
retrieval_chain=create_retrieval_chain(retriever,document_chain)

In [18]:
response=retrieval_chain.invoke(
    {"input":"Update all phone numbers starting with 99999 to 88888 in the Patients table. List all departments on the 3rd floor in hospital ID 2. Show all appointments scheduled for next week across all hospitals"}
)

In [19]:
response['answer']

"```sql\nUPDATE Patients SET phone_number = REPLACE(phone_number, '99999', '88888') WHERE phone_number LIKE '99999%';\nSELECT department_name FROM Departments WHERE floor_number = 3 AND hospital_id = 2;\nSELECT * FROM Appointments WHERE appointment_date BETWEEN TRUNC(SYSDATE + 7) AND TRUNC(SYSDATE + 14);\n```\n\n*Explanation:*\n```\nThe first query updates the phone numbers in the Patients table, replacing the '99999' prefix with '88888' where applicable.\nThe second query retrieves the names of departments located on the 3rd floor of the hospital with hospital_id 2.\nThe third query retrieves all appointments scheduled for the next 7 days.\n```\n\n*Action Type:* Update, Report\n*Target Table(s):* Patients, Departments, Appointments\n*SQL Query:*\n```sql\nUPDATE Patients SET phone_number = REPLACE(phone_number, '99999', '88888') WHERE phone_number LIKE '99999%';\nSELECT department_name FROM Departments WHERE floor_number = 3 AND hospital_id = 2;\nSELECT * FROM Appointments WHERE appoin