Our Batch

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

# Set up the Chrome driver using webdriver_manager
service = ChromeService(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# Dummy DOB accepted by the website (format dd/mm/yyyy)
default_dob = "01/01/2000"

# Build registration number list
regnos = [f"22secd{str(i).zfill(2)}" for i in range(1, 120)]
extra_regnos = []

extra_regnos = []

regnos.extend(extra_regnos)

# Define exam batches (value, display text)
exam_batches = [
    ("116", "APRIL 2023"),
    ("118", "AUGUST 2023"),
    ("119", "NOVEMBER - 2023"),
    ("121", "MARCH - 2024"),
    ("123", "AUGUST - 2024"),
    ("124", "AUGUST 2024"),
    ("125", "MARCH/APRIL - 2025"),
    ("129", "MARCH/APRIL - 2025")
]

# List to store all student data
students_data = []

# Function to extract GPA from text
def extract_gpa(text):
    try:
        return float(text.split(":")[-1].strip())
    except Exception:
        return None

# Iterate over each registration number
for reg_no in regnos:
    print(f"Processing: {reg_no}")
    driver.get("https://results.uomexam.com/")

    try:
        # Wait for the input fields to be present
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "MainContent_txtRollNo"))
        )

        # Input registration number and DOB
        reg_input = driver.find_element(By.ID, "MainContent_txtRollNo")
        dob_input = driver.find_element(By.ID, "MainContent_txtDoB")
        reg_input.clear()
        reg_input.send_keys(reg_no)
        dob_input.clear()
        dob_input.send_keys(default_dob)

        # Submit the form
        submit_button = driver.find_element(By.ID, "MainContent_btnSubmit")
        submit_button.click()

        # Wait for the result summary to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "MainContent_lblResultSummary"))
        )

        # Initialize student record
        student_record = {"regno": reg_no}

        # Extract student name
        try:
            student_name = driver.find_element(By.ID, "MainContent_lblStudentName").text.strip()
        except Exception:
            student_name = None
            print(f"Name not found for {reg_no}")
        student_record["name"] = student_name

        # Extract semester
        try:
            student_sem = driver.find_element(By.ID, "MainContent_lblSem").text.strip()
        except Exception:
            student_sem = None
            print(f"Semester not found for {reg_no}")
        student_record["semester"] = student_sem

        gpa_list = []

        # Check for the presence of the exam batch dropdown
        try:
            dropdown_element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, "MainContent_ddlExamBatch"))
            )
            dropdown = Select(dropdown_element)
            available_values = [opt.get_attribute("value") for opt in dropdown.options]

            for value, label in exam_batches:
                if value not in available_values:
                    student_record[f"{label} gpa"] = None
                    continue

                try:
                    # Re-locate the dropdown to avoid stale element reference
                    dropdown_element = WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located((By.ID, "MainContent_ddlExamBatch"))
                    )
                    dropdown = Select(dropdown_element)
                    dropdown.select_by_value(value)

                    # Wait for the result summary to update
                    WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located((By.ID, "MainContent_lblResultSummary"))
                    )

                    gpa_text = driver.find_element(By.ID, "MainContent_lblResultSummary").text.strip()
                    gpa_value = extract_gpa(gpa_text)
                    student_record[f"{label} gpa"] = gpa_value
                    if gpa_value is not None:
                        gpa_list.append(gpa_value)
                except Exception as ex:
                    print(f"Error handling {label} for {reg_no}: {ex}")
                    student_record[f"{label} gpa"] = None
        except Exception:
            print(f"No dropdown for {reg_no}, attempting single GPA fetch.")
            try:
                gpa_text = driver.find_element(By.ID, "MainContent_lblResultSummary").text.strip()
                gpa_value = extract_gpa(gpa_text)
                student_record["Only batch gpa"] = gpa_value
                if gpa_value is not None:
                    gpa_list.append(gpa_value)
            except Exception as ex:
                print(f"GPA fetch failed without dropdown for {reg_no}: {ex}")
                student_record["Only batch gpa"] = None

        # Calculate average GPA
        student_record["avg gpa"] = sum(gpa_list) / len(gpa_list) if gpa_list else None

        # Append the student record to the data list
        students_data.append(student_record)

    except Exception as e:
        print(f"Failed to process {reg_no}: {e}")
        # Append a record with None values in case of failure
        failed_record = {"regno": reg_no, "name": None, "semester": None}
        for _, label in exam_batches:
            failed_record[f"{label} gpa"] = None
        failed_record["Only batch gpa"] = None
        failed_record["avg gpa"] = None
        students_data.append(failed_record)

# Save all data to CSV
df = pd.DataFrame(students_data)
df.to_csv("/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/CSD_Results.csv", index=False)
print("✅ Results saved")

# Close the browser
driver.quit()

Processing: 22secd01
Processing: 22secd02
Processing: 22secd03
Processing: 22secd04
Processing: 22secd05
Processing: 22secd06
Failed to process 22secd06: Message: 
Stacktrace:
0   chromedriver                        0x0000000102b6b570 cxxbridge1$str$ptr + 2731064
1   chromedriver                        0x0000000102b63468 cxxbridge1$str$ptr + 2698032
2   chromedriver                        0x00000001026b23f8 cxxbridge1$string$len + 90664
3   chromedriver                        0x00000001026f971c cxxbridge1$string$len + 382284
4   chromedriver                        0x000000010273ab1c cxxbridge1$string$len + 649548
5   chromedriver                        0x00000001026eda0c cxxbridge1$string$len + 333884
6   chromedriver                        0x0000000102b2e5f4 cxxbridge1$str$ptr + 2481340
7   chromedriver                        0x0000000102b3185c cxxbridge1$str$ptr + 2494244
8   chromedriver                        0x0000000102b0f248 cxxbridge1$str$ptr + 2353424
9   chromedriver         

In [5]:
import pandas as pd

# Read the CSV file
df = pd.read_csv(r"/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/AIDS_Results.csv")

# Sort by "avg gpa" in descending order
df_sorted = df.sort_values(by="avg gpa", ascending=False)

# Save the sorted DataFrame to a new CSV file
df_sorted.to_csv("/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/DS_results_sorted.csv", index=False)
print("Sorted CSV saved as results_sorted.csv")


Sorted CSV saved as results_sorted.csv


Juniors

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

# Set up the Chrome driver using webdriver_manager
service = ChromeService(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# Dummy DOB accepted by the website (format dd/mm/yyyy)
default_dob = "01/01/2000"

# Build registration number list
regnos = [f"23secd{str(i).zfill(2)}" for i in range(100, 126)]
extra_regnos = []

extra_regnos = []

regnos.extend(extra_regnos)

# Define exam batches (value, display text)
exam_batches = [
    ("119", "DECEMBER - 2023"),
    ("122", "APRIL / MAY 2024"),
    ("124", "AUGUST - 2024"),
    ("128", "FEBRUARY - 2025")
    # ("123", "AUGUST - 2024"),
    # ("124", "AUGUST 2024"),
    # ("125", "MARCH/APRIL - 2025"),
    # ("129", "MARCH/APRIL - 2025")
]

# List to store all student data
students_data = []

# Function to extract GPA from text
def extract_gpa(text):
    try:
        return float(text.split(":")[-1].strip())
    except Exception:
        return None

# Iterate over each registration number
for reg_no in regnos:
    print(f"Processing: {reg_no}")
    driver.get("https://results.uomexam.com/")

    try:
        # Wait for the input fields to be present
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "MainContent_txtRollNo"))
        )

        # Input registration number and DOB
        reg_input = driver.find_element(By.ID, "MainContent_txtRollNo")
        dob_input = driver.find_element(By.ID, "MainContent_txtDoB")
        reg_input.clear()
        reg_input.send_keys(reg_no)
        dob_input.clear()
        dob_input.send_keys(default_dob)

        # Submit the form
        submit_button = driver.find_element(By.ID, "MainContent_btnSubmit")
        submit_button.click()

        # Wait for the result summary to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "MainContent_lblResultSummary"))
        )

        # Initialize student record
        student_record = {"regno": reg_no}

        # Extract student name
        try:
            student_name = driver.find_element(By.ID, "MainContent_lblStudentName").text.strip()
        except Exception:
            student_name = None
            print(f"Name not found for {reg_no}")
        student_record["name"] = student_name

        # Extract semester
        try:
            student_sem = driver.find_element(By.ID, "MainContent_lblSem").text.strip()
        except Exception:
            student_sem = None
            print(f"Semester not found for {reg_no}")
        student_record["semester"] = student_sem

        gpa_list = []

        # Check for the presence of the exam batch dropdown
        try:
            dropdown_element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, "MainContent_ddlExamBatch"))
            )
            dropdown = Select(dropdown_element)
            available_values = [opt.get_attribute("value") for opt in dropdown.options]

            for value, label in exam_batches:
                if value not in available_values:
                    student_record[f"{label} gpa"] = None
                    continue

                try:
                    # Re-locate the dropdown to avoid stale element reference
                    dropdown_element = WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located((By.ID, "MainContent_ddlExamBatch"))
                    )
                    dropdown = Select(dropdown_element)
                    dropdown.select_by_value(value)

                    # Wait for the result summary to update
                    WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located((By.ID, "MainContent_lblResultSummary"))
                    )

                    gpa_text = driver.find_element(By.ID, "MainContent_lblResultSummary").text.strip()
                    gpa_value = extract_gpa(gpa_text)
                    student_record[f"{label} gpa"] = gpa_value
                    if gpa_value is not None:
                        gpa_list.append(gpa_value)
                except Exception as ex:
                    print(f"Error handling {label} for {reg_no}: {ex}")
                    student_record[f"{label} gpa"] = None
        except Exception:
            print(f"No dropdown for {reg_no}, attempting single GPA fetch.")
            try:
                gpa_text = driver.find_element(By.ID, "MainContent_lblResultSummary").text.strip()
                gpa_value = extract_gpa(gpa_text)
                student_record["Only batch gpa"] = gpa_value
                if gpa_value is not None:
                    gpa_list.append(gpa_value)
            except Exception as ex:
                print(f"GPA fetch failed without dropdown for {reg_no}: {ex}")
                student_record["Only batch gpa"] = None

        # Calculate average GPA
        student_record["avg gpa"] = sum(gpa_list) / len(gpa_list) if gpa_list else None

        # Append the student record to the data list
        students_data.append(student_record)

    except Exception as e:
        print(f"Failed to process {reg_no}: {e}")
        # Append a record with None values in case of failure
        failed_record = {"regno": reg_no, "name": None, "semester": None}
        for _, label in exam_batches:
            failed_record[f"{label} gpa"] = None
        failed_record["Only batch gpa"] = None
        failed_record["avg gpa"] = None
        students_data.append(failed_record)

# Save all data to CSV
df = pd.DataFrame(students_data)
df.to_csv("/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/CSD_JUNIORS_Results2.csv", index=False)
print("✅ Results saved")

# Close the browser
driver.quit()

Processing: 23secd100
Processing: 23secd101
Processing: 23secd102
Processing: 23secd103
Processing: 23secd104
Processing: 23secd105
Processing: 23secd106
Processing: 23secd107
Processing: 23secd108
Processing: 23secd109
Processing: 23secd110
Processing: 23secd111
Processing: 23secd112
Processing: 23secd113
Processing: 23secd114
Processing: 23secd115
Processing: 23secd116
Processing: 23secd117
Processing: 23secd118
Processing: 23secd119
Processing: 23secd120
Processing: 23secd121
Processing: 23secd122
Processing: 23secd123
Processing: 23secd124
Processing: 23secd125
Failed to process 23secd125: Message: 
Stacktrace:
0   chromedriver                        0x0000000104c74b38 cxxbridge1$str$ptr + 2722088
1   chromedriver                        0x0000000104c6caa8 cxxbridge1$str$ptr + 2689176
2   chromedriver                        0x00000001047be33c cxxbridge1$string$len + 90648
3   chromedriver                        0x0000000104805494 cxxbridge1$string$len + 381808
4   chromedriver      

In [5]:
import pandas as pd

# Read the CSV file
df = pd.read_csv(r"/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/3rd_batch/CSD_Juniors_results_sorted1.csv")

# Sort by "avg gpa" in descending order
df_sorted = df.sort_values(by="avg gpa", ascending=False)

# Save the sorted DataFrame to a new CSV file
df_sorted.to_csv("/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/3rd_batch/CSD_Juniors_results_sorted3.csv", index=False)
print("Sorted CSV saved as results_sorted.csv")

Sorted CSV saved as results_sorted.csv


Seniors

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

# Set up the Chrome driver using webdriver_manager
service = ChromeService(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# Dummy DOB accepted by the website (format dd/mm/yyyy)
default_dob = "01/01/2000"

# Build registration number list
regnos = [f"21secd{str(i).zfill(2)}" for i in range(1, 62)]
extra_regnos = []

extra_regnos = []

regnos.extend(extra_regnos)

# Define exam batches (value, display text)
exam_batches = [
    ("110", "JUNE 2022"),
    ("113", "SEPTEMBER 2022"),
    ("114", "NOVEMBER 2022"),
    ("116", "APRIL 2023"),
    ("119", "NOVEMBER - 2023"),
    ("121", "MARCH - 2024"),
    ("123", "AUGUST - 2024"),
    ("129", "MARCH/APRIL - 2025"),
    ("131", "JUNE 2025")
]

# List to store all student data
students_data = []

# Function to extract GPA from text
def extract_gpa(text):
    try:
        return float(text.split(":")[-1].strip())
    except Exception:
        return None

# Iterate over each registration number
for reg_no in regnos:
    print(f"Processing: {reg_no}")
    driver.get("https://results.uomexam.com/")

    try:
        # Wait for the input fields to be present
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "MainContent_txtRollNo"))
        )

        # Input registration number and DOB
        reg_input = driver.find_element(By.ID, "MainContent_txtRollNo")
        dob_input = driver.find_element(By.ID, "MainContent_txtDoB")
        reg_input.clear()
        reg_input.send_keys(reg_no)
        dob_input.clear()
        dob_input.send_keys(default_dob)

        # Submit the form
        submit_button = driver.find_element(By.ID, "MainContent_btnSubmit")
        submit_button.click()

        # Wait for the result summary to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "MainContent_lblResultSummary"))
        )

        # Initialize student record
        student_record = {"regno": reg_no}

        # Extract student name
        try:
            student_name = driver.find_element(By.ID, "MainContent_lblStudentName").text.strip()
        except Exception:
            student_name = None
            print(f"Name not found for {reg_no}")
        student_record["name"] = student_name

        # Extract semester
        try:
            student_sem = driver.find_element(By.ID, "MainContent_lblSem").text.strip()
        except Exception:
            student_sem = None
            print(f"Semester not found for {reg_no}")
        student_record["semester"] = student_sem

        gpa_list = []

        # Check for the presence of the exam batch dropdown
        try:
            dropdown_element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, "MainContent_ddlExamBatch"))
            )
            dropdown = Select(dropdown_element)
            available_values = [opt.get_attribute("value") for opt in dropdown.options]

            for value, label in exam_batches:
                if value not in available_values:
                    student_record[f"{label} gpa"] = None
                    continue

                try:
                    # Re-locate the dropdown to avoid stale element reference
                    dropdown_element = WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located((By.ID, "MainContent_ddlExamBatch"))
                    )
                    dropdown = Select(dropdown_element)
                    dropdown.select_by_value(value)

                    # Wait for the result summary to update
                    WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located((By.ID, "MainContent_lblResultSummary"))
                    )

                    gpa_text = driver.find_element(By.ID, "MainContent_lblResultSummary").text.strip()
                    gpa_value = extract_gpa(gpa_text)
                    student_record[f"{label} gpa"] = gpa_value
                    if gpa_value is not None:
                        gpa_list.append(gpa_value)
                except Exception as ex:
                    print(f"Error handling {label} for {reg_no}: {ex}")
                    student_record[f"{label} gpa"] = None
        except Exception:
            print(f"No dropdown for {reg_no}, attempting single GPA fetch.")
            try:
                gpa_text = driver.find_element(By.ID, "MainContent_lblResultSummary").text.strip()
                gpa_value = extract_gpa(gpa_text)
                student_record["Only batch gpa"] = gpa_value
                if gpa_value is not None:
                    gpa_list.append(gpa_value)
            except Exception as ex:
                print(f"GPA fetch failed without dropdown for {reg_no}: {ex}")
                student_record["Only batch gpa"] = None

        # Calculate average GPA
        student_record["avg gpa"] = sum(gpa_list) / len(gpa_list) if gpa_list else None

        # Append the student record to the data list
        students_data.append(student_record)

    except Exception as e:
        print(f"Failed to process {reg_no}: {e}")
        # Append a record with None values in case of failure
        failed_record = {"regno": reg_no, "name": None, "semester": None}
        for _, label in exam_batches:
            failed_record[f"{label} gpa"] = None
        failed_record["Only batch gpa"] = None
        failed_record["avg gpa"] = None
        students_data.append(failed_record)

# Save all data to CSV
df = pd.DataFrame(students_data)
df.to_csv("/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/1st_batch/CSD_Results.csv", index=False)
print("✅ Results saved")

# Close the browser
driver.quit()

Processing: 21secd01
Processing: 21secd02
Processing: 21secd03
Processing: 21secd04
Processing: 21secd05
Processing: 21secd06
Processing: 21secd07
Processing: 21secd08
Processing: 21secd09
Processing: 21secd10
Processing: 21secd11
Processing: 21secd12
Processing: 21secd13
Processing: 21secd14
Processing: 21secd15
Processing: 21secd16
Processing: 21secd17
Processing: 21secd18
Processing: 21secd19
Processing: 21secd20
Processing: 21secd21
Processing: 21secd22
Processing: 21secd23
Processing: 21secd24
Processing: 21secd25
Processing: 21secd26
Processing: 21secd27
Processing: 21secd28
Processing: 21secd29
Processing: 21secd30
Processing: 21secd31
Processing: 21secd32
Processing: 21secd33
Processing: 21secd34
Processing: 21secd35
Processing: 21secd36
Processing: 21secd37
Processing: 21secd38
Processing: 21secd39
Processing: 21secd40
Processing: 21secd41
Processing: 21secd42
Processing: 21secd43
Processing: 21secd44
Processing: 21secd45
Processing: 21secd46
Processing: 21secd47
Processing: 2

In [13]:
import pandas as pd

# Read the CSV file
df = pd.read_csv(r"/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/1st_batch/CSD_Results.csv")

# Sort by "avg gpa" in descending order
df_sorted = df.sort_values(by="avg gpa", ascending=False)

# Save the sorted DataFrame to a new CSV file
df_sorted.to_csv("/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/1st_batch/CSD_results_sorted.csv", index=False)
print("Sorted CSV saved as results_sorted.csv")

Sorted CSV saved as results_sorted.csv


Web-scrapping Done! Moving on to the SQL Database creation

In [16]:
import os
import pandas as pd
import sqlite3

def create_sql_database_from_csvs(root_directory, db_name):
    """
    Scans a directory for CSV files and imports each into a new table 
    in a SQLite database, setting 'reg no' as the primary key where available.

    Args:
        root_directory (str): The path to the main folder containing CSV files.
        db_name (str): The name for the output SQLite database file (e.g., 'Results.db').
    """
    try:
        conn = sqlite3.connect(db_name)
        print(f"Successfully connected to or created database '{db_name}'.")

        # Walk through the directory tree
        for subdir, dirs, files in os.walk(root_directory):
            for filename in files:
                if filename.endswith('.csv'):
                    csv_path = os.path.join(subdir, filename)
                    table_name = os.path.splitext(filename)[0]
                    
                    print(f"\nProcessing '{csv_path}'...")
                    
                    try:
                        # Read the CSV file into a pandas DataFrame
                        df = pd.read_csv(csv_path)
                        
                        # --- MODIFIED: Primary Key Logic ---
                        # Check if 'reg no' column exists in the DataFrame
                        if 'regno' in df.columns:
                            # Set the 'reg no' column as the DataFrame's index.
                            # This index will become the primary key in the SQL table.
                            df.set_index('regno', inplace=True)
                            
                            # Write the DataFrame to the SQL database.
                            # index=True ensures the index ('reg no') is written as a column.
                            df.to_sql(table_name, conn, if_exists='replace', index=True)
                            
                            print(f" -> Success: Table '{table_name}' created with {len(df)} rows.")
                            print(f" -> Primary Key: 'reg no' has been set for table '{table_name}'.")
                        else:
                            # If 'reg no' is not found, create the table without a primary key.
                            df.to_sql(table_name, conn, if_exists='replace', index=False)
                            
                            print(f" -> Success: Table '{table_name}' created with {len(df)} rows.")
                            print(f" -> WARNING: Column 'reg no' not found in {filename}. Table created without a primary key.")
                        # --- End of Modification ---
                        
                    except Exception as e:
                        print(f" -> FAILED to process {filename}. Error: {e}")

    except sqlite3.Error as e:
        print(f"Database error: {e}")
    finally:
        if conn:
            conn.close()
            print("\nDatabase connection closed.")

def verify_database_tables(db_name):
    """
    Connects to the database and lists all created tables for verification.
    """
    if not os.path.exists(db_name):
        print(f"Database '{db_name}' not found.")
        return
        
    print("\n--- Verifying Database ---")
    try:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()
        
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        
        if tables:
            print(f"Tables found in '{db_name}':")
            for table in tables:
                print(f"- {table[0]}")
        else:
            print("No tables found in the database.")
            
    except sqlite3.Error as e:
        print(f"Database error during verification: {e}")
    finally:
        if conn:
            conn.close()


# --- Main execution ---
if __name__ == "__main__":
    # --- MODIFIED: Get directory path from user input ---
    directory_path = '/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv'
    DATABASE_NAME = 'Results.db'
    
    # Check if the user-provided directory exists
    if not os.path.isdir(directory_path):
        print(f"Error: Directory '{directory_path}' not found.")
        print("Please run the script again and provide a valid path.")
    else:
        create_sql_database_from_csvs(directory_path, DATABASE_NAME)
        verify_database_tables(DATABASE_NAME)


Successfully connected to or created database 'Results.db'.

Processing '/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/3rd_batch/3_batch_AIML_Results.csv'...
 -> Success: Table '3_batch_AIML_Results' created with 59 rows.
 -> Primary Key: 'reg no' has been set for table '3_batch_AIML_Results'.

Processing '/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/3rd_batch/3_batch_CSD_Results.csv'...
 -> Success: Table '3_batch_CSD_Results' created with 89 rows.
 -> Primary Key: 'reg no' has been set for table '3_batch_CSD_Results'.

Processing '/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/3rd_batch/3_batch_AIDS_Results.csv'...
 -> Success: Table '3_batch_AIDS_Results' created with 60 rows.
 -> Primary Key: 'reg no' has been set for table '3_batch_AIDS_Results'.

Processing '/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/Results_csv/2nd_batch/2_batch_AIML_Results.csv'...
 -> Success: Table '2_batch_AIML_Results' create

In [1]:
import sqlite3
import pandas as pd
import os

def run_test_query(db_name):
    """
    Connects to the SQLite database, runs a specific test query,
    and prints the results.
    """
    # Check if the database file exists before trying to connect
    if not os.path.exists(db_name):
        print(f"Error: The database file '{db_name}' was not found.")
        print("Please make sure you have run the previous script to create the database.")
        return

    try:
        # Connect to the SQLite database
        conn = sqlite3.connect(db_name)
        print(f"Successfully connected to '{db_name}'.")

        # --- Static SQL query copied directly ---
        query = """
        SELECT "name" FROM "1_batch_AIML_Results" WHERE "avg gpa" > 9.00 UNION SELECT "name" FROM "1_batch_CSD_Results" WHERE "avg gpa" > 9.00 UNION SELECT "name" FROM "1_batch_AIDS_Results" WHERE "avg gpa"
        """

        print("\nExecuting query...\n")

        # Use pandas to execute the query and return a DataFrame
        results_df = pd.read_sql_query(query, conn)

        # --- Display the Results ---
        if results_df.empty:
            print("Query executed successfully, but no records matched your criteria.")
        else:
            print("Query Results:")
            print(results_df.to_string(index=False))

    except sqlite3.OperationalError as e:
        print(f"An SQL error occurred: {e}")
        print("\n--- Troubleshooting ---")
        print("This error often means:")
        print("1. One or more tables do not exist in the database.")
        print("2. A column (e.g., 'name' or one of the GPA columns) does not exist in the table.")
        print("Please verify the table and column names in your database.")
        
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        
    finally:
        # Ensure the database connection is closed
        if 'conn' in locals() and conn:
            conn.close()
            print("\nDatabase connection closed.")


# --- Main execution ---
if __name__ == "__main__":
    DATABASE_NAME = '/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/data/Results.db'
    run_test_query(DATABASE_NAME)


Successfully connected to '/Users/ranjanumeshrao/Downloads/DAL_Lab/Text2SQL/T2S_RAG/data/Results.db'.

Executing query...

Query Results:
                       name
             AMRUTHA S BORE
                  ANKITHA B
               ANUPRIYA K V
            ANVEET KULKARNI
                  ARPITHA J
                ARPITHA S M
                   ASHWIN J
             CHAITHRA MOULI
               CHANDANA B S
                 CHIRAG M L
                   CHIRAG Y
             D PUNEETH RAJU
                  DEEKSHA D
         GUNARI NISITHA SRI
              HARSHITHA H R
                HARSHITHA S
                     HEMA V
             HIMANSHU GUPTA
                    ISIRI N
             KIRANKUMAR S R
                  KRUPA M C
                 KUSUMA J M
       LAKUMIKAR K S PRASAD
                LAVANYA H M
               LEKHA MURTHY
          LIKITHA DHARANI D
                MALIHA KHAN
                 MANASA M P
              MARIAM NIKATH
               MEGHASH

Database creation done!