In [1]:
import os
import nltk
import docx
import torch
import queue
import spacy
import PyPDF2
import joblib
import cohere
import logging
import tempfile
import warnings
import threading
import numpy as np
import pandas as pd
import tkinter as tk
from tkinter import ttk
from sklearn.svm import SVC
from bs4 import BeautifulSoup
from pdfminer.high_level import extract_text
from sklearn.preprocessing import StandardScaler
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize.texttiling import TextTilingTokenizer
from tkinter import filedialog, messagebox, Text, Scrollbar
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline, BertTokenizerFast, BertForTokenClassification




In [2]:
# Loading the dataset
# The CSV location can be overridden with the CASES_CSV_PATH environment variable so the
# notebook is not tied to a single machine; the original local path stays as the default.
dataset_path = os.environ.get(
    "CASES_CSV_PATH",
    r"C:\Users\ASUS\OneDrive\Documents\Semester 9\Mini Project\Cases.csv",
)
cases_df = pd.read_csv(dataset_path)

In [3]:
# Initializing the SentenceTransformer model for semantic embedding
# (search_indian_kanoon uses it to encode both the query and the case titles)
embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

README.md:   0%|          | 0.00/3.51k [00:00<?, ?B/s]

In [4]:
# Loading the fine-tuned model and tokenizer
# NOTE: the names `model` and `tokenizer` are rebound to InLegalBERT in a later cell;
# `ner_pipeline` is built here from the objects loaded from ./ner_model.
model = BertForTokenClassification.from_pretrained('./ner_model')
tokenizer = BertTokenizerFast.from_pretrained('./ner_model')
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

In [5]:
# Initializing models and APIs
# The Cohere API key is read from the environment (COHERE_API_KEY, falling back to
# CO_API_KEY). Previously the secret itself was passed as the *name* of the environment
# variable, which exposed the key in the notebook and looked up a variable named after
# the key instead of supplying it. The exposed key should be revoked.
co = cohere.Client(os.environ.get("COHERE_API_KEY") or os.environ.get("CO_API_KEY"))
nlp = spacy.load("en_core_web_sm")
# InLegalBERT tokenizer/model; get_bert_embeddings reads these module-level names.
tokenizer = AutoTokenizer.from_pretrained("law-ai/InLegalBERT")
model = AutoModel.from_pretrained("law-ai/InLegalBERT")
tiling_tokenizer = TextTilingTokenizer()


In [6]:
# Queue whose items update_gui() pops and passes to display_content().
result_queue = queue.Queue()

In [7]:
# Suppressing warnings
# NOTE: no category is given, so this ignores every warning from here on.
warnings.filterwarnings('ignore')

In [8]:
def upload_file():
    """Ask the user for a case file and process it on a background thread.

    When the dialog is dismissed without a selection an informational message
    is shown. Otherwise ``process_file`` is started in a daemon thread and the
    progress bar is displayed.
    """
    supported_types = [("PDF Files", "*.pdf"), ("Word Documents", "*.docx"), ("Text Files", "*.txt")]
    chosen_path = filedialog.askopenfilename(title="Select Case File", filetypes=supported_types)

    if not chosen_path:
        messagebox.showinfo("Info", "No file selected.")
        return

    worker = threading.Thread(target=process_file, args=(chosen_path,), daemon=True)
    worker.start()
    start_progress_bar()

In [9]:
def process_file(file_path):
    """Extract the text of ``file_path``, display it and run the analysis steps.

    The extractor is chosen from the file extension (``.pdf``, ``.docx`` or
    ``.txt``), compared case-insensitively. The extracted text is shown with
    ``display_content``; the named entities, the similar cases and the
    predicted preceding / legal laws / result are printed to stdout.

    Any exception is logged and reported in an error dialog, and the progress
    bar is always stopped on exit.

    NOTE(review): ``upload_file`` runs this function on a worker thread, yet it
    touches Tk widgets directly. Tkinter is normally driven from the main
    thread only - consider handing results to the GUI through ``result_queue``.
    """
    try:
        logging.info(f"File Selected: {file_path}")
        file_label.config(text=f"Selected File Path: {file_path}")

        # Lower-case once so that e.g. "JUDGMENT.PDF" is not rejected as unsupported.
        lowered_path = file_path.lower()
        if lowered_path.endswith(".pdf"):
            content = extract_pdf(file_path)
        elif lowered_path.endswith(".docx"):
            content = extract_docx(file_path)
        elif lowered_path.endswith(".txt"):
            content = extract_txt(file_path)
        else:
            messagebox.showerror("Error", "Unsupported file format.")
            return

        display_content(content)

        # De-duplicate (word, entity_group) pairs; set ordering is arbitrary.
        entities = extract_entities(content)
        unique_entities = list({(ent['word'], ent['entity_group']) for ent in entities})
        print("Extracted Entities:", unique_entities)

        # The entity words joined together form the similarity-search query.
        search_query = " ".join([ent[0] for ent in unique_entities])
        case_links = search_indian_kanoon(search_query)
        print("Found Case Links:", case_links)

        # Pair entities with matches positionally; zip stops at the shorter list.
        for ent, link in zip(unique_entities, case_links):
            print(f"{ent[0]} : {link}")

        # Predict case details
        preceding, legal_laws, result = predict_case_details(content)
        print(f"Predicted Preceding: {preceding}")
        print(f"Predicted Legal Laws: {legal_laws}")
        print(f"Predicted Result: {result}")

    except Exception as e:
        logging.error(f"An error occurred: {e}")
        messagebox.showerror("Error", f"An error occurred: {e}")

    finally:
        stop_progress_bar()


In [10]:
def update_gui():
    """Display every item waiting in ``result_queue`` and poll again in 100 ms.

    Errors raised while displaying are reported in a dialog; the next poll is
    scheduled on ``root`` regardless of the outcome.
    """
    try:
        while True:
            if result_queue.empty():
                break
            display_content(result_queue.get())
    except Exception as err:
        messagebox.showerror("Error", f"An error occurred while updating GUI: {err}")
    finally:
        root.after(100, update_gui)

In [11]:
def select_pdf(pdf_path_var):
    """Prompt for a PDF and store the chosen path in ``pdf_path_var``.

    ``pdf_path_var`` is left untouched when the dialog returns no path.
    """
    chosen = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf")])
    print(f"Selected file path: {chosen}")  # Debugging step

    if not chosen:
        print("No file selected!")  # Debugging step
        return

    pdf_path_var.set(chosen)

In [12]:
def extract_text_from_pdf(pdf_path):
    """Return the text that pdfminer's ``extract_text`` yields for ``pdf_path``."""
    pdf_text = extract_text(pdf_path)
    return pdf_text



In [13]:
def run_engine(pdf_path_var, query_entry, response_text):
    """Answer the typed query about the selected PDF and show the response.

    Args:
        pdf_path_var: Variable whose ``get()`` returns the selected PDF path.
        query_entry: Text widget holding the user's query.
        response_text: Text widget whose content is replaced by the response.

    An error dialog is shown when the path or the query is empty, or when
    generating the response raises.
    """
    pdf_path, query = pdf_path_var.get(), query_entry.get("1.0", tk.END).strip()

    if not (pdf_path and query):
        messagebox.showerror("Error", "Please upload a PDF and enter a query.")
        return

    try:
        answer = process_pdf_and_generate_response(pdf_path, query)
        response_text.delete("1.0", tk.END)
        response_text.insert(tk.END, answer)
    except Exception as err:
        messagebox.showerror("Error", str(err))

In [14]:
def process_pdf_and_generate_response(pdf_path, query):
    """Answer ``query`` about the PDF at ``pdf_path``.

    The PDF text is segmented with ``tiling_tokenizer``, embedded with
    ``get_bert_embeddings`` and passed, together with a prompt containing the
    first 2000 characters of the document, to ``generate_response``.

    When the query mentions "prediction", "preceding", "legal_laws" or "result"
    the output of ``predict_case_details`` is appended; when it mentions
    "related cases" or "similar cases" the output of ``search_indian_kanoon``
    is appended.

    Args:
        pdf_path: Path of the PDF to read.
        query: The user's question.

    Returns:
        The response text, possibly extended with predictions / related cases.
    """
    # Extract text from the PDF
    document_text = extract_text_from_pdf(pdf_path)

    # Tokenize and get document embeddings
    try:
        text_chunks = tiling_tokenizer.tokenize(document_text)
    except ValueError:
        # TextTiling raises ValueError for text it cannot segment (for example
        # very short documents or ones without paragraph breaks). Treat the
        # whole document as one chunk instead of failing the query.
        text_chunks = [document_text]
    document_embeddings = get_bert_embeddings(text_chunks)

    prompt = f"You are an AI-driven research engine for commercial courts. Given the legal document: '{document_text[:2000]}', answer the query: '{query}'"
    response = generate_response(prompt, document_embeddings)

    # Keyword checks are case-insensitive; lower-case the query once.
    query_lower = query.lower()

    # Check if the query mentions predictions or related cases
    prediction_keywords = ("prediction", "preceding", "legal_laws", "result")
    if any(keyword in query_lower for keyword in prediction_keywords):
        predictions = predict_case_details(document_text)
        response += "\n\nPredictions: " + str(predictions)

    if "related cases" in query_lower or "similar cases" in query_lower:
        related_cases = search_indian_kanoon(query)
        response += "\n\nRelated Cases: " + str(related_cases)

    return response

In [15]:
def generate_response(prompt, embeddings):
    """Send ``prompt`` to the Cohere chat endpoint and return the reply text.

    The mean of the per-embedding means is prepended to the prompt as an
    "Embedding summary" line. When ``embeddings`` is empty the summary line is
    omitted, because ``np.mean`` of an empty list is NaN (and emits a
    RuntimeWarning), which would otherwise be injected into the prompt.

    Args:
        prompt: The prompt text.
        embeddings: Sequence of numeric arrays (may be empty).

    Returns:
        The stripped response text, or an "An error occurred: ..." string when
        the Cohere client raises ``cohere.error.CohereError``.
    """
    if len(embeddings) > 0:
        aggregated_embedding = np.mean([np.mean(embed) for embed in embeddings])
        embedding_str = f"Embedding summary: {aggregated_embedding:.2f}"
        full_prompt = f"{embedding_str}\n\n{prompt}"
    else:
        full_prompt = prompt

    try:
        response = co.chat(
            model="command-xlarge-nightly",
            message=full_prompt,
            max_tokens=750
        )
        return response.text.strip()
    except cohere.error.CohereError as e:
        return f"An error occurred: {str(e)}"

In [16]:
def get_bert_embeddings(texts):
    """Embed each text with the module-level ``tokenizer`` and ``model``.

    Every text is truncated to at most 512 tokens, run through the model
    without gradient tracking, and represented by the last hidden state at
    sequence position 0 (the first token, presumably ``[CLS]``).

    Returns:
        A list with one NumPy array per input text, in input order.
    """
    def embed_one(text):
        encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            model_output = model(**encoded)
        first_token_state = model_output.last_hidden_state[:, 0, :]
        return first_token_state.squeeze().numpy()

    return [embed_one(text) for text in texts]

In [17]:
def open_research_page():
    """Build and run the "AI Research Engine" window.

    The window offers a PDF picker, a query box, a "Generate Response" button
    (wired to ``run_engine``) and a scrollable response area. The call blocks
    in ``root.mainloop()`` until the window is closed.

    NOTE(review): this creates its own ``tk.Tk()`` root even when the main
    application window already exists; a ``tk.Toplevel`` may be preferable.
    """
    # Root window
    root = tk.Tk()
    root.title("AI Research Engine for Legal Documents")
    root.geometry("850x700")
    root.configure(bg="#4CAF50")

    # Fonts
    title_font = ("Arial", 18, "bold")
    label_font = ("Arial", 12)

    # Frames
    title_frame = tk.Frame(root, bg="#4CAF50", height=60)
    title_frame.pack(fill=tk.X)

    content_frame = tk.Frame(root, bg="#f5f5f5", padx=20, pady=20)
    content_frame.pack(fill=tk.BOTH, expand=True)

    footer_frame = tk.Frame(root, bg="#4CAF50", height=40)
    footer_frame.pack(fill=tk.X)

    # Title Label
    title_label = tk.Label(
        title_frame,
        text="AI-Driven Research Engine for Commercial Courts",
        font=title_font,
        bg="#4CAF50",
        fg="#ffffff"
    )
    title_label.pack(pady=10)

    # File Upload Section
    # Bind the variable to THIS root: without an explicit master a Tk variable
    # attaches to the default (first-created) root, so the entry in this window
    # would not reflect the selected path when another Tk root already exists.
    pdf_path_var = tk.StringVar(master=root)
    file_frame = tk.LabelFrame(content_frame, text="Upload Document", font=label_font, bg="#f5f5f5", padx=10, pady=10)
    file_frame.pack(fill="x", pady=10)

    file_entry = ttk.Entry(file_frame, textvariable=pdf_path_var, width=60, state='readonly')
    file_entry.grid(row=0, column=0, padx=5, pady=5)

    browse_button = ttk.Button(file_frame, text="Browse", command=lambda: select_pdf(pdf_path_var))
    browse_button.grid(row=0, column=1, padx=5, pady=5)

    # Query Input Section
    query_frame = tk.LabelFrame(content_frame, text="Enter Query", font=label_font, bg="#f5f5f5", padx=10, pady=10)
    query_frame.pack(fill="x", pady=10)

    query_entry = Text(query_frame, height=5, wrap=tk.WORD, font=("Arial", 10))
    query_entry.pack(fill="x", padx=5, pady=5)

    # Run Button (response_text is looked up when the button is clicked, after it exists)
    run_button = ttk.Button(content_frame, text="Generate Response", command=lambda: run_engine(pdf_path_var, query_entry, response_text))
    run_button.pack(pady=10)

    # Response Display Section
    response_frame = tk.LabelFrame(content_frame, text="Response", font=label_font, bg="#f5f5f5", padx=10, pady=10)
    response_frame.pack(fill="both", expand=True, pady=10)

    # Pack the scrollbar first on the right, then let the text fill the rest.
    # Packed after the expanding text widget, the scrollbar was laid out in the
    # leftover space below the text instead of beside it.
    response_scrollbar = Scrollbar(response_frame)
    response_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    response_text = Text(
        response_frame,
        wrap=tk.WORD,
        font=("Arial", 10),
        height=15,
        yscrollcommand=response_scrollbar.set
    )
    response_text.pack(side=tk.LEFT, fill="both", expand=True, padx=5, pady=5)
    response_scrollbar.config(command=response_text.yview)

    # Footer Label
    footer_label = tk.Label(
        footer_frame,
        text="© 2024 AI Research Engine",
        font=("Arial", 10),
        bg="#4CAF50",
        fg="#ffffff"
    )
    footer_label.pack(pady=10)

    # Main loop to run the Tkinter application
    root.mainloop()



In [18]:
def display_content(content):
    """Open a window showing ``content`` with prediction and research actions.

    The window contains the extracted text (read-only), a "Show Predictions"
    button (also bound to the ``p`` key) that opens a child window with the
    output of ``predict_case_details``, and a "Start Research" button that
    calls ``open_research_page``.
    """
    content_window = tk.Toplevel()
    content_window.title("Extracted Content")

    def show_predictions():
        # Predict case details only when the button/key is pressed
        preceding, legal_laws, result = predict_case_details(content)
        predictions_summary = f"Preceding Case:\n{preceding}\n\nLegal Laws:\n{legal_laws}\n\nResult:\n{result}\n"

        popup = tk.Toplevel(content_window)
        popup.title("Case Predictions")
        summary_box = Text(popup, wrap='word')
        summary_box.insert('1.0', predictions_summary)
        summary_box.config(state='disabled')
        summary_box.pack(expand=True, fill='both')

    # Read-only view of the extracted text
    body_text = "Extracted Content:\n\n" + content + "\n\n"
    text_area = Text(content_window, wrap='word')
    text_area.insert('1.0', body_text)
    text_area.config(state='disabled')
    text_area.pack(expand=True, fill='both')
    content_window.geometry("600x500")

    # Button to show predictions
    button_font = ("Arial", 12)
    predict_btn = tk.Button(
        content_window,
        text="Show Predictions",
        command=show_predictions,
        bg="#4CAF50",
        fg="white",
        font=button_font,
    )
    predict_btn.pack(pady=10)

    # Bind the 'p' key to show predictions
    content_window.bind('<p>', lambda _event: show_predictions())

    # Button to start research (the target is resolved when the button is clicked)
    research_btn = tk.Button(
        content_window,
        text="Start Research",
        command=lambda: open_research_page(),
        bg="#008CBA",
        fg="white",
        font=button_font,
    )
    research_btn.pack(pady=10)



In [19]:
def extract_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path``.

    Pages that yield no text are skipped. Errors are printed rather than
    raised; whatever was collected before the error is returned (stripped).
    """
    print(f"Processing PDF: {file_path}")
    page_texts = []
    try:
        with open(file_path, "rb") as pdf_file:
            for page in PyPDF2.PdfReader(pdf_file).pages:
                page_text = page.extract_text()
                if page_text:
                    page_texts.append(page_text + "\n")
    except Exception as err:
        print(f"Error processing PDF: {err}")
    return "".join(page_texts).strip()

In [20]:
def extract_docx(file_path):
    """Return the paragraph texts of the DOCX at ``file_path``, one per line.

    Errors are printed rather than raised; whatever was collected before the
    error is returned (stripped).
    """
    print(f"Processing DOCX: {file_path}")
    paragraph_lines = []
    try:
        for paragraph in docx.Document(file_path).paragraphs:
            paragraph_lines.append(paragraph.text + "\n")
    except Exception as err:
        print(f"Error processing DOCX: {err}")
    return "".join(paragraph_lines).strip()

In [21]:
def extract_txt(file_path):
    """Return the stripped UTF-8 text of ``file_path``.

    Errors are printed rather than raised, in which case an empty string is
    returned.
    """
    print(f"Processing TXT: {file_path}")
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()
    except Exception as err:
        print(f"Error processing TXT: {err}")
        return ""
    return raw_text.strip()

In [22]:
def extract_entities(text):
    """Run ``ner_pipeline`` on ``text`` and return its output.

    If the pipeline raises, the error is printed and an empty list is returned.
    """
    try:
        return ner_pipeline(text)
    except Exception as err:
        print(f"Error extracting entities: {err}")
    return []

In [23]:
def search_indian_kanoon(query_text, top_n=5):
    """Return the case titles in ``cases_df`` most similar to ``query_text``.

    Despite its name, this function performs no web request: it ranks the
    ``Case_Title`` column of the local ``cases_df`` by cosine similarity
    between sentence embeddings produced by ``embedding_model``.

    Args:
        query_text: Text to search for.
        top_n: Maximum number of matches to return (default 5).

    Returns:
        A list of ``{'Case_Title': ..., 'Similarity': ...}`` dicts ordered from
        most to least similar; empty when there are no cases or ``top_n`` is
        not positive.
    """
    case_titles = cases_df['Case_Title'].tolist()
    if not case_titles or top_n <= 0:
        return []

    # Encoding every title is by far the most expensive step and does not
    # depend on the query, so keep the embeddings on the function object and
    # recompute only when the list of titles has changed.
    cached = getattr(search_indian_kanoon, "_title_cache", None)
    if cached is None or cached[0] != case_titles:
        cached = (case_titles, embedding_model.encode(case_titles))
        search_indian_kanoon._title_cache = cached
    case_embeddings = cached[1]

    query_embedding = embedding_model.encode([query_text])
    scores = cosine_similarity(query_embedding, case_embeddings)[0]

    # argsort is ascending: reverse it and keep the best top_n indices.
    best_indices = scores.argsort()[::-1][:top_n]

    return [
        {'Case_Title': case_titles[idx], 'Similarity': scores[idx]}
        for idx in best_indices
    ]


In [24]:
# Combine relevant columns for features
feature_columns = ['Case_Title', 'Citation', 'Court', 'Judgement_Date',
                   'Judges', 'Petitioner', 'Respondent', 'Bench']
cases_df['Combined_Text'] = cases_df[feature_columns].apply(
    lambda row: ' '.join(row.astype(str)),
    axis=1,
)

# Define features (X) and targets (y)
target_columns = ['Preceding', 'Legal_Laws', 'Result']
X = cases_df['Combined_Text']
y = cases_df[target_columns]

# TF-IDF vectorization (vocabulary capped at 5000 terms)
tfidf = TfidfVectorizer(max_features=5000)
X_vectorized = tfidf.fit_transform(X)

# Train-test split: 20% held out, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized,
    y,
    test_size=0.2,
    random_state=42,
)

# MultiOutputClassifier with Random Forest (one forest per target column)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
multi_rf = MultiOutputClassifier(rf_model)
multi_rf.fit(X_train, y_train)

# Save the trained model and vectorizer; the predict_* functions load these files
for artefact, artefact_path in ((multi_rf, 'random_forest_model.pkl'),
                                (tfidf, 'tfidf_vectorizer.pkl')):
    joblib.dump(artefact, artefact_path)

print("Model training complete and saved!")


Model training complete and saved!


In [25]:
def predict_case_details(content):
    """
    Predict Preceding, Legal Laws, and Result based on uploaded content.

    The classifier and TF-IDF vectorizer are loaded from
    'random_forest_model.pkl' and 'tfidf_vectorizer.pkl' on every call.
    Returns a (preceding, legal_laws, result) tuple, or (None, None, None)
    after printing the error when anything fails.
    """
    try:
        classifier = joblib.load('random_forest_model.pkl')
        vectorizer = joblib.load('tfidf_vectorizer.pkl')
        first_prediction = classifier.predict(vectorizer.transform([content]))[0]
        preceding, legal_laws, result = first_prediction
    except Exception as err:
        print(f"Prediction error: {err}")
        return None, None, None
    return preceding, legal_laws, result


In [26]:
def predict_query_details(query_text):
    """
    Predict Precedings, Legal Laws, and Result based on the search query.

    Returns a (preceding, legal_laws, result) tuple, or (None, None, None)
    after printing the error when anything fails.
    """
    try:
        # Load model and vectorizer
        classifier = joblib.load('random_forest_model.pkl')
        vectorizer = joblib.load('tfidf_vectorizer.pkl')

        # Vectorize the query text and predict; only one row is submitted
        query_matrix = vectorizer.transform([query_text])
        first_prediction = classifier.predict(query_matrix)[0]
        preceding, legal_laws, result = first_prediction
    except Exception as err:
        print(f"Prediction error for query: {err}")
        return None, None, None
    return preceding, legal_laws, result


In [27]:
def vectorize_case_titles():
    """Return the case titles of ``cases_df`` together with their embeddings.

    Returns:
        A ``(case_titles, case_vectors)`` pair: the ``Case_Title`` column as a
        list and the corresponding SentenceTransformer encodings.
    """
    titles = cases_df['Case_Title'].tolist()

    # Use SentenceTransformer to get embeddings
    encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    return titles, encoder.encode(titles)

In [28]:
def find_related_cases(query, case_titles, case_vectors, top_n=5):
    """Return the titles whose vectors are nearest to ``query`` (cosine metric).

    Args:
        query: Text to search for.
        case_titles: Titles aligned index-for-index with ``case_vectors``.
        case_vectors: Embeddings of the titles (e.g. from
            ``vectorize_case_titles``).
        top_n: Maximum number of titles to return (default 5).

    Returns:
        Up to ``top_n`` titles, nearest first; empty when there are no vectors
        or ``top_n`` is not positive.
    """
    # NearestNeighbors is not among the notebook's top-level imports, so the
    # original body failed with NameError; import it where it is used.
    from sklearn.neighbors import NearestNeighbors

    # kneighbors rejects n_neighbors larger than the number of fitted samples.
    n_neighbors = min(top_n, len(case_vectors))
    if n_neighbors <= 0:
        return []

    # Vectorize the query using SentenceTransformer
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    query_vector = model.encode([query])

    # Perform KNN search
    knn = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine')
    knn.fit(case_vectors)
    distances, indices = knn.kneighbors(query_vector)

    # Get the most similar case titles based on the indices
    return [case_titles[idx] for idx in indices[0]]

In [29]:
def start_progress_bar():
    """Pack the global ``progress_bar`` into the window and start its animation."""
    progress_bar.pack(pady=10)
    progress_bar.start(10)  # 10 is the step interval passed to Progressbar.start

In [30]:
def stop_progress_bar():
    """Stop the global ``progress_bar`` animation and remove it from the layout."""
    progress_bar.stop()
    progress_bar.pack_forget()

In [31]:
def search_entities():
    """Run a search for the text typed into ``search_entry``.

    An informational dialog is shown when the entry is empty. Otherwise
    ``process_search`` is started in a daemon thread and the progress bar is
    displayed.
    """
    query = search_entry.get()
    if not query:
        messagebox.showinfo("Info", "Please enter a search term.")
        return

    search_thread = threading.Thread(target=process_search, args=(query,), daemon=True)
    search_thread.start()
    start_progress_bar()

In [32]:
def display_search_results(case_links, query_text):
    """Open a window listing ``case_links`` with a "Predict" button.

    Args:
        case_links: Iterable of dicts with 'Case_Title' and 'Similarity' keys.
        query_text: The query handed to ``show_predictions_window`` when the
            "Predict" button is pressed.
    """
    results_window = tk.Toplevel()
    results_window.title("Search Results")
    results_window.geometry("600x500")
    text_area = Text(results_window, wrap='word')

    # One line per match, similarity rounded to two decimals
    result_lines = [
        f"{link['Case_Title']} (Similarity: {link['Similarity']:.2f})\n"
        for link in case_links
    ]
    listing = "Top Similar Cases Based on Semantic Search:\n\n" + "".join(result_lines)

    text_area.insert('1.0', listing)
    text_area.config(state='disabled')
    text_area.pack(expand=True, fill='both')

    # Add Predict button
    button_options = dict(
        text="Predict",
        command=lambda: show_predictions_window(query_text),
        bg="#4CAF50",
        fg="white",
        font=("Arial", 12),
    )
    predict_button = tk.Button(results_window, **button_options)
    predict_button.pack(pady=10)


In [33]:
def show_predictions_window(query_text):
    """Open a window with the predictions for ``query_text``.

    The values come from ``predict_query_details``; a value of ``None`` (which
    that function returns when prediction fails) is displayed as "N/A". Any
    exception raised while building the window is reported in an error dialog.
    """
    def or_na(value):
        # Only a missing prediction is replaced. The previous truthiness test
        # also turned legitimate falsy predictions (such as 0) into "N/A".
        return 'N/A' if value is None else value

    try:
        preceding, legal_laws, result = predict_query_details(query_text)
        predictions_window = tk.Toplevel()
        predictions_window.title("Query Predictions")
        predictions_window.geometry("600x400")

        text_area = Text(predictions_window, wrap='word')
        predictions_summary = (
            f"Search Query Predictions:\n\n"
            f"Preceding Case:\n{or_na(preceding)}\n\n"
            f"Legal Laws:\n{or_na(legal_laws)}\n\n"
            f"Result:\n{or_na(result)}\n"
        )
        text_area.insert('1.0', predictions_summary)
        text_area.config(state='disabled')
        text_area.pack(expand=True, fill='both')

    except Exception as e:
        messagebox.showerror("Error", f"An error occurred while displaying predictions: {e}")


In [34]:
def process_search(query):
    """Search for cases similar to ``query`` and show them in a results window.

    The predictions for the query are printed to stdout as well. Errors are
    reported in a dialog, and the progress bar is stopped in every case.
    """
    try:
        matches = search_indian_kanoon(query)
        predicted = predict_query_details(query)
        preceding, legal_laws, result = predicted
        print(f"Predicted Preceding: {preceding}")
        print(f"Predicted Legal Laws: {legal_laws}")
        print(f"Predicted Result: {result}")
        display_search_results(matches, query)
    except Exception as err:
        messagebox.showerror("Error", f"An error occurred during the search: {err}")
    finally:
        stop_progress_bar()


In [35]:
# Initialize the Tkinter GUI
# Main application window; the widgets created in the following cells are attached to it.
root = tk.Tk()
root.title("File Upload and Content Extractor")
root.geometry("600x400")
root.config(bg="#f0f0f0")

In [36]:
# Instruction label, the first widget packed into the main window
instruction_label = tk.Label(root, text="Upload the case file as .pdf, .docx or .txt",
                               bg="#f0f0f0", font=("Arial", 12))
instruction_label.pack(pady=10)

In [37]:
# Entry for search term
# (search_entities reads its text when the Search button is pressed)
search_entry = tk.Entry(root, width=50, font=("Arial", 12))
search_entry.pack(pady=10)

In [38]:
# Search button: runs search_entities when clicked
search_btn = tk.Button(root, text="Search", command=search_entities, width=20, bg="#4CAF50", fg="white", font=("Arial", 12))
search_btn.pack(pady=10)

In [39]:
# Frame for the upload button
# (also the parent of the selected-file label created below)
frame = tk.Frame(root, bg="#ffffff", bd=2, relief=tk.GROOVE)
frame.pack(pady=20, padx=10, fill='both', expand=True)

In [40]:
# Upload button: runs upload_file, which opens the file picker
upload_btn = tk.Button(frame, text="Upload File", command=upload_file, width=20, bg="#4CAF50", fg="white", font=("Arial", 12))
upload_btn.pack(pady=10)

In [41]:
# Label to display selected file path
# (process_file updates its text with the chosen path)
file_label = tk.Label(frame, text="Selected File Path: ", bg="#f0f0f0", font=("Arial", 10))
file_label.pack(pady=5)

In [42]:
# Progress bar (initially hidden)
# It is not packed here: start_progress_bar() packs it and stop_progress_bar() removes it again.
progress_bar = ttk.Progressbar(root, orient="horizontal", mode="indeterminate")

In [43]:
# Start the Tkinter event loop
# (this call blocks; later cells run only after the loop ends)
root.mainloop()

In [48]:
def open_research_page():
    """Build and run the self-contained "AI Research Engine" window.

    Everything the window needs (imports, models, helper functions) is set up
    inside this function, so it does not rely on the rest of the notebook.
    The user picks a PDF, types a query and presses "Generate Response"; the
    PDF text is segmented, embedded with InLegalBERT and sent with the query to
    the Cohere chat endpoint. The call blocks in ``root.mainloop()`` until the
    window is closed.

    The Cohere API key is read from the ``COHERE_API_KEY`` environment variable
    (falling back to ``CO_API_KEY``).
    """
    import warnings
    warnings.filterwarnings('ignore')

    import os
    import tkinter as tk
    from tkinter import filedialog, messagebox, Text, Scrollbar
    from tkinter import ttk  # For themed widgets

    import torch
    import numpy as np
    import cohere
    from pdfminer.high_level import extract_text
    from nltk.tokenize.texttiling import TextTilingTokenizer

    from transformers import AutoTokenizer, AutoModel

    # Initialize models and APIs
    # Previously the secret itself was passed as the *name* of the environment
    # variable, exposing the key in source and looking up a variable named
    # after the key. The exposed key should be revoked.
    co = cohere.Client(os.environ.get("COHERE_API_KEY") or os.environ.get("CO_API_KEY"))
    tokenizer = AutoTokenizer.from_pretrained("law-ai/InLegalBERT")
    model = AutoModel.from_pretrained("law-ai/InLegalBERT")
    tiling_tokenizer = TextTilingTokenizer()

    # Functions
    def generate_response(prompt, embeddings):
        """Send the prompt, prefixed with an embedding summary, to Cohere."""
        aggregated_embedding = np.mean([np.mean(embed) for embed in embeddings])
        embedding_str = f"Embedding summary: {aggregated_embedding:.2f}"
        full_prompt = f"{embedding_str}\n\n{prompt}"

        try:
            response = co.chat(
                model="command-xlarge-nightly",
                message=full_prompt,
                max_tokens=750
            )
            return response.text.strip()
        except cohere.error.CohereError as e:
            return f"An error occurred: {str(e)}"

    def extract_text_from_pdf(pdf_path):
        """Return the text pdfminer extracts from the PDF."""
        return extract_text(pdf_path)

    def get_bert_embeddings(texts):
        """Return one embedding (hidden state at position 0) per text."""
        embeddings_list = []
        for text in texts:
            inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            with torch.no_grad():
                outputs = model(**inputs)
            embeddings = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
            embeddings_list.append(embeddings)
        return embeddings_list

    def process_pdf_and_generate_response(pdf_path, query):
        """Extract, segment and embed the PDF, then ask Cohere the query."""
        document_text = extract_text_from_pdf(pdf_path)
        try:
            text_chunks = tiling_tokenizer.tokenize(document_text)
        except ValueError:
            # TextTiling raises ValueError for text it cannot segment (for
            # example very short documents or ones without paragraph breaks);
            # use the whole document as a single chunk instead of failing.
            text_chunks = [document_text]
        document_embeddings = get_bert_embeddings(text_chunks)

        prompt = f"You are an AI-driven research engine for commercial courts. Given the legal document: '{document_text[:2000]}', answer the query: '{query}'"
        response = generate_response(prompt, document_embeddings)
        return response

    def select_pdf():
        """Let the user pick a PDF; keep the previous path if they cancel."""
        file_path = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf")])
        if file_path:
            pdf_path_var.set(file_path)

    def run_engine():
        """Validate the inputs, generate the response and display it."""
        pdf_path = pdf_path_var.get()
        query = query_entry.get("1.0", tk.END).strip()
        if not pdf_path or not query:
            messagebox.showerror("Error", "Please upload a PDF and enter a query.")
            return

        try:
            response = process_pdf_and_generate_response(pdf_path, query)
            response_text.delete("1.0", tk.END)
            response_text.insert(tk.END, response)
        except Exception as e:
            messagebox.showerror("Error", str(e))

    # Tkinter GUI
    root = tk.Tk()
    root.title("AI Research Engine for Legal Documents")
    root.geometry("850x700")
    root.configure(bg="#4CAF50")

    # Fonts
    title_font = ("Arial", 18, "bold")
    label_font = ("Arial", 12)

    # Frames
    title_frame = tk.Frame(root, bg="#4CAF50", height=60)
    title_frame.pack(fill=tk.X)

    content_frame = tk.Frame(root, bg="#f5f5f5", padx=20, pady=20)
    content_frame.pack(fill=tk.BOTH, expand=True)

    footer_frame = tk.Frame(root, bg="#4CAF50", height=40)
    footer_frame.pack(fill=tk.X)

    # Title
    title_label = tk.Label(
        title_frame,
        text="AI-Driven Research Engine for Commercial Courts",
        font=title_font,
        bg="#4CAF50",
        fg="#ffffff"
    )
    title_label.pack(pady=10)

    # File Upload
    # Bind the variable to THIS root: without an explicit master a Tk variable
    # attaches to the default (first-created) root, which may be another window.
    pdf_path_var = tk.StringVar(master=root)
    file_frame = tk.LabelFrame(content_frame, text="Upload Document", font=label_font, bg="#f5f5f5", padx=10, pady=10)
    file_frame.pack(fill="x", pady=10)

    file_entry = ttk.Entry(file_frame, textvariable=pdf_path_var, width=60, state='readonly')
    file_entry.grid(row=0, column=0, padx=5, pady=5)

    browse_button = ttk.Button(file_frame, text="Browse", command=select_pdf)
    browse_button.grid(row=0, column=1, padx=5, pady=5)

    # Query Input
    query_frame = tk.LabelFrame(content_frame, text="Enter Query", font=label_font, bg="#f5f5f5", padx=10, pady=10)
    query_frame.pack(fill="x", pady=10)

    query_entry = Text(query_frame, height=5, wrap=tk.WORD, font=("Arial", 10))
    query_entry.pack(fill="x", padx=5, pady=5)

    # Run Button
    run_button = ttk.Button(content_frame, text="Generate Response", command=run_engine)
    run_button.pack(pady=10)

    # Response Display
    response_frame = tk.LabelFrame(content_frame, text="Response", font=label_font, bg="#f5f5f5", padx=10, pady=10)
    response_frame.pack(fill="both", expand=True, pady=10)

    # Pack the scrollbar first on the right, then let the text fill the rest.
    # Packed after the expanding text widget, the scrollbar was laid out in the
    # leftover space below the text instead of beside it.
    response_scrollbar = Scrollbar(response_frame)
    response_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    response_text = Text(
        response_frame,
        wrap=tk.WORD,
        font=("Arial", 10),
        height=15,
        yscrollcommand=response_scrollbar.set
    )
    response_text.pack(side=tk.LEFT, fill="both", expand=True, padx=5, pady=5)
    response_scrollbar.config(command=response_text.yview)

    # Footer
    footer_label = tk.Label(
        footer_frame,
        text="© 2024 AI Research Engine",
        font=("Arial", 10),
        bg="#4CAF50",
        fg="#ffffff"
    )
    footer_label.pack(pady=10)

    # Mainloop
    root.mainloop()
open_research_page()