In [9]:
import os
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

from docx import Document as WordDocument
from docx.shared import Inches  # For adding the logo
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# OpenAI API key. The OPENAI_API_KEY environment variable takes precedence so
# the secret does not have to be pasted into (and saved with) the notebook;
# the literal is only the placeholder used when the variable is unset or empty.
key = os.environ.get("OPENAI_API_KEY") or "Here I have to put my private key"

# Directory where the Chroma vector store is persisted.
CHROMA_PATH = "chroma"
# Directory handed to PyPDFDirectoryLoader as the source of SOP PDFs.
SOPS_PATH = "SOPS"

# Hardcoded path to the logo
LOGO_PATH = "logo.jpg"  # Update this to the actual path of your logo file

# Embedding function
def get_embedding_function():
    """Return an OpenAIEmbeddings instance configured with the module-level API key."""
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    return embeddings

# RAG Preparation: Load and split documents
def prepare_rag():
    """Load the PDFs under SOPS_PATH, split them into chunks and index them in Chroma.

    Each chunk is stored under an ID derived from its source, page and text,
    and chunks whose ID is already in the store are skipped. Previously every
    call re-embedded and re-inserted the whole directory, so the persisted
    store accumulated duplicate entries on each launch.

    Returns:
        The Chroma store persisted at CHROMA_PATH.
    """
    import hashlib  # function-scope import: hashlib is not among this cell's imports

    documents = PyPDFDirectoryLoader(SOPS_PATH).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=80, length_function=len)
    chunks = text_splitter.split_documents(documents)

    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())

    # Map deterministic ID -> chunk; setdefault also collapses identical chunks
    # within this batch, which would otherwise collide on the same ID.
    chunks_by_id = {}
    for chunk in chunks:
        fingerprint = "\x1f".join((
            str(chunk.metadata.get("source", "")),
            str(chunk.metadata.get("page", "")),
            chunk.page_content,
        ))
        chunk_id = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
        chunks_by_id.setdefault(chunk_id, chunk)

    # include=[] asks Chroma for the IDs only (no documents or embeddings).
    existing_ids = set(db.get(include=[])["ids"])
    new_ids = [chunk_id for chunk_id in chunks_by_id if chunk_id not in existing_ids]

    if new_ids:
        db.add_documents([chunks_by_id[chunk_id] for chunk_id in new_ids], ids=new_ids)
        db.persist()
    return db

# SOP generation logic
def generate_sop(selected_sections, instrument_name, brand, model, output_path):
    """Generate an SOP with GPT-4 from retrieved context and save it as a Word file.

    Args:
        selected_sections: Section names chosen in the GUI; listed in the prompt.
        instrument_name: Name of the instrument the SOP is for.
        brand: Optional brand; "Not specified" is used when empty.
        model: Optional instrument model; "Not specified" is used when empty.
        output_path: Destination passed to ``save_sop_to_word``.

    Errors raised by Chroma, the chat model or the document writer are not
    caught here and propagate to the caller.
    """
    # Handle optional Brand and Model
    brand_text = f"Brand: {brand}" if brand else "Brand: Not specified"
    model_text = f"Model: {model}" if model else "Model: Not specified"

    # Fetch context from Chroma DB. The search text names the instrument so the
    # retrieved chunks depend on the request; the former fixed query string was
    # the same for every instrument.
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
    query_parts = [instrument_name, brand, model, "standard operating procedure"]
    retrieval_query = " ".join(part for part in query_parts if part)
    results = db.similarity_search(retrieval_query, k=10)
    context_text = "\n\n---\n\n".join([doc.page_content for doc in results])

    # Construct SOP based on selected sections
    context = f"""
    Instrument: {instrument_name}
    {brand_text}
    {model_text}

    Sections included:
    {', '.join(selected_sections)}

    Context:
    {context_text}
    """
    prompt = f"""
       Use the following context to create a detailed Standard Operating Procedure (SOP) 
    for the specified query. The SOP must follow the selected structure:

    1. Scope: Describe the purpose and application of the SOP in detail.
    2. Background: Provide historical and scientific context.
    3. Safety: Outline all safety measures, PPE, and hazard precautions.
    4. Materials Required: Include a comprehensive list of equipment and consumables.
    5. Standards and Controls: Detail quality control and regulatory standards.
    6. Calibration: Describe step-by-step calibration procedures.
    7. Procedures: Provide detailed operational steps with sub-steps.
    8. Sampling: Explain methods and best practices for sampling.
    9. Calculations: Include detailed formulae and calculation methods.
    10. Uncertainty of Measurement: Describe how uncertainty is measured and mitigated.
    11. Limitations: Explain constraints and situations where this SOP may not apply.
    12. Documentation: Outline documentation requirements and templates.
    13. References: Provide scientific and technical references.

    Ensure that each section contains detailed and relevant content based on the provided context.
    Make sure to include at least 100 words for every section
    
    {context}
    """
    # Named ``llm`` so the chat client does not shadow the ``model`` parameter.
    llm = ChatOpenAI(model="gpt-4", openai_api_key=key)
    response = llm.invoke(prompt)
    sop_text = response.content if response else "No response generated."

    save_sop_to_word(sop_text, output_path)

def save_sop_to_word(sop_text, output_path):
    """Write the generated SOP text to a Word document at ``output_path``.

    The text is split on blank lines. A segment whose first line contains a
    colon becomes a level-2 heading (text before the colon) followed by a
    paragraph (text after it); any other segment becomes a plain paragraph.

    Args:
        sop_text: SOP text as returned by the chat model.
        output_path: File path handed to ``Document.save``.
    """
    doc = WordDocument()
    if LOGO_PATH:
        try:
            doc.add_picture(LOGO_PATH, width=Inches(2))  # Add logo at the top
        except Exception as e:
            # The logo is optional decoration: report the problem and continue.
            print(f"Error adding logo: {e}")
    doc.add_heading("Standard Operating Procedure (SOP)", level=1)
    for section in sop_text.split("\n\n"):
        section = section.strip()
        if not section:
            # Runs of blank lines would otherwise become empty paragraphs.
            continue
        # Only a colon on the first line marks a heading; a colon deeper in the
        # segment used to turn all the preceding lines into one heading.
        first_line = section.split("\n", 1)[0]
        if ":" in first_line:
            header, content = section.split(":", 1)
            doc.add_heading(header.strip(), level=2)
            if content.strip():
                doc.add_paragraph(content.strip())
        else:
            doc.add_paragraph(section)
    doc.save(output_path)
    print(f"SOP saved to {output_path}")

# GUI setup
def run_gui():
    """Build the SOP generator window and run the Tk main loop.

    The form collects an instrument name, optional brand and model, the
    sections to include and an output path, then calls ``generate_sop``.
    The selected sections are read from the checkbox variables when the
    "Generate SOP" button is pressed, in the order they are displayed.
    """
    root = tk.Tk()
    root.title("Enhanced SOP Generator")
    root.geometry("800x700")

    # Variables
    output_path_var = tk.StringVar()
    instrument_name_var = tk.StringVar()
    brand_var = tk.StringVar()
    model_var = tk.StringVar()

    select_all_label = "Select All"
    # Checkbox label -> BooleanVar. The checkbox state is the single source of
    # truth for the selection.
    checkbox_vars = {}

    # Functions
    def browse_output_path():
        file_path = filedialog.asksaveasfilename(defaultextension=".docx", filetypes=[("Word Documents", "*.docx")])
        if file_path:
            output_path_var.set(file_path)

    def section_vars():
        """Return the variables of the real sections (everything but "Select All")."""
        return [var for name, var in checkbox_vars.items() if name != select_all_label]

    def toggle_section(section):
        """Keep the "Select All" box and the individual section boxes in step."""
        if section == select_all_label:
            state = checkbox_vars[select_all_label].get()
            for var in section_vars():
                var.set(state)
        else:
            checkbox_vars[select_all_label].set(all(var.get() for var in section_vars()))

    def generate_sop_task():
        try:
            instrument_name = instrument_name_var.get()
            brand = brand_var.get()
            model = model_var.get()
            output_path = output_path_var.get()
            # "Select All" is a control, not a section, so it is never passed on.
            selected_sections = [
                name for name, var in checkbox_vars.items()
                if name != select_all_label and var.get()
            ]

            if not instrument_name:
                messagebox.showerror("Error", "Instrument name is required.")
                return

            if not selected_sections:
                messagebox.showerror("Error", "At least one section must be selected.")
                return

            # Generate default output file name if not set. The local variable
            # must be updated too: it is what gets passed to generate_sop.
            if not output_path:
                output_path = f"{instrument_name}_{brand or 'NoBrand'}_{model or 'NoModel'}.docx"
                output_path_var.set(output_path)

            generate_sop(selected_sections, instrument_name, brand, model, output_path)
            messagebox.showinfo("Success", f"SOP generated and saved to {output_path}")
        except Exception as e:
            # GUI boundary: surface any failure to the user instead of crashing.
            messagebox.showerror("Error", str(e))

    # GUI Layout
    ttk.Label(root, text="Enhanced SOP Generator", font=("Arial", 16)).pack(pady=10)

    frame = ttk.Frame(root, padding=10)
    frame.pack(fill="both", expand=True)

    ttk.Label(frame, text="Instrument Name:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=instrument_name_var, width=50).grid(row=0, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Brand (Optional):").grid(row=1, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=brand_var, width=50).grid(row=1, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Model (Optional):").grid(row=2, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=model_var, width=50).grid(row=2, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Sections to Include:").grid(row=3, column=0, sticky="nw", padx=5, pady=5)

    # Add checkboxes for sections
    sections = [
        select_all_label,  # Option for selecting all sections
        "Scope",
        "Background",
        "Safety",
        "Materials and Equipment",
        "Standards and Calibration",
        "Procedure",
        "Quality Control and Data Validation",
        "Data Analysis and Reporting",
        "Limitations and Interferences",
        "Health and Environmental Safety",
        "Documentation and References",
        "Special Precautions and Handling",
    ]
    for i, section in enumerate(sections):
        var = tk.BooleanVar()
        cb = tk.Checkbutton(frame, text=section, variable=var, command=lambda s=section: toggle_section(s))
        cb.grid(row=4 + i, column=0, sticky="w", padx=20, pady=2)
        checkbox_vars[section] = var

    ttk.Label(frame, text="Output Path:").grid(row=4 + len(sections), column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=output_path_var, width=50).grid(row=4 + len(sections), column=1, padx=5, pady=5)
    ttk.Button(frame, text="Browse", command=browse_output_path).grid(row=4 + len(sections), column=2, padx=5, pady=5)

    ttk.Button(frame, text="Generate SOP", command=generate_sop_task).grid(row=5 + len(sections), column=0, columnspan=3, pady=20)

    root.mainloop()

if __name__ == "__main__":
    # Index the PDFs under SOPS_PATH into the Chroma store before the window opens.
    prepare_rag()  # Prepare the RAG system
    # Build the Tkinter window and enter its main loop.
    run_gui()




Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 45 0 (offset 0)


In [11]:

import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from docx import Document as WordDocument
from docx.shared import Inches
import fitz  # PyMuPDF for PDF processing
import os

# Constants
# OpenAI API key. The OPENAI_API_KEY environment variable takes precedence so
# the secret does not have to be pasted into (and saved with) the notebook;
# the literal is only the placeholder used when the variable is unset or empty.
key = os.environ.get("OPENAI_API_KEY") or "private key"
CHROMA_PATH = "chroma"  # directory where the Chroma store is persisted
DATA_PATH = "SOPS"  # directory handed to PyPDFDirectoryLoader
LOGO_PATH = "logo.jpg"  # image inserted at the top of generated documents

# Embedding function
def get_embedding_function():
    """Return an OpenAIEmbeddings instance configured with the module-level API key."""
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    return embeddings

# Load documents
def load_documents():
    """Load DATA_PATH with PyPDFDirectoryLoader and return the loaded documents."""
    return PyPDFDirectoryLoader(DATA_PATH).load()

# Split documents into chunks
def split_documents(documents: list[Document]):
    """Split documents into chunks with RecursiveCharacterTextSplitter.

    Uses a chunk size of 800 and an overlap of 80, both measured with ``len``.
    """
    splitter_options = {"chunk_size": 800, "chunk_overlap": 80, "length_function": len}
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)

# Add chunks to Chroma database
def add_to_chroma(chunks: list[Document]):
    """Add chunks to the Chroma store at CHROMA_PATH, skipping ones already stored.

    Each chunk is stored under an ID derived from its source, page and text.
    ``process_manual_file`` reloads the whole DATA_PATH directory on every
    upload, so without these IDs every upload re-embedded and re-inserted
    all existing chunks as duplicates.

    Args:
        chunks: Document chunks to index.
    """
    import hashlib  # function-scope import: hashlib is not among this cell's imports

    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())

    # Map deterministic ID -> chunk; setdefault also collapses identical chunks
    # within this batch, which would otherwise collide on the same ID.
    chunks_by_id = {}
    for chunk in chunks:
        fingerprint = "\x1f".join((
            str(chunk.metadata.get("source", "")),
            str(chunk.metadata.get("page", "")),
            chunk.page_content,
        ))
        chunk_id = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
        chunks_by_id.setdefault(chunk_id, chunk)

    # include=[] asks Chroma for the IDs only (no documents or embeddings).
    existing_ids = set(db.get(include=[])["ids"])
    new_ids = [chunk_id for chunk_id in chunks_by_id if chunk_id not in existing_ids]
    if not new_ids:
        return

    db.add_documents([chunks_by_id[chunk_id] for chunk_id in new_ids], ids=new_ids)
    db.persist()

# Extract content from PDF (first few pages only)
def extract_manual_content(pdf_path, max_pages=10):
    """Return the text of the first ``max_pages`` pages of a PDF.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        max_pages: Upper bound on the number of pages read from the start.

    Returns:
        The text of the pages read, joined with newlines.
    """
    # The context manager closes the document even if extraction raises;
    # the document was previously left open.
    with fitz.open(pdf_path) as doc:
        page_count = min(len(doc), max_pages)
        return "\n".join(doc[page_num].get_text() for page_num in range(page_count))

# Process uploaded manual file
def process_manual_file(manual_path):
    """Index an uploaded manual together with the PDFs loaded from DATA_PATH.

    Text from the first 10 pages of the manual is wrapped in a Document,
    appended to the loaded documents, and everything is split and added to
    Chroma. The outcome is reported through a message box; exceptions are
    shown to the user rather than raised.
    """
    try:
        extracted = extract_manual_content(manual_path, max_pages=10)
        corpus = load_documents()
        corpus.append(Document(page_content=extracted, metadata={"source": manual_path}))
        add_to_chroma(split_documents(corpus))
        messagebox.showinfo("Success", "Manual file processed and added to the database.")
    except Exception as err:
        messagebox.showerror("Error", f"Failed to process manual file: {str(err)}")

# SOP generation logic
def generate_sop(selected_sections, instrument_name, brand, model, output_path):
    """Generate an SOP with GPT-4 from retrieved context and save it as a Word file.

    Args:
        selected_sections: Section names chosen in the GUI; listed in the prompt.
        instrument_name: Name of the instrument the SOP is for.
        brand: Optional brand; "Not specified" is used when empty.
        model: Optional instrument model; "Not specified" is used when empty.
        output_path: Destination passed to ``save_sop_to_word``.

    Errors raised by Chroma, the chat model or the document writer are not
    caught here and propagate to the caller.
    """
    brand_text = f"Brand: {brand}" if brand else "Brand: Not specified"
    model_text = f"Model: {model}" if model else "Model: Not specified"
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
    # The search text names the instrument so the retrieved chunks depend on
    # the request; the former fixed query string was the same for every instrument.
    query_parts = [instrument_name, brand, model, "standard operating procedure"]
    retrieval_query = " ".join(part for part in query_parts if part)
    results = db.similarity_search(retrieval_query, k=10)
    context_text = "\n\n---\n\n".join([doc.page_content for doc in results])
    context = f"""
    Instrument: {instrument_name}
    {brand_text}
    {model_text}
    Sections included: {', '.join(selected_sections)}
    Context: {context_text}
    """
    prompt = f"""
    Use the following context to create a detailed SOP for the specified query. Ensure each section contains at least 100 words:
    {context}
    """
    # Named ``llm`` so the chat client does not shadow the ``model`` parameter.
    llm = ChatOpenAI(model="gpt-4", openai_api_key=key)
    response = llm.invoke(prompt)
    sop_text = response.content if response else "No response generated."
    save_sop_to_word(sop_text, output_path)

# Save SOP to Word document
def save_sop_to_word(sop_text, output_path):
    """Write the generated SOP text to a Word document at ``output_path``.

    The text is split on blank lines. A segment whose first line contains a
    colon becomes a level-2 heading (text before the colon) followed by a
    paragraph (text after it); any other segment becomes a plain paragraph.

    Args:
        sop_text: SOP text as returned by the chat model.
        output_path: File path handed to ``Document.save``.
    """
    doc = WordDocument()
    if LOGO_PATH and os.path.exists(LOGO_PATH):
        try:
            doc.add_picture(LOGO_PATH, width=Inches(2))
        except Exception as e:
            # The logo is optional decoration: report the problem and continue.
            print(f"Error adding logo: {e}")
    doc.add_heading("Standard Operating Procedure (SOP)", level=1)
    for section in sop_text.split("\n\n"):
        section = section.strip()
        if not section:
            # Runs of blank lines would otherwise become empty paragraphs.
            continue
        # Only a colon on the first line marks a heading; a colon deeper in the
        # segment used to turn all the preceding lines into one heading.
        first_line = section.split("\n", 1)[0]
        if ":" in first_line:
            header, content = section.split(":", 1)
            doc.add_heading(header.strip(), level=2)
            if content.strip():
                doc.add_paragraph(content.strip())
        else:
            doc.add_paragraph(section)
    doc.save(output_path)
    print(f"SOP saved to {output_path}")

# GUI setup
def run_gui():
    """Build the SOP generator window and run the Tk main loop.

    The form collects an instrument name, optional brand and model, a manual
    PDF to index, an output path and the sections to include, then calls
    ``generate_sop``. The selected sections are read from the checkbox
    variables when "Generate SOP" is pressed, in the order they are displayed.
    """
    root = tk.Tk()
    root.title("Enhanced SOP Generator")
    root.geometry("800x750")

    # Variables
    output_path_var = tk.StringVar()
    instrument_name_var = tk.StringVar()
    brand_var = tk.StringVar()
    model_var = tk.StringVar()
    manual_file_var = tk.StringVar()
    # Section name -> BooleanVar backing its checkbox. Holding the variables
    # here keeps them referenced (tkinter unsets the underlying Tcl variable
    # when a Variable object is garbage-collected) and makes the checkbox
    # state the single source of truth for the selection.
    section_vars = {}

    # Functions
    def browse_output_path():
        file_path = filedialog.asksaveasfilename(defaultextension=".docx", filetypes=[("Word Documents", "*.docx")])
        if file_path:
            output_path_var.set(file_path)

    def upload_manual_file():
        file_path = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf"), ("All Files", "*.*")])
        if file_path:
            manual_file_var.set(file_path)
            process_manual_file(file_path)

    def generate_sop_task():
        try:
            instrument_name = instrument_name_var.get()
            brand = brand_var.get()
            model = model_var.get()
            output_path = output_path_var.get()
            selected_sections = [name for name, var in section_vars.items() if var.get()]
            if not instrument_name:
                messagebox.showerror("Error", "Instrument name is required.")
                return
            if not selected_sections:
                messagebox.showerror("Error", "At least one section must be selected.")
                return
            if not output_path:
                messagebox.showerror("Error", "Output path is required.")
                return
            generate_sop(selected_sections, instrument_name, brand, model, output_path)
            messagebox.showinfo("Success", f"SOP generated and saved to {output_path}")
        except Exception as e:
            # GUI boundary: surface any failure to the user instead of crashing.
            messagebox.showerror("Error", str(e))

    # GUI Layout
    ttk.Label(root, text="Enhanced SOP Generator", font=("Arial", 16)).pack(pady=10)

    frame = ttk.Frame(root, padding=10)
    frame.pack(fill="both", expand=True)

    ttk.Label(frame, text="Instrument Name:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=instrument_name_var, width=50).grid(row=0, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Brand (Optional):").grid(row=1, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=brand_var, width=50).grid(row=1, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Model (Optional):").grid(row=2, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=model_var, width=50).grid(row=2, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Manual File:").grid(row=3, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=manual_file_var, width=50, state="readonly").grid(row=3, column=1, padx=5, pady=5)
    ttk.Button(frame, text="Browse", command=upload_manual_file).grid(row=3, column=2, padx=5, pady=5)

    ttk.Label(frame, text="Output Path:").grid(row=4, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=output_path_var, width=50).grid(row=4, column=1, padx=5, pady=5)
    ttk.Button(frame, text="Browse", command=browse_output_path).grid(row=4, column=2, padx=5, pady=5)

    ttk.Label(frame, text="Sections to Include:").grid(row=5, column=0, sticky="nw", padx=5, pady=5)
    sections = [
        "Scope", "Background", "Safety", "Materials and Equipment",
        "Standards and Calibration", "Procedure", "Quality Control",
        "Data Analysis", "Limitations", "Documentation", "References"
    ]
    for i, section in enumerate(sections):
        var = tk.BooleanVar()
        cb = tk.Checkbutton(frame, text=section, variable=var)
        cb.grid(row=6 + i, column=0, sticky="w", padx=20, pady=2)
        section_vars[section] = var

    ttk.Button(frame, text="Generate SOP", command=generate_sop_task).grid(row=6 + len(sections), column=0, columnspan=3, pady=20)

    root.mainloop()

if __name__ == "__main__":
    # Build the Tkinter window and enter its main loop. Nothing is indexed
    # here; documents are added to Chroma from process_manual_file.
    run_gui()


Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 45 0 (offset 0)
  model = ChatOpenAI(model="gpt-4", openai_api_key=key)


SOP saved to C:/Users/roxan/OneDrive/Desktop/Research/Research-AIChemistry-Competition/ICP_700.docx


In [None]:

import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from docx import Document as WordDocument
from docx.shared import Inches
import fitz  # PyMuPDF for PDF processing
import os

# Constants
# OpenAI API key. The OPENAI_API_KEY environment variable takes precedence so
# the secret does not have to be pasted into (and saved with) the notebook;
# the literal is only the placeholder used when the variable is unset or empty.
key = os.environ.get("OPENAI_API_KEY") or "private key"
CHROMA_PATH = "chroma"  # directory where the Chroma store is persisted
DATA_PATH = "SOPS"  # directory handed to PyPDFDirectoryLoader
LOGO_PATH = "logo.jpg"  # image inserted at the top of generated documents

# Embedding function
def get_embedding_function():
    """Return an OpenAIEmbeddings instance configured with the module-level API key."""
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    return embeddings

# Load documents
def load_documents():
    """Load DATA_PATH with PyPDFDirectoryLoader and return the loaded documents."""
    return PyPDFDirectoryLoader(DATA_PATH).load()

# Split documents into chunks
def split_documents(documents: list[Document]):
    """Split documents into chunks with RecursiveCharacterTextSplitter.

    Uses a chunk size of 800 and an overlap of 80, both measured with ``len``.
    """
    splitter_options = {"chunk_size": 800, "chunk_overlap": 80, "length_function": len}
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)

# Add chunks to Chroma database
def add_to_chroma(chunks: list[Document]):
    """Add chunks to the Chroma store at CHROMA_PATH, skipping ones already stored.

    Each chunk is stored under an ID derived from its source, page and text.
    ``process_manual_file`` reloads the whole DATA_PATH directory on every
    upload, so without these IDs every upload re-embedded and re-inserted
    all existing chunks as duplicates.

    Args:
        chunks: Document chunks to index.
    """
    import hashlib  # function-scope import: hashlib is not among this cell's imports

    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())

    # Map deterministic ID -> chunk; setdefault also collapses identical chunks
    # within this batch, which would otherwise collide on the same ID.
    chunks_by_id = {}
    for chunk in chunks:
        fingerprint = "\x1f".join((
            str(chunk.metadata.get("source", "")),
            str(chunk.metadata.get("page", "")),
            chunk.page_content,
        ))
        chunk_id = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
        chunks_by_id.setdefault(chunk_id, chunk)

    # include=[] asks Chroma for the IDs only (no documents or embeddings).
    existing_ids = set(db.get(include=[])["ids"])
    new_ids = [chunk_id for chunk_id in chunks_by_id if chunk_id not in existing_ids]
    if not new_ids:
        return

    db.add_documents([chunks_by_id[chunk_id] for chunk_id in new_ids], ids=new_ids)
    db.persist()

# Extract content from PDF (first few pages only)
def extract_manual_content(pdf_path, max_pages=10):
    """Return the text of the first ``max_pages`` pages of a PDF.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        max_pages: Upper bound on the number of pages read from the start.

    Returns:
        The text of the pages read, joined with newlines.
    """
    # The context manager closes the document even if extraction raises;
    # the document was previously left open.
    with fitz.open(pdf_path) as doc:
        page_count = min(len(doc), max_pages)
        return "\n".join(doc[page_num].get_text() for page_num in range(page_count))

# Process uploaded manual file
def process_manual_file(manual_path):
    """Index an uploaded manual together with the PDFs loaded from DATA_PATH.

    Text from the first 10 pages of the manual is wrapped in a Document,
    appended to the loaded documents, and everything is split and added to
    Chroma. The outcome is reported through a message box; exceptions are
    shown to the user rather than raised.
    """
    try:
        extracted = extract_manual_content(manual_path, max_pages=10)
        corpus = load_documents()
        corpus.append(Document(page_content=extracted, metadata={"source": manual_path}))
        add_to_chroma(split_documents(corpus))
        messagebox.showinfo("Success", "Manual file processed and added to the database.")
    except Exception as err:
        messagebox.showerror("Error", f"Failed to process manual file: {str(err)}")

# SOP generation logic
def generate_sop(selected_sections, instrument_name, brand, model, output_path):
    """Generate an SOP with GPT-4 from retrieved context and save it as a Word file.

    Args:
        selected_sections: Section names chosen in the GUI; listed in the prompt.
        instrument_name: Name of the instrument the SOP is for.
        brand: Optional brand; "Not specified" is used when empty.
        model: Optional instrument model; "Not specified" is used when empty.
        output_path: Destination passed to ``save_sop_to_word``.

    Errors raised by Chroma, the chat model or the document writer are not
    caught here and propagate to the caller.
    """
    brand_text = f"Brand: {brand}" if brand else "Brand: Not specified"
    model_text = f"Model: {model}" if model else "Model: Not specified"
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
    # The search text names the instrument so the retrieved chunks depend on
    # the request; the former fixed query string was the same for every instrument.
    query_parts = [instrument_name, brand, model, "standard operating procedure"]
    retrieval_query = " ".join(part for part in query_parts if part)
    results = db.similarity_search(retrieval_query, k=10)
    context_text = "\n\n---\n\n".join([doc.page_content for doc in results])
    
    # Updated prompt
    prompt = f"""
    Use the following context to create a detailed Standard Operating Procedure (SOP). 
    Include comprehensive details for each section, including examples, explanations, and visuals. Don't summarize the details and keep the ideas unique and not redundant. 

    Structure:
    1. Scope: Defines the purpose and applicability of the procedure, often focusing on specific uses, such as analyzing air samples, tobacco content, or hydrocarbons in automotive exhaust.
    2. Background: Provides context or rationale for the method being standardized, often referencing related research or industry needs.
    3. Safety: Highlights safety precautions, including the use of personal protective equipment (PPE), handling hazardous chemicals, and waste disposal protocols.
    4. Materials and Equipment: Lists all necessary reagents, solvents, instrumentation, and ancillary tools required for conducting the procedure.
    5. Standards and Calibration: Details the required reference materials, standards for quality control, and steps for instrument calibration to ensure accuracy.
    6. Procedure: Offers step-by-step instructions for conducting the analysis, including sample preparation, injection, instrument operation, and troubleshooting.
    7. Quality Control and Data Validation: Focuses on performance checks, validation criteria, and measures to ensure data reliability.
    8. Data Analysis and Reporting: Describes how results should be processed, analyzed, and documented, often including specific formats or tools.
    9. Limitations and Interferences: Identifies constraints of the method, such as compounds that may not be detected or interferences that could affect results.
    10. Health and Environmental Safety: Emphasizes precautions to protect operators and minimize environmental impact during the procedure.
    11. Documentation and References: Specifies record-keeping requirements and cites foundational literature or external standards that the SOP builds upon.
    12. Special Precautions and Handling: Covers specific handling instructions for sensitive or hazardous materials used in the procedure.

    Context:
    {context_text}

    Question: Generate a detailed SOP for the specified instrument and sections.
    Instrument: {instrument_name}
    {brand_text}
    {model_text}
    Sections to include: {', '.join(selected_sections)}
    """
    
    # Named ``llm`` so the chat client does not shadow the ``model`` parameter.
    llm = ChatOpenAI(model="gpt-4", openai_api_key=key)
    response = llm.invoke(prompt)
    sop_text = response.content if response else "No response generated."
    save_sop_to_word(sop_text, output_path)


# Save SOP to Word document
def save_sop_to_word(sop_text, output_path):
    """Write the generated SOP text to a Word document at ``output_path``.

    The text is split on blank lines. A segment whose first line contains a
    colon becomes a level-2 heading (text before the colon) followed by a
    paragraph (text after it); any other segment becomes a plain paragraph.

    Args:
        sop_text: SOP text as returned by the chat model.
        output_path: File path handed to ``Document.save``.
    """
    doc = WordDocument()
    if LOGO_PATH and os.path.exists(LOGO_PATH):
        try:
            doc.add_picture(LOGO_PATH, width=Inches(2))
        except Exception as e:
            # The logo is optional decoration: report the problem and continue.
            print(f"Error adding logo: {e}")
    doc.add_heading("Standard Operating Procedure (SOP)", level=1)
    for section in sop_text.split("\n\n"):
        section = section.strip()
        if not section:
            # Runs of blank lines would otherwise become empty paragraphs.
            continue
        # Only a colon on the first line marks a heading; a colon deeper in the
        # segment used to turn all the preceding lines into one heading.
        first_line = section.split("\n", 1)[0]
        if ":" in first_line:
            header, content = section.split(":", 1)
            doc.add_heading(header.strip(), level=2)
            if content.strip():
                doc.add_paragraph(content.strip())
        else:
            doc.add_paragraph(section)
    doc.save(output_path)
    print(f"SOP saved to {output_path}")

# GUI setup
def run_gui():
    """Build the SOP generator window and run the Tk main loop.

    The form collects an instrument name, optional brand and model, a manual
    PDF to index, an output path and the sections to include, then calls
    ``generate_sop``. The selected sections are read from the checkbox
    variables when "Generate SOP" is pressed, in the order they are displayed.
    """
    root = tk.Tk()
    root.title("Enhanced SOP Generator")
    root.geometry("800x750")

    # Variables
    output_path_var = tk.StringVar()
    instrument_name_var = tk.StringVar()
    brand_var = tk.StringVar()
    model_var = tk.StringVar()
    manual_file_var = tk.StringVar()
    # Section name -> BooleanVar backing its checkbox. Holding the variables
    # here keeps them referenced (tkinter unsets the underlying Tcl variable
    # when a Variable object is garbage-collected) and makes the checkbox
    # state the single source of truth for the selection.
    section_vars = {}

    # Functions
    def browse_output_path():
        file_path = filedialog.asksaveasfilename(defaultextension=".docx", filetypes=[("Word Documents", "*.docx")])
        if file_path:
            output_path_var.set(file_path)

    def upload_manual_file():
        file_path = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf"), ("All Files", "*.*")])
        if file_path:
            manual_file_var.set(file_path)
            process_manual_file(file_path)

    def generate_sop_task():
        try:
            instrument_name = instrument_name_var.get()
            brand = brand_var.get()
            model = model_var.get()
            output_path = output_path_var.get()
            selected_sections = [name for name, var in section_vars.items() if var.get()]
            if not instrument_name:
                messagebox.showerror("Error", "Instrument name is required.")
                return
            if not selected_sections:
                messagebox.showerror("Error", "At least one section must be selected.")
                return
            if not output_path:
                messagebox.showerror("Error", "Output path is required.")
                return
            generate_sop(selected_sections, instrument_name, brand, model, output_path)
            messagebox.showinfo("Success", f"SOP generated and saved to {output_path}")
        except Exception as e:
            # GUI boundary: surface any failure to the user instead of crashing.
            messagebox.showerror("Error", str(e))

    # GUI Layout
    ttk.Label(root, text="Enhanced SOP Generator", font=("Arial", 16)).pack(pady=10)

    frame = ttk.Frame(root, padding=10)
    frame.pack(fill="both", expand=True)

    ttk.Label(frame, text="Instrument Name:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=instrument_name_var, width=50).grid(row=0, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Brand (Optional):").grid(row=1, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=brand_var, width=50).grid(row=1, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Model (Optional):").grid(row=2, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=model_var, width=50).grid(row=2, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Manual File:").grid(row=3, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=manual_file_var, width=50, state="readonly").grid(row=3, column=1, padx=5, pady=5)
    ttk.Button(frame, text="Browse", command=upload_manual_file).grid(row=3, column=2, padx=5, pady=5)

    ttk.Label(frame, text="Output Path:").grid(row=4, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=output_path_var, width=50).grid(row=4, column=1, padx=5, pady=5)
    ttk.Button(frame, text="Browse", command=browse_output_path).grid(row=4, column=2, padx=5, pady=5)

    ttk.Label(frame, text="Sections to Include:").grid(row=5, column=0, sticky="nw", padx=5, pady=5)
    sections = [
        "Scope", "Background", "Safety", "Materials and Equipment",
        "Standards and Calibration", "Procedure", "Quality Control",
        "Data Analysis", "Limitations", "Documentation", "References"
    ]
    for i, section in enumerate(sections):
        var = tk.BooleanVar()
        cb = tk.Checkbutton(frame, text=section, variable=var)
        cb.grid(row=6 + i, column=0, sticky="w", padx=20, pady=2)
        section_vars[section] = var

    ttk.Button(frame, text="Generate SOP", command=generate_sop_task).grid(row=6 + len(sections), column=0, columnspan=3, pady=20)

    root.mainloop()

if __name__ == "__main__":
    # Build the Tkinter window and enter its main loop. Nothing is indexed
    # here; documents are added to Chroma from process_manual_file.
    run_gui()



Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 45 0 (offset 0)


In [5]:
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from docx import Document as WordDocument
from docx.shared import Inches
import fitz  # PyMuPDF for PDF processing
import os

# Constants
# OpenAI API key. The OPENAI_API_KEY environment variable takes precedence so
# the secret does not have to be pasted into (and saved with) the notebook;
# the literal is only the placeholder used when the variable is unset or empty.
key = os.environ.get("OPENAI_API_KEY") or "private key"
CHROMA_PATH = "chroma"  # directory where the Chroma store is persisted
DATA_PATH = "SOPS"  # directory handed to PyPDFDirectoryLoader
LOGO_PATH = "logo.jpg"  # logo image path; not referenced within the visible part of this cell

# Embedding function
def get_embedding_function():
    """Return an OpenAIEmbeddings instance configured with the module-level API key."""
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    return embeddings

# Load documents
def load_documents():
    """Load DATA_PATH with PyPDFDirectoryLoader and return the loaded documents."""
    return PyPDFDirectoryLoader(DATA_PATH).load()

# Split documents into chunks
def split_documents(documents: list[Document]):
    """Split documents into chunks with RecursiveCharacterTextSplitter.

    Uses a chunk size of 800 and an overlap of 80, both measured with ``len``.
    """
    splitter_options = {"chunk_size": 800, "chunk_overlap": 80, "length_function": len}
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)

# Add chunks to Chroma database
def add_to_chroma(chunks: list[Document]):
    """Add chunks to the Chroma store at CHROMA_PATH, skipping ones already stored.

    Each chunk is stored under an ID derived from its source, page and text.
    ``process_manual_file`` reloads the whole DATA_PATH directory on every
    upload, so without these IDs every upload re-embedded and re-inserted
    all existing chunks as duplicates.

    Args:
        chunks: Document chunks to index.
    """
    import hashlib  # function-scope import: hashlib is not among this cell's imports

    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())

    # Map deterministic ID -> chunk; setdefault also collapses identical chunks
    # within this batch, which would otherwise collide on the same ID.
    chunks_by_id = {}
    for chunk in chunks:
        fingerprint = "\x1f".join((
            str(chunk.metadata.get("source", "")),
            str(chunk.metadata.get("page", "")),
            chunk.page_content,
        ))
        chunk_id = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
        chunks_by_id.setdefault(chunk_id, chunk)

    # include=[] asks Chroma for the IDs only (no documents or embeddings).
    existing_ids = set(db.get(include=[])["ids"])
    new_ids = [chunk_id for chunk_id in chunks_by_id if chunk_id not in existing_ids]
    if not new_ids:
        return

    db.add_documents([chunks_by_id[chunk_id] for chunk_id in new_ids], ids=new_ids)
    db.persist()

# Extract content from PDF (first few pages only)
def extract_manual_content(pdf_path, max_pages=10):
    """Return the text of the first ``max_pages`` pages of a PDF.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        max_pages: Upper bound on the number of pages read from the start.

    Returns:
        The text of the pages read, joined with newlines.
    """
    # The context manager closes the document even if extraction raises;
    # the document was previously left open.
    with fitz.open(pdf_path) as doc:
        page_count = min(len(doc), max_pages)
        return "\n".join(doc[page_num].get_text() for page_num in range(page_count))

# Process uploaded manual file
def process_manual_file(manual_path):
    """Index an uploaded manual together with the PDFs loaded from DATA_PATH.

    Text from the first 10 pages of the manual is wrapped in a Document,
    appended to the loaded documents, and everything is split and added to
    Chroma. The outcome is reported through a message box; exceptions are
    shown to the user rather than raised.
    """
    try:
        extracted = extract_manual_content(manual_path, max_pages=10)
        corpus = load_documents()
        corpus.append(Document(page_content=extracted, metadata={"source": manual_path}))
        add_to_chroma(split_documents(corpus))
        messagebox.showinfo("Success", "Manual file processed and added to the database.")
    except Exception as err:
        messagebox.showerror("Error", f"Failed to process manual file: {str(err)}")

# SOP generation logic
def generate_sop(selected_sections, instrument_name, brand, model, output_path):
    """Generate an SOP with the chat model and save it as a Word document.

    Retrieves the ten most similar chunks from the Chroma store, builds the
    prompt from them and the user's choices, sends it to the chat model and
    writes the reply to *output_path* via ``save_sop_to_word``.

    Args:
        selected_sections: Names of the SOP sections to include.
        instrument_name: Name of the instrument the SOP is written for.
        brand: Instrument brand; a falsy value is reported as "Not specified".
        model: Instrument model; a falsy value is reported as "Not specified".
        output_path: Destination path of the generated ``.docx`` file.
    """
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
    results = db.similarity_search("Generate SOP based on selected structure.", k=10)
    context_text = "\n\n---\n\n".join(doc.page_content for doc in results)

    prompt = _build_sop_prompt(selected_sections, instrument_name, brand, model, context_text)

    # Named ``llm`` so the ``model`` parameter (the instrument model) is not shadowed.
    llm = ChatOpenAI(model="gpt-4", openai_api_key=key)
    response = llm.invoke(prompt)
    sop_text = response.content if response else "No response generated."
    save_sop_to_word(sop_text, output_path)


def _build_sop_prompt(selected_sections, instrument_name, brand, model, context_text):
    """Return the full prompt text sent to the chat model.

    The prompt previously embedded a stray pasted fragment containing a
    ``{section}`` placeholder inside the f-string, which raised ``NameError``
    whenever the prompt was built. Only the guidelines from that fragment are
    kept here.
    """
    brand_text = f"Brand: {brand}" if brand else "Brand: Not specified"
    model_text = f"Model: {model}" if model else "Model: Not specified"
    sections_text = ", ".join(selected_sections)
    return f"""
    Use the following context to create a detailed Standard Operating Procedure (SOP).
    Include comprehensive details for each section, including examples, explanations, and visuals. Don't summarize the details and keep the ideas unique and not redundant.

    Ensure the content follows these guidelines:
    - Use formal and technical language typical of SOPs.
    - Align the structure and terminology with the provided context.
    - Maintain clarity and conciseness.

    Structure:
    1. Scope: Defines the purpose and applicability of the procedure, often focusing on specific uses, such as analyzing air samples, tobacco content, or hydrocarbons in automotive exhaust.
    2. Background: Provides context or rationale for the method being standardized, often referencing related research or industry needs.
    3. Safety: Highlights safety precautions, including the use of personal protective equipment (PPE), handling hazardous chemicals, and waste disposal protocols.
    4. Materials and Equipment: Lists all necessary reagents, solvents, instrumentation, and ancillary tools required for conducting the procedure.
    5. Standards and Calibration: Details the required reference materials, standards for quality control, and steps for instrument calibration to ensure accuracy.
    6. Procedure: Offers step-by-step instructions for conducting the analysis, including sample preparation, injection, instrument operation, and troubleshooting.
    7. Quality Control and Data Validation: Focuses on performance checks, validation criteria, and measures to ensure data reliability.
    8. Data Analysis and Reporting: Describes how results should be processed, analyzed, and documented, often including specific formats or tools.
    9. Limitations and Interferences: Identifies constraints of the method, such as compounds that may not be detected or interferences that could affect results.
    10. Health and Environmental Safety: Emphasizes precautions to protect operators and minimize environmental impact during the procedure.
    11. Documentation and References: Specifies record-keeping requirements and cites foundational literature or external standards that the SOP builds upon.
    12. Special Precautions and Handling: Covers specific handling instructions for sensitive or hazardous materials used in the procedure.

    Context:
    {context_text}

    Question: Generate a detailed SOP for the specified instrument and sections.
    Instrument: {instrument_name}
    {brand_text}
    {model_text}
    Sections to include: {sections_text}
    """

# Save SOP to Word document
def save_sop_to_word(sop_text, output_path):
    """Write *sop_text* to a Word document at *output_path*.

    The document starts with the logo (when ``LOGO_PATH`` exists) and a
    level-1 title. The text is split on blank lines; a block whose first line
    has a short label before a colon becomes a level-2 heading followed by
    the rest of the block, and every other block becomes a plain paragraph.

    Args:
        sop_text: SOP text as returned by the chat model.
        output_path: Destination path of the ``.docx`` file.
    """
    doc = WordDocument()
    if LOGO_PATH and os.path.exists(LOGO_PATH):
        try:
            doc.add_picture(LOGO_PATH, width=Inches(2))
        except Exception as e:
            # Best effort: an unreadable logo must not stop the SOP being saved.
            print(f"Error adding logo: {e}")
    doc.add_heading("Standard Operating Procedure (SOP)", level=1)

    for raw_block in sop_text.split("\n\n"):
        block = raw_block.strip()
        if not block:
            # Runs of blank lines would otherwise produce empty paragraphs.
            continue
        heading, body = _split_heading(block)
        if heading is None:
            doc.add_paragraph(block)
            continue
        doc.add_heading(heading, level=2)
        if body:
            doc.add_paragraph(body)

    doc.save(output_path)
    print(f"SOP saved to {output_path}")


def _split_heading(block, max_heading_length=80):
    """Split ``"Label: text"`` into ``(label, text)``; ``(None, block)`` if not a heading.

    A block counts as a heading only when the colon is on its first line and
    the label before it is non-empty and at most *max_heading_length*
    characters. This is a heuristic that keeps ordinary paragraphs which
    merely contain a colon from being turned into oversized headings.
    """
    label, colon, body = block.partition(":")
    label = label.strip()
    if not colon or not label or "\n" in label or len(label) > max_heading_length:
        return None, block
    return label, body.strip()

# GUI setup
def run_gui():
    """Build the Tkinter window of the SOP generator and run its event loop.

    The window collects the instrument name, optional brand and model, an
    optional manual PDF, the output path and the sections to include. The
    "Generate SOP" button validates the input and calls ``generate_sop``.
    This function blocks until the window is closed.
    """
    root = tk.Tk()
    root.title("Enhanced SOP Generator")
    root.geometry("800x750")

    # Variables
    output_path_var = tk.StringVar()
    instrument_name_var = tk.StringVar()
    brand_var = tk.StringVar()
    model_var = tk.StringVar()
    manual_file_var = tk.StringVar()
    # Section name -> BooleanVar of its checkbox. The references are kept
    # here because tkinter unsets the underlying Tcl variable when the Python
    # variable object is garbage collected, which would detach the checkbox
    # from its state.
    section_vars = {}

    # Functions
    def browse_output_path():
        """Ask for the destination file and store it in the output entry."""
        file_path = filedialog.asksaveasfilename(defaultextension=".docx", filetypes=[("Word Documents", "*.docx")])
        if file_path:
            output_path_var.set(file_path)

    def upload_manual_file():
        """Ask for a manual PDF and index it immediately."""
        file_path = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf"), ("All Files", "*.*")])
        if file_path:
            manual_file_var.set(file_path)
            process_manual_file(file_path)
        else:
            messagebox.showinfo("Info", "No manual uploaded. Proceeding without manual.")

    def get_selected_sections():
        """Return the ticked section names in the order they are displayed."""
        return [name for name, var in section_vars.items() if var.get()]

    def generate_sop_task():
        """Validate the form, generate the SOP and report the outcome."""
        try:
            instrument_name = instrument_name_var.get().strip()
            brand = brand_var.get().strip()
            model = model_var.get().strip()
            output_path = output_path_var.get()
            selected_sections = get_selected_sections()

            if not instrument_name:
                messagebox.showerror("Error", "Instrument name is required.")
                return
            if not selected_sections:
                messagebox.showerror("Error", "At least one section must be selected.")
                return
            if not output_path.strip():
                messagebox.showerror("Error", "Output path is required.")
                return

            # The manual path can only be set by upload_manual_file (the
            # entry is read-only), which already indexed the file. Indexing
            # it again here would add the same chunks to the database twice
            # and pop up a second confirmation dialog.
            if not manual_file_var.get():
                print("No manual file uploaded. Generating SOP without manual.")

            generate_sop(selected_sections, instrument_name, brand, model, output_path)
            messagebox.showinfo("Success", f"SOP generated and saved to {output_path}")
        except Exception as e:
            # Top-level GUI boundary: surface any failure to the user.
            messagebox.showerror("Error", str(e))

    # GUI Layout
    ttk.Label(root, text="Enhanced SOP Generator", font=("Arial", 16)).pack(pady=10)

    frame = ttk.Frame(root, padding=10)
    frame.pack(fill="both", expand=True)

    ttk.Label(frame, text="Instrument Name:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=instrument_name_var, width=50).grid(row=0, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Brand (Optional):").grid(row=1, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=brand_var, width=50).grid(row=1, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Model (Optional):").grid(row=2, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=model_var, width=50).grid(row=2, column=1, padx=5, pady=5)

    ttk.Label(frame, text="Manual File (Optional):").grid(row=3, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=manual_file_var, width=50, state="readonly").grid(row=3, column=1, padx=5, pady=5)
    ttk.Button(frame, text="Browse", command=upload_manual_file).grid(row=3, column=2, padx=5, pady=5)

    ttk.Label(frame, text="Output Path:").grid(row=4, column=0, sticky="w", padx=5, pady=5)
    ttk.Entry(frame, textvariable=output_path_var, width=50).grid(row=4, column=1, padx=5, pady=5)
    ttk.Button(frame, text="Browse", command=browse_output_path).grid(row=4, column=2, padx=5, pady=5)

    ttk.Label(frame, text="Sections to Include:").grid(row=5, column=0, sticky="nw", padx=5, pady=5)
    sections = [
        "Scope", "Background", "Safety", "Materials and Equipment",
        "Standards and Calibration", "Procedure", "Quality Control",
        "Data Analysis", "Limitations", "Documentation", "References"
    ]
    for i, section in enumerate(sections):
        var = tk.BooleanVar(value=False)
        section_vars[section] = var
        cb = tk.Checkbutton(frame, text=section, variable=var)
        cb.grid(row=6 + i, column=0, sticky="w", padx=20, pady=2)

    ttk.Button(frame, text="Generate SOP", command=generate_sop_task).grid(row=6 + len(sections), column=0, columnspan=3, pady=20)

    root.mainloop()

if __name__ == "__main__":
    run_gui()
