

*   Start the process and send an email notification.
*   Load data from a file.
*   Iterate over topics:
    *   Group each topic's entries into sets of three passages.
    *   Generate questions for each group of passages.
*   Save results to a final output file.
*   Send periodic updates.
*   Send a completion email and terminate the process.

In [None]:
import uuid
import smtplib
from email.message import EmailMessage
import time
import os
import json
from openai import AzureOpenAI
import re  # Added for processing question text

# Export the Azure OpenAI connection settings as environment variables.
# NOTE: the angle-bracket values are placeholders and must be replaced
# with real settings before the client can reach the service.
os.environ.update({
    "OPENAI_API_TYPE": "azure",
    "AZURE_OPENAI_ENDPOINT": "<your_azure_endpoint>",
    "AZURE_OPENAI_KEY": "<your_azure_api_key>",
    "OPENAI_API_VERSION": "2024-02-15-preview",
})

# Shared client used by generate_question(); it is configured from the
# variables exported just above.
client = AzureOpenAI(
    api_version=os.environ.get("OPENAI_API_VERSION"),
    api_key=os.environ.get("AZURE_OPENAI_KEY"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
)

print("Azure OpenAI client initialized successfully.")

# Email Sending Function
def sendEmail(messagetext, timeout=30):
    """Send a plain-text "Process Update" notification email.

    The sender, recipient, SMTP host and credentials are the hard-coded
    placeholders below. Delivery is best-effort: any failure is printed and
    swallowed so that a mail problem never aborts the calling job.

    Args:
        messagetext: Text placed in the message body (a newline is appended).
        timeout: Seconds to wait on blocking SMTP socket operations. Without
            a timeout an unresponsive mail server would stall the caller
            indefinitely.

    Returns:
        None.
    """
    sender = "Your Name <your_email@example.com>"
    receiver = "Recipient Name <recipient_email@example.com>"
    message = EmailMessage()
    message.set_content(f"{messagetext}\n")
    message['Subject'] = 'Process Update'
    message['From'] = sender
    message['To'] = receiver

    try:
        with smtplib.SMTP("<your_smtp_server>", 587, timeout=timeout) as server:
            # Upgrade the connection to TLS before sending credentials.
            server.starttls()
            server.login("<your_smtp_user>", "<your_smtp_password>")
            server.send_message(message)
        print("Email sent successfully!")
    except Exception as e:
        # Deliberately broad: notifications must never crash the pipeline.
        print(f"Failed to send email: {e}")

# Leading list marker on a line: "1." / "1)" style enumeration, or a
# "-", "*" or bullet character followed by whitespace.
_LIST_MARKER_RE = re.compile(r'^(?:\d+[.)]\s*|[-*\u2022]\s+)')

# Translation table that deletes the left-to-right / right-to-left marks.
_DIRECTION_MARKS = str.maketrans('', '', '\u200e\u200f')


def _extract_questions(text):
    """Return the cleaned lines of *text* that end with a question mark."""
    questions = []
    for line in text.split('\n'):
        # Remove invisible direction marks and surrounding whitespace FIRST,
        # so the list-marker pattern can anchor on the real start of the line.
        line = line.translate(_DIRECTION_MARKS).strip()
        line = _LIST_MARKER_RE.sub('', line).strip()
        if line.endswith('?'):
            questions.append(line)
    return questions


# Function to generate questions from a context
def generate_question(context):
    """Ask the chat model for compliance questions about *context*.

    The model reply is split into lines; list markers and direction marks
    are removed, and only lines ending in "?" are kept.

    Args:
        context: Text sent as the user message.

    Returns:
        A list of question strings. The list is empty when the request
        fails (the error is printed and the function sleeps 20 seconds
        before returning) or when the reply contains no questions.
    """
    messages = [
        {"role": "system", "content": "Generate a list of compliance-related questions based on the provided context."},
        {"role": "user", "content": context}
    ]

    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model="gpt-4-turbo-1106",
            temperature=0.7,
            max_tokens=800,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None
        )
        answer = chat_completion.choices[0].message.content
    except Exception as e:
        print(f"An error occurred: {e}")
        # Pause before returning; presumably a back-off so the caller's next
        # request does not immediately hit the same failure.
        time.sleep(20)
        return []

    # The reply content may be None; treat that as an empty reply.
    return _extract_questions(answer or "")

# Function to process topics
def process_topics(data):
    """Process every topic and collect the per-topic results.

    Args:
        data: Mapping of topic key to that topic's entries.

    Returns:
        A dict with the same keys as *data*, each mapped to the list
        returned by ``process_single_topic`` for that topic's entries.
        A progress line is printed after each topic.
    """
    results_by_topic = {}
    for key, topic_entries in data.items():
        rows = process_single_topic(topic_entries)
        results_by_topic[key] = rows
        print(f"Processed {key} with {len(rows)} entries.")
    return results_by_topic

# Function to process individual topic entries
def process_single_topic(entries):
    """Generate questions for one topic, three passages at a time.

    Entries are walked in consecutive groups of three. The 'Passage' texts
    of a group are joined with spaces and passed to ``generate_question``.
    Each returned question gets a fresh UUID and is emitted once per entry
    of the group. A trailing group with fewer than three entries is skipped.

    A progress email is sent via ``sendEmail`` whenever at least 300 seconds
    have passed since the previous one (measured from the start of this call).

    Args:
        entries: Sequence of dicts with 'ID', 'DocumentID', 'PassageID'
            and 'Passage' keys.

    Returns:
        A list of dicts with keys ID, DocumentID, PassageID, Passage,
        QuestionID and Question. Empty when fewer than three entries
        are supplied.
    """
    records = []
    total = len(entries)

    # Guard clause: a single full group of three is the minimum workload.
    if total < 3:
        print("Not enough entries to process.")
        return records

    last_notified = time.time()
    for start in range(0, total, 3):
        # Throttled progress notification (at most one every five minutes).
        if time.time() - last_notified >= 300:
            sendEmail(f"Processing update at {start} passages...")
            last_notified = time.time()

        batch = entries[start:start + 3]
        if len(batch) != 3:
            # Incomplete trailing group: nothing to generate.
            continue

        combined_context = " ".join([row['Passage'] for row in batch])
        for question in generate_question(combined_context):
            # One identifier per question, shared by all three passages.
            question_id = str(uuid.uuid4())
            records.extend(
                {
                    "ID": row['ID'],
                    "DocumentID": row['DocumentID'],
                    "PassageID": row['PassageID'],
                    "Passage": row['Passage'],
                    "QuestionID": question_id,
                    "Question": question,
                }
                for row in batch
            )
    return records

# Main Function
def main():
    """Run the full pipeline with start, failure and completion emails.

    Steps:
        1. Send a "started" notification.
        2. Load the topic data from ``topic_classifications.json``.
        3. Generate questions for every topic via ``process_topics``.
        4. Write the results to ``output_questions.json``.
        5. Send a "completed" notification.

    Raises:
        Exception: Any error from loading, processing or writing is
            re-raised after a failure notification has been sent.
    """
    sendEmail("The process has started.")

    try:
        # Read as UTF-8 explicitly: the platform's locale encoding would
        # otherwise decide how non-ASCII passages are decoded.
        with open('topic_classifications.json', 'r', encoding='utf-8') as file:
            data = json.load(file)

        overall_output = process_topics(data)

        with open('output_questions.json', 'w', encoding='utf-8') as file:
            json.dump(overall_output, file, indent=4)
    except Exception as e:
        # Tell the recipient the job died instead of leaving them waiting
        # for a completion email that will never arrive, then propagate.
        sendEmail(f"The process failed: {e}")
        raise

    sendEmail("The process has completed successfully.")


if __name__ == "__main__":
    main()
