In [None]:
import json
from urllib.parse import quote

import requests

# Root URL of the email API on the loopback interface; the request helpers
# below append their endpoint paths to it.
BASE_URL = "http://127.0.0.1:5000"

In [None]:
from bs4 import BeautifulSoup
import re

from llm import llm

In [None]:
def default_response_action(response):
    """Convert an HTTP response into its decoded JSON payload, or None on failure.

    Args:
        response: A ``requests.Response``-like object exposing ``status_code``,
            ``headers``, ``text`` and ``json()``.

    Returns:
        The decoded JSON body when the status code is 200 and the body parses
        as JSON. In every other case diagnostics are printed and ``None`` is
        returned.
    """
    if response.status_code == 200:
        try:
            return response.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses; a 200 with a
            # non-JSON body is reported like any other failure instead of
            # raising out of the helper.
            print("Status code: 200 (body is not valid JSON)")
            print(f"Response: {response.text}")
            return None

    print(f"Status code: {response.status_code}")

    # Only attempt to decode the response as JSON if the content type is 'application/json'.
    if 'application/json' in response.headers.get('Content-Type', ''):
        try:
            print(f"Response: {response.json()}")
        except ValueError:
            # The header promised JSON but the body is not; show the raw text.
            print(f"Response: {response.text}")
    else:
        print(f"Response: {response.text}")
    return None

In [None]:
def get_emails(timeout=30):
    """Fetch the email list from the ``/api/emails`` endpoint.

    Args:
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout ``requests.get`` may block indefinitely on an
            unresponsive server.

    Returns:
        Whatever ``default_response_action`` produces for the response.
    """
    url = f"{BASE_URL}/api/emails"
    response = requests.get(url, timeout=timeout)
    return default_response_action(response)

In [None]:
def get_full_email(header_message_id, timeout=30):
    """Fetch a single email from the ``/api/full-email/<id>`` endpoint.

    Args:
        header_message_id: Identifier of the email to fetch; it is placed in
            the URL path.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout ``requests.get`` may block indefinitely on an
            unresponsive server.

    Returns:
        Whatever ``default_response_action`` produces for the response.
    """
    # Percent-encode the id so characters such as '/', '?', '#' or '%' stay
    # part of the path segment instead of changing the URL's structure.
    encoded_id = quote(str(header_message_id), safe='')
    url = f"{BASE_URL}/api/full-email/{encoded_id}"
    response = requests.get(url, timeout=timeout)
    return default_response_action(response)

In [None]:
# Fetch the email list from the API and show it as this cell's output.
mails = get_emails()
mails

In [None]:
# Fetch one email by a hard-coded header message id. `email` is reused by the
# cells further down.
header_message_id = "NM66423025A0360265Dadobein_mid_prod9@mail.adobe.com"
email = get_full_email(header_message_id)
email

In [None]:
def html_to_text(html_content):
    """Extract the text of an HTML document and normalize its whitespace.

    Every run of whitespace is collapsed: a run containing a line break
    becomes a single newline, any other run becomes a single space. Carriage
    returns and non-breaking spaces count as whitespace, so CRLF bodies and
    ``&nbsp;`` padding are collapsed as well.

    Args:
        html_content: HTML markup to convert. ``None`` or an empty value is
            treated as an empty document.

    Returns:
        The extracted text with leading and trailing whitespace removed, or
        an empty string when there is no content.
    """
    if not html_content:
        return ""

    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text()

    def replacer(match):
        # A run that contains any line break collapses to one newline,
        # every other run collapses to one space.
        run = match.group()
        return '\n' if ('\n' in run or '\r' in run) else ' '

    text = re.sub(r'\s+', replacer, text)

    return text.strip()  # Removing any trailing or leading whitespace

In [None]:
def email_to_prompt(email):
    """Render an email mapping as the plain-text prompt handed to the LLM.

    The subject and author are read from ``email['header']`` and the body
    from ``email['body']`` (converted with ``html_to_text``); any missing
    piece falls back to an empty string.
    """
    header = email.get('header', {})
    subject, author = header.get('subject', ''), header.get('author', '')
    body = html_to_text(email.get('body', ''))
    return f"""
Subject: {subject}
Author: {author}

EMAIL BODY BELOW THIS LINE

{body}
"""

In [None]:
def process_email(email, threshold=150):
    """Run an email through the LLM and make sure the result has a summary.

    Args:
        email: Mapping with the same keys ``email_to_prompt`` reads
            (``'header'`` and an HTML ``'body'``).
        threshold: Maximum body length, in characters of extracted text, for
            the body to be used verbatim as the summary. It is also the
            length of the fallback excerpt when the LLM result has no summary.

    Returns:
        ``(None, reason)`` when ``llm`` returns a falsy result, otherwise
        ``(result, "Success")``, where ``result`` may have been given a
        ``'summary'`` plus an ``'is_full_message'`` or
        ``'is_uprocessed_summary'`` flag.
    """
    prompt = email_to_prompt(email)
    result, reason = llm(prompt)
    if not result:
        return None, reason

    summary = result.get('summary', '')
    # Same lookup email_to_prompt uses, so a missing body cannot raise
    # KeyError after the LLM call has already been made.
    body = html_to_text(email.get('body', ''))
    if len(body) <= threshold:
        # Short message: the text itself serves as the summary.
        result['summary'] = body
        result['is_full_message'] = True
    elif not summary:
        # No summary in the LLM result: fall back to the start of the body.
        result['summary'] = body[:threshold]
        # NOTE(review): the key is spelled 'uprocessed'; kept unchanged because
        # consumers of the result may read it. Confirm before renaming.
        result['is_uprocessed_summary'] = True
    return result, "Success"

In [None]:
# Scratch run of the first steps process_email performs: build the prompt for
# `email` and call the LLM, then show the raw result.
# NOTE(review): `threshold` is assigned here but not referenced by the other
# lines of this cell.
threshold = 150
prompt = email_to_prompt(email)
result, reason = llm(prompt)
result

In [None]:
# NOTE(review): this import sits in a late cell rather than with the other
# imports at the top of the notebook.
from summarizer import summarize

# Run the imported summarizer on `email`; its return value is the cell output.
summarize(email)