In [None]:
import os
import re
import pandas as pd
from dotenv import load_dotenv
import google.generativeai as genai
from datetime import datetime
from typing import List, Dict, Tuple
import ast

In [None]:
# Constants shared by the cells below.
# DATASET_PATH is the CSV passed to load_dataset() in main(); OUTPUT_DIR is where main() writes its results file.
DATASET_PATH = "emails.csv"  # Update with your dataset path
OUTPUT_DIR = "output"
# Create the output directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Load .env (variables it defines become visible through os.getenv below)
load_dotenv()

# Get the API key from the process environment
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Stop here when the key is unset or empty instead of continuing without credentials
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY not found in .env file")

# Configure Gemini API
genai.configure(api_key=GEMINI_API_KEY)

In [None]:
# Initialize Gemini models.
# gemini_model generates content in summarize_email_thread() and generate_email_response();
# gemini_evaluation_model is the separate model used by the evaluate_*_quality() functions.
gemini_model = genai.GenerativeModel('models/gemini-1.5-flash-latest')
gemini_evaluation_model = genai.GenerativeModel('models/gemini-2.5-flash-preview-04-17')

In [None]:
def load_dataset(file_path):
    """Read the email CSV at ``file_path`` into a DataFrame.

    On success the row count is printed and the DataFrame is returned.
    Any exception raised while reading is printed and signalled to the
    caller by returning ``None`` instead of propagating.
    """
    try:
        emails_df = pd.read_csv(file_path)
        email_count = len(emails_df)
        print(f"Dataset loaded with {email_count} emails")
    except Exception as load_error:
        print(f"Error loading dataset: {load_error}")
        emails_df = None
    return emails_df

In [None]:
def preprocess_email(text):
    """Clean raw email text before it is sent to the model.

    Steps, in order:
      1. Strip the block of known headers at the very start of the message,
         including folded continuation lines.
      2. Remove the rest of any line from a signature delimiter onwards.
      3. Drop quoted-reply lines (lines whose first non-blank character is ``>``).
      4. Collapse every run of whitespace into a single space.

    Args:
        text: Raw email content. Any non-string value is treated as an
            empty message.

    Returns:
        The cleaned text on a single line; ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""

    # Remove all known headers. `^` is used without MULTILINE, so only the
    # header block at the start of the message is matched. `[ \t]*` (instead
    # of `\s`) stops an empty header value from swallowing the next line, and
    # the trailing group consumes folded continuation lines (lines starting
    # with a space or tab) so a wrapped "To:" list does not end the match early.
    text = re.sub(
        r"(?i)^(?:(?:Message-ID|Date|From|To|Subject|Cc|Bcc|Mime-Version|Content-Type|Content-Transfer-Encoding|X-[\w-]+):[ \t]*.*\n(?:[ \t]+.*\n)*)+",
        '',
        text,
    )

    # Remove signature starting with common delimiter. Without DOTALL, `.*`
    # stops at the end of the line, so only the remainder of the line holding
    # the delimiter is removed; later lines are kept.
    text = re.sub(r"(?i)(\n)?(--|__|Regards,|Best,|Thanks,|Sincerely).*", '', text)

    # Remove quoted replies: only whole lines that start with '>' (optionally
    # indented). A '>' in the middle of a line, e.g. "a > b" or "<addr>",
    # is ordinary content and must not delete the text that follows it.
    text = re.sub(r"(?m)^[ \t]*>.*\n?", '', text)

    # Normalize spacing
    text = re.sub(r"\s+", ' ', text).strip()

    return text

In [None]:
def summarize_email_thread(emails):
    """Ask Gemini for a summary of several emails treated as one thread.

    Each email is cleaned with ``preprocess_email`` and the results are joined
    with blank lines. Threads with fewer than 50 words after cleaning are not
    sent to the model; a fixed notice is returned instead. If the model call
    fails, the error is printed and a fixed failure message is returned.
    """
    thread_text = "\n\n".join(map(preprocess_email, emails))

    word_count = len(thread_text.split())
    if word_count < 50:
        return "Thread too short for meaningful summary."

    summary_prompt = f"""
    You are a professional assistant tasked with summarizing a multi-message email thread.

    Your goal is to provide a clear, actionable summary suitable for busy professionals. Focus strictly on the following:

    - **Key decisions made** (if any)
    - **Action items and owners** (who needs to do what)
    - **Core discussion points** (main topics or concerns raised)

    Guidelines:
    - Write in bullet points or a short structured paragraph.
    - Exclude greetings, signatures, and irrelevant small talk.
    - Do not include any meta-comments about summarizing.
    - Only return the summary content — no explanations or preambles.
    
    Email Thread:
    {thread_text}
    """

    try:
        summary_text = gemini_model.generate_content(summary_prompt).text
    except Exception as api_error:
        print(f"Error generating summary: {api_error}")
        return "Summary generation failed."
    return summary_text

In [None]:
def generate_email_response(email_text):
    """Draft a short professional reply to one email via Gemini.

    The email is cleaned with ``preprocess_email`` and labelled with
    ``identify_common_email_type``; the label and the first 5000 characters of
    the cleaned text go into the prompt. If the model call fails, the error is
    printed and a fixed failure message is returned.
    """
    cleaned_body = preprocess_email(email_text)
    detected_type = identify_common_email_type(cleaned_body)
    body_excerpt = cleaned_body[:5000]

    reply_prompt = f"""
    You are an executive assistant generating email replies in a professional business context.

    Objective:
    Write a polite, concise, and contextually appropriate 3-5 sentence response to the following email.

    Email type: **{detected_type}**
    Tone: Professional and courteous  
    Length: 3-5 sentences  
    Constraints:
    - Do not repeat the sender's message.
    - Do not add generic disclaimers or unrelated content.
    - Focus on addressing the email's core intent clearly.

    Email Content:
    \"\"\"
    {body_excerpt}
    \"\"\"

    Your Response:
    """

    try:
        reply_text = gemini_model.generate_content(reply_prompt).text
    except Exception as api_error:
        print(f"Error generating response: {api_error}")
        return "Response generation failed."
    return reply_text

In [None]:
def identify_common_email_type(email_text):
    """Label an email with the first category whose keywords occur in it.

    Matching is a case-insensitive substring test, and categories are tried
    in the order they are listed below, so earlier categories win when
    several match. Returns "general inquiry" when nothing matches.
    """
    lowered = email_text.lower()

    # Category label -> trigger substrings; insertion order is the priority.
    triggers_by_category = {
        "meeting request": ['meeting', 'schedule', 'appointment', 'calendar', 'reschedule', 'invite'],
        "status update": ['status', 'update', 'progress', 'report', 'milestone', 'summary'],
        "information request": ['question', 'help', 'advice', 'suggestion', 'clarify', 'inquire'],
        "thank you note": ['thank', 'thanks', 'appreciate', 'grateful', 'gratitude'],
        "follow-up": ['follow up', 'just checking', 'circling back', 'ping', 'reminder'],
        "issue report": ['bug', 'issue', 'error', 'problem', 'fail', 'failure', 'glitch'],
        "approval request": ['approve', 'approval', 'authorize', 'permission', 'sign off']
    }

    # First category with at least one trigger present, else the fallback label.
    return next(
        (
            label
            for label, triggers in triggers_by_category.items()
            if any(trigger in lowered for trigger in triggers)
        ),
        "general inquiry",
    )

In [None]:
def _parse_summary_scores(raw_text: str, defaults: Dict[str, float]) -> Dict[str, float]:
    """Convert the evaluator's textual reply into a ``{metric: score}`` dict.

    Only the metrics named in ``defaults`` are returned. A metric that is
    missing or not numeric keeps its default; numeric scores are clamped to
    the 0.0-1.0 range requested in the prompt.

    Raises:
        ValueError / SyntaxError: if no dictionary literal can be parsed.
    """
    # The reply may be wrapped in a Markdown code fence or surrounded by prose,
    # so keep only the outermost {...} span before parsing.
    literal = re.search(r"\{.*\}", raw_text, re.DOTALL)
    if literal is None:
        raise ValueError("no dictionary found in model reply")

    # literal_eval accepts Python literals only; unlike eval() it cannot run code.
    parsed = ast.literal_eval(literal.group(0))
    if not isinstance(parsed, dict):
        raise ValueError("model reply is not a dictionary")

    scores = {}
    for metric, fallback in defaults.items():
        try:
            value = float(parsed[metric])
        except (KeyError, TypeError, ValueError):
            value = fallback
        else:
            value = min(1.0, max(0.0, value))
        scores[metric] = value
    return scores


def evaluate_summary_quality(original_emails: List[str], summary: str) -> Dict[str, float]:
    """
    Evaluate the quality of a summary against the original emails.

    The original emails are cleaned with ``preprocess_email``, joined, and
    truncated to 10000 characters before being sent to the evaluation model
    together with the summary. The model's reply is parsed into a dictionary.

    Args:
        original_emails: Raw text of the emails the summary was built from.
        summary: The generated summary to score.

    Returns:
        A dictionary with float scores for ``main_points_coverage``,
        ``conciseness``, ``accuracy``, ``action_clarity`` and ``structure``.
        If the model call fails or its reply cannot be parsed, the error is
        printed and every metric is set to the default 0.1.
    """
    # Default scores returned if evaluation fails
    default_scores = {
        'main_points_coverage': 0.1,
        'conciseness': 0.1,
        'accuracy': 0.1,
        'action_clarity': 0.1,
        'structure': 0.1
    }

    combined_original = "\n\n".join([preprocess_email(email) for email in original_emails])
    
    prompt = f"""
    You are an expert communication analyst. Your task is to evaluate a **generated email summary** based on the content of the **original email threads**.

    Use the following criteria, scoring each from **0.0 to 1.0** (where 1.0 = perfect, 0.0 = poor):

    [SCORING CRITERIA]
    1. **main_points_coverage** - Does the summary include all key ideas and important content from the emails?
    2. **conciseness** - Is the summary brief while still preserving essential meaning?
    3. **accuracy** - Are the facts and interpretations in the summary correct according to the original emails?
    4. **action_clarity** - Are any action items or next steps clearly and understandably summarized?
    5. **structure** - Is the summary logically organized and easy to follow?

    [INPUT DATA]
    Original Emails:
    {combined_original[:10000]}

    Generated Summary:
    {summary}

    Evaluation (return only a Python dictionary with float values from 0 to 1 and no other text):
    """
    
    try:
        response = gemini_evaluation_model.generate_content(prompt)
        print("\n--- Summary Evaluation:---\n", response.text)  # Debug
        # Parse the reply so callers get the dictionary the signature promises,
        # not the raw model text.
        return _parse_summary_scores(response.text, default_scores)
    except Exception as e:
        print(f"Error evaluating summary: {e}")
    
    return default_scores

In [None]:
def _parse_response_scores(raw_text: str, defaults: Dict[str, float]) -> Dict[str, float]:
    """Convert the evaluator's textual reply into a ``{metric: score}`` dict.

    Only the metrics named in ``defaults`` are returned. A metric that is
    missing or not numeric keeps its default; numeric scores are clamped to
    the 0.0-1.0 range requested in the prompt.

    Raises:
        ValueError / SyntaxError: if no dictionary literal can be parsed.
    """
    # The reply may be wrapped in a Markdown code fence or surrounded by prose,
    # so keep only the outermost {...} span before parsing.
    literal = re.search(r"\{.*\}", raw_text, re.DOTALL)
    if literal is None:
        raise ValueError("no dictionary found in model reply")

    # literal_eval accepts Python literals only; unlike eval() it cannot run code.
    parsed = ast.literal_eval(literal.group(0))
    if not isinstance(parsed, dict):
        raise ValueError("model reply is not a dictionary")

    scores = {}
    for metric, fallback in defaults.items():
        try:
            value = float(parsed[metric])
        except (KeyError, TypeError, ValueError):
            value = fallback
        else:
            value = min(1.0, max(0.0, value))
        scores[metric] = value
    return scores


def evaluate_response_quality(original_email: str, response: str) -> Dict[str, float]:
    """
    Evaluate the quality of a generated response.

    The first 5000 characters of the original email and the full generated
    response are sent to the evaluation model, and its reply is parsed into a
    dictionary.

    Args:
        original_email: Text of the email that was replied to.
        response: The generated reply to score.

    Returns:
        A dictionary with float scores for ``coherence``,
        ``contextual_appropriateness``, ``relevance``, ``professionalism`` and
        ``actionability``. If the model call fails or its reply cannot be
        parsed, the error is printed and every metric is set to the default 0.1.
    """
    # Default scores returned if evaluation fails
    default_scores = {
        'coherence': 0.1,
        'contextual_appropriateness': 0.1,
        'relevance': 0.1,
        'professionalism': 0.1,
        'actionability': 0.1
    }

    prompt = f"""
    You are an expert email communication evaluator.

    Your task is to evaluate the **generated email response** using the following five criteria. Each should be scored individually on a float scale from 0.0 to 1.0 (inclusive), where:
    - 1.0 = perfect
    - 0.0 = poor

    [RETURN FORMAT REQUIREMENT]
    Return ONLY a **valid Python dictionary**, using this exact structure:
    {{
        'coherence': float,
        'contextual_appropriateness': float,
        'relevance': float,
        'professionalism': float,
        'actionability': float
    }}

    [CRITERIA DEFINITION]
    1. **coherence**: Is the response logically structured and easy to follow?
    2. **contextual_appropriateness**: Does the tone and content fit the original email's context?
    3. **relevance**: Does it appropriately address the content of the original email?
    4. **professionalism**: Is the tone courteous, formal where appropriate, and free of slang?
    5. **actionability**: Are clear next steps or calls to action included if necessary?

    [INPUT]
    Original Email:
    \"\"\"{original_email[:5000]}\"\"\"

    Generated Response:
    \"\"\"{response}\"\"\"

    [IMPORTANT]
    - DO NOT explain your ratings.
    - DO NOT return anything except the dictionary. 
    """
    
    try:
        # Separate name so the `response` argument is not shadowed by the model reply.
        model_reply = gemini_evaluation_model.generate_content(prompt)
        print("\n--- Response Evaluation:--- \n", model_reply.text)
        # Safely evaluate the dictionary response
        return _parse_response_scores(model_reply.text, default_scores)
    except Exception as e:
        print(f"Error evaluating response: {e}")
    
    return default_scores


In [None]:
def format_evaluation_results(evaluation: Dict[str, float], prefix: str = "") -> str:
    """Render evaluation scores as one "Label: x.xx/1.00" line per metric.

    Each metric key has its underscores replaced by spaces and is title-cased,
    then ``prefix`` is put in front of it. Lines are joined with newlines in
    the dictionary's iteration order; an empty dictionary yields "".
    """
    def render_line(metric, score):
        label = prefix + metric.replace('_', ' ').title()
        return f"{label}: {score:.2f}/1.00"

    return "\n".join(render_line(metric, score) for metric, score in evaluation.items())

In [None]:
def _evaluation_report(evaluation) -> str:
    """Render evaluation scores for the results file.

    A dictionary of numeric scores is formatted with
    ``format_evaluation_results``; anything else (for example unparsed model
    text) is written out as plain text so saving the report never fails.
    """
    if isinstance(evaluation, dict):
        try:
            return format_evaluation_results(evaluation)
        except (TypeError, ValueError):
            # Non-numeric score values: fall through to the plain-text form.
            pass
    return str(evaluation)


def main():
    """Run the demo pipeline and save a timestamped report.

    Steps: load the dataset, summarize a random sample of up to three emails
    and evaluate the summary, generate and evaluate a reply to one random
    email, then write the inputs, outputs and both evaluations to
    ``OUTPUT_DIR/results_<timestamp>.txt``.

    Returns early (after printing a message) when the dataset cannot be
    loaded, has no 'message' column, or contains no non-missing messages.
    """
    # Load dataset
    df = load_dataset(DATASET_PATH)
    if df is None:
        return

    if 'message' not in df.columns:
        print("Error: dataset has no 'message' column")
        return

    # Missing bodies cannot be summarized or written to the report, so drop them.
    messages = df['message'].dropna()
    if messages.empty:
        print("Error: dataset contains no emails")
        return

    # Example 1: Summarize a thread of up to 3 emails.
    # sample() raises when asked for more rows than exist, so cap the size.
    sample_thread = messages.sample(min(3, len(messages))).tolist()

    summary = summarize_email_thread(sample_thread)
    print("\n=== Generated Summary=== :")
    print(summary)

    # Evaluate summary quality
    summary_evaluation = evaluate_summary_quality(sample_thread, summary)

    # Example 2: Generate response to an email
    sample_email = messages.sample(1).iloc[0]

    response = generate_email_response(sample_email)
    print("\n=== Generated Response:=== ")
    print(response)

    # Evaluate response quality
    response_evaluation = evaluate_response_quality(sample_email, response)

    # Save results with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(OUTPUT_DIR, f"results_{timestamp}.txt")

    with open(output_file, "w", encoding='utf-8') as f:
        # Write original thread content
        f.write("=== ORIGINAL THREAD ===\n")
        for i, email in enumerate(sample_thread):
            f.write(f"\n--- Email {i+1} ---\n")
            f.write(f"{email}\n")
        # Write summary
        f.write("\n\n=== GENERATED SUMMARY ===\n")
        f.write(f"{summary}\n")

        # Write original email for response
        f.write("\n\n=== ORIGINAL EMAIL FOR RESPONSE ===\n")
        f.write(f"{sample_email}\n")

        # Write generated response
        f.write("\n=== GENERATED RESPONSE ===\n")
        f.write(f"{response}\n")

        # Write the evaluations; they are appended after the existing sections
        # so the earlier part of the report keeps its layout.
        f.write("\n\n=== SUMMARY EVALUATION ===\n")
        f.write(_evaluation_report(summary_evaluation) + "\n")
        f.write("\n=== RESPONSE EVALUATION ===\n")
        f.write(_evaluation_report(response_evaluation) + "\n")

    print(f"\nProcessing complete. Results saved to {output_file}")

In [None]:
# Run the pipeline when this code is executed directly (__name__ is "__main__"),
# not when it is imported as a module.
if __name__ == "__main__":
    main()