In [2]:
import pandas as pd
import re
import torch
from transformers import pipeline, logging
from tqdm import tqdm

In [4]:
# Silence transformers' informational/warning log output; only errors are shown.
logging.set_verbosity_error()

# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
# The message below reports 0 as "GPU" and -1 as "CPU".
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'CPU' if device == -1 else 'GPU'}")

Using device: CPU


In [None]:
# Single source of truth for the checkpoint: the model and its tokenizer are
# both loaded from this name, so switching models is a one-line change and the
# two can never get out of sync.
# Can switch to 'google/pegasus-xsum' or 'philschmid/bart-large-cnn-samsum'.
MODEL_NAME = "facebook/bart-large-cnn"

# Summarization pipeline shared by the rest of the notebook; it is placed on
# the `device` index chosen in the configuration cell.
summarizer = pipeline(
    "summarization",
    model=MODEL_NAME,
    tokenizer=MODEL_NAME,
    device=device
)

In [6]:
# Load the filtered Enron e-mail table from the project's data directory
# (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer="../data/filtered_enron_emails.csv")

# Preview the first five rows; the recorded output shows the columns
# `Unnamed: 0`, `file`, `body` and `email_type`.
df.head(n=5)

Unnamed: 0,file,body,email_type
0,allen-p/_sent_mail/10.,traveling to have a business meeting takes the...,meeting
1,allen-p/_sent_mail/1000.,"randy, can you send me a schedule of the salar...",meeting
2,allen-p/_sent_mail/1003.,please cc the following distribution list with...,project
3,allen-p/_sent_mail/102.,forwarded by phillip k allenhouect on 10162000...,reminder
4,allen-p/_sent_mail/103.,"mr. buckner, for delivered gas behind san dieg...",request


In [7]:
def generate_summary(text, max_length=100, min_length=25, min_words=30):
    """Summarize one e-mail body with the module-level ``summarizer`` pipeline.

    Parameters
    ----------
    text : str
        Raw e-mail body. Anything that is not a ``str`` (for example the
        float ``NaN`` pandas produces for an empty CSV cell, or ``None``) is
        treated like an empty body instead of raising.
    max_length : int, default 100
        Forwarded to the pipeline as the upper bound on the summary length.
    min_length : int, default 25
        Forwarded to the pipeline as the lower bound on the summary length.
    min_words : int, default 30
        Bodies with fewer whitespace-separated words than this are not sent
        to the model at all.

    Returns
    -------
    str
        The generated summary, or ``"Too short to summarize."`` when the body
        is missing or has fewer than ``min_words`` words, or
        ``"Error: <message>"`` when the pipeline raises.
    """
    # Guard before calling .strip(): a missing value has no string methods and
    # would otherwise abort the whole DataFrame apply with AttributeError.
    if not isinstance(text, str):
        return "Too short to summarize."

    text = text.strip()
    if len(text.split()) < min_words:
        return "Too short to summarize."

    try:
        summary = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            # Ask the tokenizer to cut over-long inputs down to the model's
            # input limit rather than letting the forward pass fail on them.
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        # Best-effort by design: one bad e-mail must not stop a long batch
        # run, so the failure is recorded in the result column instead.
        return f"Error: {str(e)}"

In [8]:
# Register tqdm's `progress_apply` on pandas objects before it is used below.
tqdm.pandas()

# Take an independent copy of the first 200 rows so that adding a column here
# does not touch `df`.
sample_df = df.iloc[:200].copy()

# Summarize every body one row at a time, with a progress bar. The recorded
# run of this cell took roughly 16 minutes on CPU.
sample_df['summary'] = sample_df.loc[:, 'body'].progress_apply(generate_summary)

100%|██████████| 200/200 [15:44<00:00,  4.72s/it]


In [9]:
# Print the first ten e-mails next to their generated summaries for a manual
# quality check. head() caps the preview at the rows that actually exist, so a
# frame with fewer than ten rows prints what it has instead of raising
# IndexError, and the two columns are read once rather than rebuilding a row
# Series for every lookup.
preview = sample_df.head(10)
for body, summary in zip(preview['body'], preview['summary']):
    print(f"\n📩 Email Body:\n{body}")
    print(f"\n📝 Summary:\n{summary}")


📩 Email Body:
traveling to have a business meeting takes the fun out of the trip. especially if you have to prepare a presentation. i would suggest holding the business plan meetings here then take a trip without any formal business meetings. i would even try and get some honest opinions on whether a trip is even desired or necessary. as far as the business meetings, i think it would be more productive to try and stimulate discussions across the different groups about what is working and what is not. too often the presenter speaks and the others are quiet just waiting for their turn. the meetings might be better if held in a round table discussion format. my suggestion for where to go is austin. play golf and rent a ski boat and jet ski's. flying somewhere takes too much time.

📝 Summary:
i would suggest holding the business plan meetings here then take a trip without any formal business meetings. traveling to have a business meeting takes the fun out of the trip. my suggestion for wh