In [126]:
###imports from our modules
from preprocessing import (
    extract_email_fields,
    clean_email_text,
    split_email_chain,
    nltk_preprocess,
    final_email_filter
)
from features import (
    extract_tfidf_features,
    get_top_tfidf_terms
)
from summarization import (
    split_sentences,
    fit_vectorizer,
    is_valid_sentence,
    summarize_email,
    summarize_abstractive
)
from gui_app import run_email_summarizer_gui

In [127]:
###standard libraries and downloads
import pandas as pd #used for handling tabular data
import re #regex library used for text pattern matching
import html #used for processing unescapes html entities
import nltk #this is the natural language toolkit used for tokenization, stemming etc.
import numpy as np #used for numerical operations
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from rouge_utils import compute_rouge_scores, compute_rouge_for_dataframe
!pip install transformers
!conda install pytorch torchvision torchaudio cpuonly -c pytorch -y
from transformers import pipeline

[nltk_data] Downloading package punkt to C:\Users\Sulaiman
[nltk_data]     Abukakar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Sulaiman
[nltk_data]     Abukakar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Sulaiman
[nltk_data]     Abukakar\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Channels:
 - pytorch
 - defaults
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



## Data Loading and Exploration
- Import and load most of our essential libraries 
- Download neccessary resources like the NLTK and install libraries- Transformers and Pytorch for deep learning
- Load our main dataset and do a quick inspection of our dataset(.head, .tail, .info, .describe)
- Finally display the content of our email after removing column width to get a better look at our data
- Again loading a subset of our dataset (2000 rows) and then using .head to get more insights

In [128]:
df = pd.read_csv("main_enron_emails.csv")

In [129]:
df.head()

Unnamed: 0,file,message
0,allen-p/_sent_mail/1.,"Message-ID: <18782981.1075855378110.JavaMail.evans@thyme>\nDate: Mon, 14 May 2001 16:39:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: tim.belden@enron.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Tim Belden <Tim Belden/Enron@EnronXGate>\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Sent Mail\nX-Origin: Allen-P\nX-FileName: pallen (Non-Privileged).pst\n\nHere is our forecast\n\n"
1,allen-p/_sent_mail/10.,"Message-ID: <15464986.1075855378456.JavaMail.evans@thyme>\nDate: Fri, 4 May 2001 13:51:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: john.lavorato@enron.com\nSubject: Re:\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: John J Lavorato <John J Lavorato/ENRON@enronXgate@ENRON>\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Sent Mail\nX-Origin: Allen-P\nX-FileName: pallen (Non-Privileged).pst\n\nTraveling to have a business meeting takes the fun out of the trip. Especially if you have to prepare a presentation. I would suggest holding the business plan meetings here then take a trip without any formal business meetings. I would even try and get some honest opinions on whether a trip is even desired or necessary.\n\nAs far as the business meetings, I think it would be more productive to try and stimulate discussions across the different groups about what is working and what is not. Too often the presenter speaks and the others are quiet just waiting for their turn. The meetings might be better if held in a round table discussion format. \n\nMy suggestion for where to go is Austin. Play golf and rent a ski boat and jet ski's. Flying somewhere takes too much time.\n"
2,allen-p/_sent_mail/100.,"Message-ID: <24216240.1075855687451.JavaMail.evans@thyme>\nDate: Wed, 18 Oct 2000 03:00:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: leah.arsdall@enron.com\nSubject: Re: test\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Leah Van Arsdall\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\ntest successful. way to go!!!"
3,allen-p/_sent_mail/1000.,"Message-ID: <13505866.1075863688222.JavaMail.evans@thyme>\nDate: Mon, 23 Oct 2000 06:13:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: randall.gay@enron.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Randall L Gay\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nRandy,\n\n Can you send me a schedule of the salary and level of everyone in the \nscheduling group. Plus your thoughts on any changes that need to be made. \n(Patti S for example)\n\nPhillip"
4,allen-p/_sent_mail/1001.,"Message-ID: <30922949.1075863688243.JavaMail.evans@thyme>\nDate: Thu, 31 Aug 2000 05:07:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: greg.piper@enron.com\nSubject: Re: Hello\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Greg Piper\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nLet's shoot for Tuesday at 11:45."


In [130]:
df.tail()

Unnamed: 0,file,message
517396,zufferli-j/sent_items/95.,"Message-ID: <26807948.1075842029936.JavaMail.evans@thyme>\nDate: Wed, 28 Nov 2001 13:30:11 -0800 (PST)\nFrom: john.zufferli@enron.com\nTo: kori.loibl@enron.com\nSubject: Trade with John Lavorato\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Zufferli, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JZUFFER>\nX-To: Loibl, Kori </O=ENRON/OU=NA/CN=RECIPIENTS/CN=Kloibl>\nX-cc: \nX-bcc: \nX-Folder: \ExMerge - Zufferli, John\Sent Items\nX-Origin: ZUFFERLI-J\nX-FileName: john zufferli 6-26-02.PST\n\nThis is a trade with OIL-SPEC-HEDGE-NG (John Lavorato's book) and John Zufferli's book CAND-PWR-PR\n\nCAND-PWR-PR buys from OIL-SPEC-HEDGE-NG Nymex Gas\n\nCal 03\t\t\t38,500 MMBtu/day\t\t$3.2978\nCal 04-Cal 05\t\t35,000 MMBtu/day\t\t$3.4482\nCal 06-Cal 10\t\t40,000 MMBtu/day\t\t$3.7369\nCal 11-Cal 15\t\t18,500 MMBtu/Day\t\t$4.2057\n\nPlease Confirm Receipt\n\n\nJohn Z"
517397,zufferli-j/sent_items/96.,"Message-ID: <25835861.1075842029959.JavaMail.evans@thyme>\nDate: Wed, 28 Nov 2001 12:47:48 -0800 (PST)\nFrom: john.zufferli@enron.com\nTo: john.lavorato@enron.com\nSubject: Gas Hedges\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Zufferli, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JZUFFER>\nX-To: Lavorato, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=Jlavora>\nX-cc: \nX-bcc: \nX-Folder: \ExMerge - Zufferli, John\Sent Items\nX-Origin: ZUFFERLI-J\nX-FileName: john zufferli 6-26-02.PST\n\nSome of my position is with the Alberta Term book, I will send you only the positions that I have directly with ENA."
517398,zufferli-j/sent_items/97.,"Message-ID: <28979867.1075842029988.JavaMail.evans@thyme>\nDate: Wed, 28 Nov 2001 07:20:00 -0800 (PST)\nFrom: john.zufferli@enron.com\nTo: dawn.doucet@enron.com\nSubject: RE: CONFIDENTIAL\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Zufferli, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JZUFFER>\nX-To: Doucet, Dawn </O=ENRON/OU=NA/CN=RECIPIENTS/CN=Ddoucet>\nX-cc: \nX-bcc: \nX-Folder: \ExMerge - Zufferli, John\Sent Items\nX-Origin: ZUFFERLI-J\nX-FileName: john zufferli 6-26-02.PST\n\n2\n\n -----Original Message-----\nFrom: \tDoucet, Dawn \nSent:\tWednesday, November 28, 2001 8:17 AM\nTo:\tZufferli, John\nSubject:\tCONFIDENTIAL\n\nMorning John,\nI'm still working on the mini-PRC for Lavo. Sean Lalani has not yet been ranked and rumour has it that he reports to you now. Can you confirm and send me a number. Thanks!"
517399,zufferli-j/sent_items/98.,"Message-ID: <22052556.1075842030013.JavaMail.evans@thyme>\nDate: Tue, 27 Nov 2001 11:52:45 -0800 (PST)\nFrom: john.zufferli@enron.com\nTo: jeanie.slone@enron.com\nSubject: Calgary Analyst/Associate\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Zufferli, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JZUFFER>\nX-To: Slone, Jeanie </O=ENRON/OU=NA/CN=RECIPIENTS/CN=Jslone>\nX-cc: \nX-bcc: \nX-Folder: \ExMerge - Zufferli, John\Sent Items\nX-Origin: ZUFFERLI-J\nX-FileName: john zufferli 6-26-02.PST\n\nAnalyst\t\t\t\t\tRank\n\nStephane Brodeur\t\t\t1\nChad Clark\t\t\t\t1\nIan Cooke\t\t\t\t3\nLon Draper\t\t\t\t1\nFabian Taylor\t\t\t\t2\nCarlos Torres\t\t\t\t3\nRyan Watt\t\t\t\t1\n\nAssociate\n\nCooper Richey\t\t\t\t1\n"
517400,zufferli-j/sent_items/99.,"Message-ID: <28618979.1075842030037.JavaMail.evans@thyme>\nDate: Mon, 26 Nov 2001 10:48:43 -0800 (PST)\nFrom: john.zufferli@enron.com\nTo: livia_zufferli@monitor.com\nSubject: RE: ali's essays\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Zufferli, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JZUFFER>\nX-To: 'Livia_Zufferli@Monitor.com@ENRON'\nX-cc: \nX-bcc: \nX-Folder: \ExMerge - Zufferli, John\Sent Items\nX-Origin: ZUFFERLI-J\nX-FileName: john zufferli 6-26-02.PST\n\ni think the YMCA has a class that is for people recovering from heart-attacks\ni remeber something about that\n\n -----Original Message-----\nFrom: \tLivia_Zufferli@Monitor.com@ENRON \nSent:\tMonday, November 26, 2001 11:44 AM\nTo:\tZufferli, John\nSubject:\tRE: ali's essays\n\n\ni don't know about the heart classes. i'll look into it, but her dr (ravi)\nisn't offering up any suggestions or anything. she saw him before the\nsurgery in august, and he said things were okay. i really don't think he's\ntoo helpful.\n\nshe is lazy -- but it really frustrates me that she doesn't want to help\nherself. i told her that not walking is like not taking her heart\nmedication. that didn't seem to resonate. dad is going to go to the YMCA\ntomorrow and maybe get a membership for both of them -- they have a walking\ntrack there (at least it's something to do in the winter). when she was\ndown this weekend, we walked around the craft show (at the Exhibition\nplace) and she said that was a lot of exercise (2 hrs). The only problem\nis that we were just strolling, and not really walking very fast.\n\n\n\n\n\n John.Zufferli@\n enron.com To: Livia_Zufferli@Monitor.com\n cc:\n 11/26/2001 Subject: RE: ali's essays\n 01:41 PM\n\n\n\n\n\n\njust send the essay at home\n\nI don't know what to do about mom, i don't think fear is the only thing\nholding her back , i think she is lazy\n\nis there a heart health class in Sudbury that has excercise regimines as\nwell as diets?\n\nwhen is the last time she saw her doctor\n\n -----Original Message-----\n From: Livia_Zufferli@Monitor.com@ENRON\n Sent: Monday, November 26, 2001 11:19 AM\n To: Zufferli, John\n Subject: ali's essays\n\n Hi John\n\n How was Thanksgiving? Was the baby shower fun?\n\n I was wondering if you'd have some time to read over Ali's Chicago\n essays\n later tonight? He's going to submit them on Wednesday. Let me know if\n that's okay. Do you have a printer at home? Can I send them to your\n home\n account? (I don't think Ali will be done before about 8pm or so\n tonight).\n\n PS: We need to talk about mom. I saw her this weekend -- she's gained\n a\n lot of weight, and hasn't been exercising at all. Dad's pretty\n frustrated\n because all she does is watch tv. I had a talk with her yesterday\n telling\n her that she has more risk of having a heart attack if she doesn't walk\n /\n exercise than if she exerts herself when exercising (i think she's\n afraid\n of having a heart attack while exercising). We need to do something --\n she's 170lbs now, and should be at around 140lbs to be healthy.\n\n Livia\n\n\n\n\n**********************************************************************\nThis e-mail is the property of Enron Corp. and/or its relevant affiliate\nand may contain confidential and privileged material for the sole use of\nthe intended recipient (s). Any review, use, distribution or disclosure by\nothers is strictly prohibited. If you are not the intended recipient (or\nauthorized to receive for the recipient), please contact the sender or\nreply to Enron Corp. at enron.messaging.administration@enron.com and delete\nall copies of the message. This e-mail (and any attachments hereto) are not\nintended to be an offer (or an acceptance) and do not create or evidence a\nbinding and enforceable contract between Enron Corp. (or any of its\naffiliates) and the intended recipient or any other party, and may not be\nrelied on by anyone as the basis of a contract by estoppel or otherwise.\nThank you.\n**********************************************************************\n\n\n"


In [131]:
df.describe()

Unnamed: 0,file,message
count,517401,517401
unique,517401,517401
top,allen-p/_sent_mail/1.,"Message-ID: <18782981.1075855378110.JavaMail.evans@thyme>\nDate: Mon, 14 May 2001 16:39:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: tim.belden@enron.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Tim Belden <Tim Belden/Enron@EnronXGate>\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Sent Mail\nX-Origin: Allen-P\nX-FileName: pallen (Non-Privileged).pst\n\nHere is our forecast\n\n"
freq,1,1


In [132]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517401 entries, 0 to 517400
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype 
---  ------   --------------   ----- 
 0   file     517401 non-null  object
 1   message  517401 non-null  object
dtypes: object(2)
memory usage: 7.9+ MB


In [133]:
pd.set_option("display.max_colwidth", None)
df.loc[1]

file                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

### Insights we from this exploration
- Our dataset has no missing values so at this point there's no need to drop or fill columns
- No need to look for outliers and skewness because all the data in our dataset are objects
- From the final loc we can see a bit more into the nature of our message- message id, date, from, to, subject, body.

## Data Exploration
- We created a function to extract structured fields using the re library from our raw email text
- We then applied it to our df_subset to create a new dataframe with structured fields rather than raw text
- We redefined our df_subset to be a merged dataset of our original data dn extracted data
- We then preview a few columns from the dataset

In [134]:
###loading subset of 2000 rows for faster processing
df_subset = pd.read_csv("main_enron_emails.csv", encoding="utf-8", nrows=2000)

In [135]:
pd.set_option("display.max_colwidth", None)
df_subset.head()

Unnamed: 0,file,message
0,allen-p/_sent_mail/1.,"Message-ID: <18782981.1075855378110.JavaMail.evans@thyme>\nDate: Mon, 14 May 2001 16:39:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: tim.belden@enron.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Tim Belden <Tim Belden/Enron@EnronXGate>\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Sent Mail\nX-Origin: Allen-P\nX-FileName: pallen (Non-Privileged).pst\n\nHere is our forecast\n\n"
1,allen-p/_sent_mail/10.,"Message-ID: <15464986.1075855378456.JavaMail.evans@thyme>\nDate: Fri, 4 May 2001 13:51:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: john.lavorato@enron.com\nSubject: Re:\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: John J Lavorato <John J Lavorato/ENRON@enronXgate@ENRON>\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Sent Mail\nX-Origin: Allen-P\nX-FileName: pallen (Non-Privileged).pst\n\nTraveling to have a business meeting takes the fun out of the trip. Especially if you have to prepare a presentation. I would suggest holding the business plan meetings here then take a trip without any formal business meetings. I would even try and get some honest opinions on whether a trip is even desired or necessary.\n\nAs far as the business meetings, I think it would be more productive to try and stimulate discussions across the different groups about what is working and what is not. Too often the presenter speaks and the others are quiet just waiting for their turn. The meetings might be better if held in a round table discussion format. \n\nMy suggestion for where to go is Austin. Play golf and rent a ski boat and jet ski's. Flying somewhere takes too much time.\n"
2,allen-p/_sent_mail/100.,"Message-ID: <24216240.1075855687451.JavaMail.evans@thyme>\nDate: Wed, 18 Oct 2000 03:00:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: leah.arsdall@enron.com\nSubject: Re: test\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Leah Van Arsdall\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\ntest successful. way to go!!!"
3,allen-p/_sent_mail/1000.,"Message-ID: <13505866.1075863688222.JavaMail.evans@thyme>\nDate: Mon, 23 Oct 2000 06:13:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: randall.gay@enron.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Randall L Gay\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nRandy,\n\n Can you send me a schedule of the salary and level of everyone in the \nscheduling group. Plus your thoughts on any changes that need to be made. \n(Patti S for example)\n\nPhillip"
4,allen-p/_sent_mail/1001.,"Message-ID: <30922949.1075863688243.JavaMail.evans@thyme>\nDate: Thu, 31 Aug 2000 05:07:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: greg.piper@enron.com\nSubject: Re: Hello\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Greg Piper\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nLet's shoot for Tuesday at 11:45."


In [136]:
###creating new column email fielsd by applying our extract function on the column message
email_fields = df_subset["message"].apply(extract_email_fields)

In [137]:
###joining the new formed column to our dataframe
df_subset = pd.concat([df_subset, email_fields], axis=1)

In [138]:
###preview
print(df_subset.columns) 
df_subset[["from", "to", "subject", "body"]].head(5)

Index(['file', 'message', 'message_id', 'date', 'from', 'to', 'subject',
       'body'],
      dtype='object')


Unnamed: 0,from,to,subject,body
0,phillip.allen@enron.com,tim.belden@enron.com,Mime-Version: 1.0,Here is our forecast
1,phillip.allen@enron.com,john.lavorato@enron.com,Re:,"Traveling to have a business meeting takes the fun out of the trip. Especially if you have to prepare a presentation. I would suggest holding the business plan meetings here then take a trip without any formal business meetings. I would even try and get some honest opinions on whether a trip is even desired or necessary.\n\nAs far as the business meetings, I think it would be more productive to try and stimulate discussions across the different groups about what is working and what is not. Too often the presenter speaks and the others are quiet just waiting for their turn. The meetings might be better if held in a round table discussion format. \n\nMy suggestion for where to go is Austin. Play golf and rent a ski boat and jet ski's. Flying somewhere takes too much time."
2,phillip.allen@enron.com,leah.arsdall@enron.com,Re: test,test successful. way to go!!!
3,phillip.allen@enron.com,randall.gay@enron.com,Mime-Version: 1.0,"Randy,\n\n Can you send me a schedule of the salary and level of everyone in the \nscheduling group. Plus your thoughts on any changes that need to be made. \n(Patti S for example)\n\nPhillip"
4,phillip.allen@enron.com,greg.piper@enron.com,Re: Hello,Let's shoot for Tuesday at 11:45.


In [139]:
###remove duplicates
if df_subset.duplicated().any():
    df_subset = df_subset.drop_duplicates()
df_subset = df_subset.drop_duplicates().reset_index(drop=True)

### Insights 
- From the preview, particularly the body column(which is our main focus), we can see that some rows(in our body) have some things(\n, mass of random email, etc).

## Data Cleaning- Part 1
- We made a function to clean our data(removing thing like HTML entries and tags, file references, substituting things with placeholders, removing long headed and other things)
- Since our main business lies with the body of our email, we are making a new column by applying our cleaning function through the body column of our df_subset
- Some emails arent single messages, rather they are full threads with replies stacked inside a big block.
- Therefore we made a split function to detect thread markers 
- We applied this to a sample of 200 email to work faster while debugging
- We made a big filtering function to make sure our emails have a certain number of characters, alphabet, placeholder, symbol ratio, reply parts count, sentence structure and word count

In [140]:
###cleaning
df_subset["cleaned_text"] = df_subset["body"].apply(clean_email_text)
print(df_subset["cleaned_text"].iloc[0])

Here is our forecast


In [141]:
df_subset["cleaned_text"].head()

0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Here is our forecast
1    Traveling to have a business meeting takes the fun out of the trip. Especially if you have to prepare a presentation. I would suggest holding the business plan meetings here then take a trip without any formal busine

In [142]:
df_subset.head()
print(df_subset.columns)

Index(['file', 'message', 'message_id', 'date', 'from', 'to', 'subject',
       'body', 'cleaned_text'],
      dtype='object')


In [143]:
sample_df = df_subset.sample(n=1000, random_state=42).copy()
sample_df["email_parts"] = sample_df["cleaned_text"].apply(split_email_chain)
sample_df.head(5)

Unnamed: 0,file,message,message_id,date,from,to,subject,body,cleaned_text,email_parts
1860,allen-p/discussion_threads/468.,"Message-ID: <3039085.1075855709869.JavaMail.evans@thyme>\nDate: Thu, 8 Mar 2001 06:46:00 -0800 (PST)\nFrom: ina.rangel@enron.com\nTo: information.management@enron.com\nSubject: Mike Grigsby\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Ina Rangel\nX-To: Information Risk Management\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\Discussion threads\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nPlease approve Mike Grigsby for Bloomberg. \n\nThank You,\nPhillip Allen",3039085.1075855709869.JavaMail.evans@thyme,"Thu, 8 Mar 2001 06:46:00 -0800 (PST)",ina.rangel@enron.com,information.management@enron.com,Mike Grigsby,"Please approve Mike Grigsby for Bloomberg. \n\nThank You,\nPhillip Allen","Please approve Mike Grigsby for Bloomberg. Thank You, Phillip Allen","[Please approve Mike Grigsby for Bloomberg. Thank You, Phillip Allen]"
353,allen-p/_sent_mail/420.,"Message-ID: <31210027.1075855725287.JavaMail.evans@thyme>\nDate: Wed, 28 Mar 2001 23:13:00 -0800 (PST)\nFrom: phillip.allen@enron.com\nTo: barry.tycholiz@enron.com\nSubject: Re: Opening Day - Baseball Tickets\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Barry Tycholiz\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nno problem",31210027.1075855725287.JavaMail.evans@thyme,"Wed, 28 Mar 2001 23:13:00 -0800 (PST)",phillip.allen@enron.com,barry.tycholiz@enron.com,Re: Opening Day - Baseball Tickets,no problem,no problem,[no problem]
1333,allen-p/deleted_items/195.,"Message-ID: <5558637.1075858635446.JavaMail.evans@thyme>\nDate: Wed, 24 Oct 2001 17:30:02 -0700 (PDT)\nFrom: arsystem@mailman.enron.com\nTo: k..allen@enron.com\nSubject: Your Approval is Overdue: Access Request for matt.smith@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: ARSystem <ARSystem@mailman.enron.com>@ENRON\nX-To: Allen, Phillip K. </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PALLEN>\nX-cc: \nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\nThis request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications\n\n\n\n",5558637.1075858635446.JavaMail.evans@thyme,"Wed, 24 Oct 2001 17:30:02 -0700 (PDT)",arsystem@mailman.enron.com,k..allen@enron.com,Your Approval is Overdue: Access Request for matt.smith@enron.com,This request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications,This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications,[This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications]
905,allen-p/all_documents/377.,"Message-ID: <227662.1075855694115.JavaMail.evans@thyme>\nDate: Tue, 1 May 2001 07:14:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: ina.rangel@enron.com\nSubject: 2- SURVEY/INFORMATION EMAIL\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Ina Rangel\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\All documents\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\n---------------------- Forwarded by Phillip K Allen/HOU/ECT on 05/01/2001 \n02:14 PM ---------------------------\n\n\nOutlook Migration Team@ENRON\n04/27/2001 01:00 PM\nTo: Allison Horton/NA/Enron@ENRON, Amir Baig/NA/Enron@ENRON, Brandon \nBangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles \nPhilpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris \nTull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, \nDonald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna \nMorrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn \nSchultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia \nBarrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj \nPerubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea \nYowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne \nCastellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann \nMatson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick \nJohnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A \nHope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald \nFain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna \nHarris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi \nMonson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John \nStabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James \nGramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer \nJohnson/Contractor/Enron Communications@Enron Communications, Jim \nLittle/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald \nMartin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin \nMitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley \nPearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, \nNatalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A \nRichardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary \nSimmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David \nUpton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel \nClick/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy \nGross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny \nJones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar \nPonce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance \nStanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M \nTholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, \nMatthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique \nSanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, \nTori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne \nWukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike \nPotter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne \nCalcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia \nFranklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan \nCollins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, \nLia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert \nAllwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna \nBoudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara \nCarter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron \nCommunications@Enron Communications, Jack Netek/Enron Communications@Enron \nCommunications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, \nCraig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy \nGagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth \nBalladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA\ncc: \nSubject: 2- SURVEY/INFORMATION EMAIL\n\nCurrent Notes User: \n\nTo ensure that you experience a successful migration from Notes to Outlook, \nit is necessary to gather individual user information prior to your date of \nmigration. Please take a few minutes to completely fill out the following \nsurvey. When you finish, simply click on the 'Reply' button then hit 'Send' \nYour survey will automatically be sent to the Outlook 2000 Migration Mailbox.\n\nThank you.\n\nOutlook 2000 Migration Team\n\n------------------------------------------------------------------------------\n--------------------------------------------------------------\n\nFull Name: \n\nLogin ID: \n\nExtension: \n\nOffice Location: \n\nWhat type of computer do you have? (Desktop, Laptop, Both) \n\nDo you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, \nJornada) \n\nDo you have permission to access anyone's Email/Calendar? \n If yes, who? \n\nDoes anyone have permission to access your Email/Calendar? \n If yes, who? \n\nAre you responsible for updating anyone else's address book? \n If yes, who? \n\nIs anyone else responsible for updating your address book? \n If yes, who? \n\nDo you have access to a shared calendar? \n If yes, which shared calendar? \n\nDo you have any Distribution Groups that Messaging maintains for you (for \nmass mailings)? \n If yes, please list here: \n\nPlease list all Notes databases applications that you currently use: \n\nIn our efforts to plan the exact date/time of your migration, we also will \nneed to know:\n\nWhat are your normal work hours? From: To: \n\nWill you be out of the office in the near future for vacation, leave, etc?\n If so, when? From (MM/DD/YY): To (MM/DD/YY): \n\n\n",227662.1075855694115.JavaMail.evans@thyme,"Tue, 1 May 2001 07:14:00 -0700 (PDT)",phillip.allen@enron.com,ina.rangel@enron.com,2- SURVEY/INFORMATION EMAIL,"---------------------- Forwarded by Phillip K Allen/HOU/ECT on 05/01/2001 \n02:14 PM ---------------------------\n\n\nOutlook Migration Team@ENRON\n04/27/2001 01:00 PM\nTo: Allison Horton/NA/Enron@ENRON, Amir Baig/NA/Enron@ENRON, Brandon \nBangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles \nPhilpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris \nTull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, \nDonald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna \nMorrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn \nSchultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia \nBarrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj \nPerubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea \nYowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne \nCastellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann \nMatson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick \nJohnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A \nHope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald \nFain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna \nHarris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi \nMonson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John \nStabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James \nGramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer \nJohnson/Contractor/Enron Communications@Enron Communications, Jim \nLittle/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald \nMartin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin \nMitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley \nPearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, \nNatalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A \nRichardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary \nSimmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David \nUpton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel \nClick/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy \nGross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny \nJones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar \nPonce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance \nStanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M \nTholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, \nMatthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique \nSanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, \nTori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne \nWukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike \nPotter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne \nCalcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia \nFranklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan \nCollins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, \nLia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert \nAllwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna \nBoudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara \nCarter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron \nCommunications@Enron Communications, Jack Netek/Enron Communications@Enron \nCommunications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, \nCraig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy \nGagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth \nBalladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA\ncc: \nSubject: 2- SURVEY/INFORMATION EMAIL\n\nCurrent Notes User: \n\nTo ensure that you experience a successful migration from Notes to Outlook, \nit is necessary to gather individual user information prior to your date of \nmigration. Please take a few minutes to completely fill out the following \nsurvey. When you finish, simply click on the 'Reply' button then hit 'Send' \nYour survey will automatically be sent to the Outlook 2000 Migration Mailbox.\n\nThank you.\n\nOutlook 2000 Migration Team\n\n------------------------------------------------------------------------------\n--------------------------------------------------------------\n\nFull Name: \n\nLogin ID: \n\nExtension: \n\nOffice Location: \n\nWhat type of computer do you have? (Desktop, Laptop, Both) \n\nDo you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, \nJornada) \n\nDo you have permission to access anyone's Email/Calendar? \n If yes, who? \n\nDoes anyone have permission to access your Email/Calendar? \n If yes, who? \n\nAre you responsible for updating anyone else's address book? \n If yes, who? \n\nIs anyone else responsible for updating your address book? \n If yes, who? \n\nDo you have access to a shared calendar? \n If yes, which shared calendar? \n\nDo you have any Distribution Groups that Messaging maintains for you (for \nmass mailings)? \n If yes, please list here: \n\nPlease list all Notes databases applications that you currently use: \n\nIn our efforts to plan the exact date/time of your migration, we also will \nneed to know:\n\nWhat are your normal work hours? From: To: \n\nWill you be out of the office in the near future for vacation, leave, etc?\n If so, when? From (MM/DD/YY): To (MM/DD/YY):","02:14 PM --------------------------- Outlook Migration Team@ENRON 04/27/2001 01:00 PM Bangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles Philpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris Tull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, Donald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna Morrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn Schultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia Barrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj Perubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea Yowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne Castellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann Matson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick Johnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A Hope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald Fain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna Harris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi Monson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John Stabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James Gramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer Johnson/Contractor/Enron Communications@Enron Communications, Jim Little/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald Martin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin Mitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley Pearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, Natalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A Richardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary Simmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David Upton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel Click/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy Gross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny Jones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar Ponce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance Stanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M Tholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, Matthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique Sanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, Tori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne Wukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike Potter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne Calcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia Franklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan Collins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, Lia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert Allwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna Boudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara Carter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron Communications@Enron Communications, Jack Netek/Enron Communications@Enron Communications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, Craig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy Gagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth Balladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA Current Notes User: To ensure that you experience a successful migration from Notes to Outlook, it is necessary to gather individual user information prior to your date of migration. Please take a few minutes to completely fill out the following survey. When you finish, simply click on the 'Reply' button then hit 'Send' Your survey will automatically be sent to the Outlook 2000 Migration Mailbox. Thank you. Outlook 2000 Migration Team ------------------------------------------------------------------------------ -------------------------------------------------------------- Full Name: Login ID: Extension: Office Location: What type of computer do you have? (Desktop, Laptop, Both) Do you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, Jornada) Do you have permission to access anyone's Email/Calendar? If yes, who? Does anyone have permission to access your Email/Calendar? If yes, who? Are you responsible for updating anyone else's address book? If yes, who? Is anyone else responsible for updating your address book? If yes, who? Do you have access to a shared calendar? If yes, which shared calendar? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? If yes, please list here: Please list all Notes databases applications that you currently use: In our efforts to plan the exact date/time of your migration, we also will need to know: Will you be out of the office in the near future for vacation, leave, etc? If so, when? From (MM/DD/YY): To (MM/DD/YY):","[02:14 PM --------------------------- Outlook Migration Team@ENRON 04/27/2001 01:00 PM Bangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles Philpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris Tull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, Donald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna Morrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn Schultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia Barrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj Perubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea Yowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne Castellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann Matson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick Johnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A Hope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald Fain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna Harris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi Monson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John Stabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James Gramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer Johnson/Contractor/Enron Communications@Enron Communications, Jim Little/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald Martin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin Mitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley Pearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, Natalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A Richardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary Simmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David Upton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel Click/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy Gross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny Jones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar Ponce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance Stanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M Tholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, Matthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique Sanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, Tori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne Wukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike Potter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne Calcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia Franklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan Collins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, Lia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert Allwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna Boudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara Carter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron Communications@Enron Communications, Jack Netek/Enron Communications@Enron Communications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, Craig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy Gagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth Balladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA Current Notes User: To ensure that you experience a successful migration from Notes to Outlook, it is necessary to gather individual user information prior to your date of migration. Please take a few minutes to completely fill out the following survey. When you finish, simply click on the 'Reply' button then hit 'Send' Your survey will automatically be sent to the Outlook 2000 Migration Mailbox. Thank you. Outlook 2000 Migration Team ------------------------------------------------------------------------------ -------------------------------------------------------------- Full Name: Login ID: Extension: Office Location: What type of computer do you have? (Desktop, Laptop, Both) Do you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, Jornada) Do you have permission to access anyone's Email/Calendar? If yes, who? Does anyone have permission to access your Email/Calendar? If yes, who? Are you responsible for updating anyone else's address book? If yes, who? Is anyone else responsible for updating your address book? If yes, who? Do you have access to a shared calendar? If yes, which shared calendar? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? If yes, please list here: Please list all Notes databases applications that you currently use: In our efforts to plan the exact date/time of your migration, we also will need to know: Will you be out of the office in the near future for vacation, leave, etc? If so, when? From (MM/DD/YY): To (MM/DD/YY):]"
1289,allen-p/deleted_items/153.,"Message-ID: <14009873.1075858634205.JavaMail.evans@thyme>\nDate: Mon, 13 Aug 2001 08:47:16 -0700 (PDT)\nFrom: msimpkins@winstead.com\nTo: pallen@enron.com, pallen70@hotmail.com\nSubject: Revised Utility Construction Escrow Agreement - Lakeline Apts.\nCc: michaelb@amhms.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: michaelb@amhms.com\nX-From: ""Simpkins, Michelle"" <MSimpkins@winstead.com>@ENRON <IMCEANOTES-+22Simpkins+2C+20Michelle+22+20+3CMSimpkins+40winstead+2Ecom+3E+40ENRON@ENRON.com>\nX-To: 'pallen@enron.com', 'pallen70@hotmail.com'\nX-cc: 'michaelb@amhms.com'\nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\n <<3M@X05!.DOC>>\nPhillip,\n\nEnclosed is a draft of the revised Escrow Agreement based on the Lender's\ncomments. I am coordinating with the Lender regarding the reimbursement\nprovisions in the event Agape fails to receive reimbursement from Ryland.\nWe may do a separate agreement between you, AMHP and McCall in the event\nAgape fails to receive reimbursement. Please contact me at (512) 370-2836\nor Michael Bobinchuck with any questions or concerns. Thanks.\n\nMichelle L. Simpkins\nWinstead Sechrest & Minick P.C.\n100 Congress Avenue, Suite 800\nAustin, Texas 78701\n(512) 370-2836\n(512) 370-2850 Fax\nmsimpkins@winstead.com\n\n\n - 3M@X05!.DOC",14009873.1075858634205.JavaMail.evans@thyme,"Mon, 13 Aug 2001 08:47:16 -0700 (PDT)",msimpkins@winstead.com,"pallen@enron.com, pallen70@hotmail.com",Revised Utility Construction Escrow Agreement - Lakeline Apts.,"<<3M@X05!.DOC>>\nPhillip,\n\nEnclosed is a draft of the revised Escrow Agreement based on the Lender's\ncomments. I am coordinating with the Lender regarding the reimbursement\nprovisions in the event Agape fails to receive reimbursement from Ryland.\nWe may do a separate agreement between you, AMHP and McCall in the event\nAgape fails to receive reimbursement. Please contact me at (512) 370-2836\nor Michael Bobinchuck with any questions or concerns. Thanks.\n\nMichelle L. Simpkins\nWinstead Sechrest & Minick P.C.\n100 Congress Avenue, Suite 800\nAustin, Texas 78701\n(512) 370-2836\n(512) 370-2850 Fax\nmsimpkins@winstead.com\n\n\n - 3M@X05!.DOC","Phillip, Enclosed is a draft of the revised Escrow Agreement based on the Lender's comments. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement. Please contact me at [PHONE] or Michael Bobinchuck with any questions or concerns. Thanks. Michelle L. Simpkins Winstead Sechrest & Minick P.C. 100 Congress Avenue, Suite 800 Austin, Texas 78701 [PHONE] [PHONE] Fax [EMAIL] - 3M@X05!.DOC","[Phillip, Enclosed is a draft of the revised Escrow Agreement based on the Lender's comments. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement. Please contact me at [PHONE] or Michael Bobinchuck with any questions or concerns. Thanks. Michelle L. Simpkins Winstead Sechrest & Minick P.C. 100 Congress Avenue, Suite 800 Austin, Texas 78701 [PHONE] [PHONE] Fax [EMAIL] - 3M@X05!.DOC]"


In [144]:
print(sample_df.shape)

(1000, 10)


In [145]:
print(sample_df["cleaned_text"].isna().sum())

0


In [146]:
print(sample_df["cleaned_text"].head(10))

1860                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

In [147]:
###filtering
sample_df = final_email_filter(sample_df)
sample_df.head(3)

After length filter: 814
After word count filter: 691
After alpha ratio filter: 683
After placeholder ratio filter: 683
After symbol ratio filter: 669
After sentence punctuation filter: 663
After email parts count filter: 663


Unnamed: 0,file,message,message_id,date,from,to,subject,body,cleaned_text,email_parts
0,allen-p/deleted_items/195.,"Message-ID: <5558637.1075858635446.JavaMail.evans@thyme>\nDate: Wed, 24 Oct 2001 17:30:02 -0700 (PDT)\nFrom: arsystem@mailman.enron.com\nTo: k..allen@enron.com\nSubject: Your Approval is Overdue: Access Request for matt.smith@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: ARSystem <ARSystem@mailman.enron.com>@ENRON\nX-To: Allen, Phillip K. </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PALLEN>\nX-cc: \nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\nThis request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications\n\n\n\n",5558637.1075858635446.JavaMail.evans@thyme,"Wed, 24 Oct 2001 17:30:02 -0700 (PDT)",arsystem@mailman.enron.com,k..allen@enron.com,Your Approval is Overdue: Access Request for matt.smith@enron.com,This request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications,This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications,[This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications]
1,allen-p/all_documents/377.,"Message-ID: <227662.1075855694115.JavaMail.evans@thyme>\nDate: Tue, 1 May 2001 07:14:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: ina.rangel@enron.com\nSubject: 2- SURVEY/INFORMATION EMAIL\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Ina Rangel\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\All documents\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\n---------------------- Forwarded by Phillip K Allen/HOU/ECT on 05/01/2001 \n02:14 PM ---------------------------\n\n\nOutlook Migration Team@ENRON\n04/27/2001 01:00 PM\nTo: Allison Horton/NA/Enron@ENRON, Amir Baig/NA/Enron@ENRON, Brandon \nBangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles \nPhilpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris \nTull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, \nDonald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna \nMorrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn \nSchultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia \nBarrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj \nPerubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea \nYowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne \nCastellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann \nMatson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick \nJohnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A \nHope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald \nFain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna \nHarris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi \nMonson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John \nStabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James \nGramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer \nJohnson/Contractor/Enron Communications@Enron Communications, Jim \nLittle/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald \nMartin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin \nMitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley \nPearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, \nNatalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A \nRichardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary \nSimmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David \nUpton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel \nClick/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy \nGross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny \nJones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar \nPonce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance \nStanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M \nTholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, \nMatthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique \nSanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, \nTori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne \nWukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike \nPotter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne \nCalcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia \nFranklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan \nCollins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, \nLia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert \nAllwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna \nBoudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara \nCarter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron \nCommunications@Enron Communications, Jack Netek/Enron Communications@Enron \nCommunications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, \nCraig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy \nGagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth \nBalladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA\ncc: \nSubject: 2- SURVEY/INFORMATION EMAIL\n\nCurrent Notes User: \n\nTo ensure that you experience a successful migration from Notes to Outlook, \nit is necessary to gather individual user information prior to your date of \nmigration. Please take a few minutes to completely fill out the following \nsurvey. When you finish, simply click on the 'Reply' button then hit 'Send' \nYour survey will automatically be sent to the Outlook 2000 Migration Mailbox.\n\nThank you.\n\nOutlook 2000 Migration Team\n\n------------------------------------------------------------------------------\n--------------------------------------------------------------\n\nFull Name: \n\nLogin ID: \n\nExtension: \n\nOffice Location: \n\nWhat type of computer do you have? (Desktop, Laptop, Both) \n\nDo you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, \nJornada) \n\nDo you have permission to access anyone's Email/Calendar? \n If yes, who? \n\nDoes anyone have permission to access your Email/Calendar? \n If yes, who? \n\nAre you responsible for updating anyone else's address book? \n If yes, who? \n\nIs anyone else responsible for updating your address book? \n If yes, who? \n\nDo you have access to a shared calendar? \n If yes, which shared calendar? \n\nDo you have any Distribution Groups that Messaging maintains for you (for \nmass mailings)? \n If yes, please list here: \n\nPlease list all Notes databases applications that you currently use: \n\nIn our efforts to plan the exact date/time of your migration, we also will \nneed to know:\n\nWhat are your normal work hours? From: To: \n\nWill you be out of the office in the near future for vacation, leave, etc?\n If so, when? From (MM/DD/YY): To (MM/DD/YY): \n\n\n",227662.1075855694115.JavaMail.evans@thyme,"Tue, 1 May 2001 07:14:00 -0700 (PDT)",phillip.allen@enron.com,ina.rangel@enron.com,2- SURVEY/INFORMATION EMAIL,"---------------------- Forwarded by Phillip K Allen/HOU/ECT on 05/01/2001 \n02:14 PM ---------------------------\n\n\nOutlook Migration Team@ENRON\n04/27/2001 01:00 PM\nTo: Allison Horton/NA/Enron@ENRON, Amir Baig/NA/Enron@ENRON, Brandon \nBangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles \nPhilpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris \nTull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, \nDonald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna \nMorrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn \nSchultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia \nBarrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj \nPerubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea \nYowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne \nCastellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann \nMatson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick \nJohnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A \nHope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald \nFain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna \nHarris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi \nMonson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John \nStabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James \nGramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer \nJohnson/Contractor/Enron Communications@Enron Communications, Jim \nLittle/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald \nMartin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin \nMitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley \nPearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, \nNatalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A \nRichardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary \nSimmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David \nUpton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel \nClick/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy \nGross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny \nJones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar \nPonce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance \nStanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M \nTholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, \nMatthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique \nSanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, \nTori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne \nWukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike \nPotter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne \nCalcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia \nFranklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan \nCollins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, \nLia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert \nAllwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna \nBoudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara \nCarter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron \nCommunications@Enron Communications, Jack Netek/Enron Communications@Enron \nCommunications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, \nCraig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy \nGagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth \nBalladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA\ncc: \nSubject: 2- SURVEY/INFORMATION EMAIL\n\nCurrent Notes User: \n\nTo ensure that you experience a successful migration from Notes to Outlook, \nit is necessary to gather individual user information prior to your date of \nmigration. Please take a few minutes to completely fill out the following \nsurvey. When you finish, simply click on the 'Reply' button then hit 'Send' \nYour survey will automatically be sent to the Outlook 2000 Migration Mailbox.\n\nThank you.\n\nOutlook 2000 Migration Team\n\n------------------------------------------------------------------------------\n--------------------------------------------------------------\n\nFull Name: \n\nLogin ID: \n\nExtension: \n\nOffice Location: \n\nWhat type of computer do you have? (Desktop, Laptop, Both) \n\nDo you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, \nJornada) \n\nDo you have permission to access anyone's Email/Calendar? \n If yes, who? \n\nDoes anyone have permission to access your Email/Calendar? \n If yes, who? \n\nAre you responsible for updating anyone else's address book? \n If yes, who? \n\nIs anyone else responsible for updating your address book? \n If yes, who? \n\nDo you have access to a shared calendar? \n If yes, which shared calendar? \n\nDo you have any Distribution Groups that Messaging maintains for you (for \nmass mailings)? \n If yes, please list here: \n\nPlease list all Notes databases applications that you currently use: \n\nIn our efforts to plan the exact date/time of your migration, we also will \nneed to know:\n\nWhat are your normal work hours? From: To: \n\nWill you be out of the office in the near future for vacation, leave, etc?\n If so, when? From (MM/DD/YY): To (MM/DD/YY):","02:14 PM --------------------------- Outlook Migration Team@ENRON 04/27/2001 01:00 PM Bangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles Philpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris Tull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, Donald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna Morrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn Schultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia Barrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj Perubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea Yowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne Castellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann Matson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick Johnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A Hope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald Fain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna Harris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi Monson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John Stabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James Gramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer Johnson/Contractor/Enron Communications@Enron Communications, Jim Little/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald Martin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin Mitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley Pearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, Natalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A Richardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary Simmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David Upton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel Click/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy Gross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny Jones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar Ponce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance Stanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M Tholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, Matthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique Sanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, Tori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne Wukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike Potter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne Calcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia Franklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan Collins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, Lia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert Allwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna Boudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara Carter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron Communications@Enron Communications, Jack Netek/Enron Communications@Enron Communications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, Craig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy Gagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth Balladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA Current Notes User: To ensure that you experience a successful migration from Notes to Outlook, it is necessary to gather individual user information prior to your date of migration. Please take a few minutes to completely fill out the following survey. When you finish, simply click on the 'Reply' button then hit 'Send' Your survey will automatically be sent to the Outlook 2000 Migration Mailbox. Thank you. Outlook 2000 Migration Team ------------------------------------------------------------------------------ -------------------------------------------------------------- Full Name: Login ID: Extension: Office Location: What type of computer do you have? (Desktop, Laptop, Both) Do you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, Jornada) Do you have permission to access anyone's Email/Calendar? If yes, who? Does anyone have permission to access your Email/Calendar? If yes, who? Are you responsible for updating anyone else's address book? If yes, who? Is anyone else responsible for updating your address book? If yes, who? Do you have access to a shared calendar? If yes, which shared calendar? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? If yes, please list here: Please list all Notes databases applications that you currently use: In our efforts to plan the exact date/time of your migration, we also will need to know: Will you be out of the office in the near future for vacation, leave, etc? If so, when? From (MM/DD/YY): To (MM/DD/YY):","[02:14 PM --------------------------- Outlook Migration Team@ENRON 04/27/2001 01:00 PM Bangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles Philpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris Tull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, Donald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna Morrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn Schultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia Barrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj Perubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea Yowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne Castellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann Matson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick Johnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A Hope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald Fain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna Harris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi Monson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John Stabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James Gramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer Johnson/Contractor/Enron Communications@Enron Communications, Jim Little/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald Martin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin Mitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley Pearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, Natalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A Richardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary Simmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David Upton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel Click/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy Gross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny Jones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar Ponce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance Stanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M Tholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, Matthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique Sanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, Tori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne Wukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike Potter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne Calcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia Franklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan Collins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, Lia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert Allwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna Boudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara Carter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron Communications@Enron Communications, Jack Netek/Enron Communications@Enron Communications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, Craig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy Gagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth Balladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA Current Notes User: To ensure that you experience a successful migration from Notes to Outlook, it is necessary to gather individual user information prior to your date of migration. Please take a few minutes to completely fill out the following survey. When you finish, simply click on the 'Reply' button then hit 'Send' Your survey will automatically be sent to the Outlook 2000 Migration Mailbox. Thank you. Outlook 2000 Migration Team ------------------------------------------------------------------------------ -------------------------------------------------------------- Full Name: Login ID: Extension: Office Location: What type of computer do you have? (Desktop, Laptop, Both) Do you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, Jornada) Do you have permission to access anyone's Email/Calendar? If yes, who? Does anyone have permission to access your Email/Calendar? If yes, who? Are you responsible for updating anyone else's address book? If yes, who? Is anyone else responsible for updating your address book? If yes, who? Do you have access to a shared calendar? If yes, which shared calendar? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? If yes, please list here: Please list all Notes databases applications that you currently use: In our efforts to plan the exact date/time of your migration, we also will need to know: Will you be out of the office in the near future for vacation, leave, etc? If so, when? From (MM/DD/YY): To (MM/DD/YY):]"
2,allen-p/deleted_items/153.,"Message-ID: <14009873.1075858634205.JavaMail.evans@thyme>\nDate: Mon, 13 Aug 2001 08:47:16 -0700 (PDT)\nFrom: msimpkins@winstead.com\nTo: pallen@enron.com, pallen70@hotmail.com\nSubject: Revised Utility Construction Escrow Agreement - Lakeline Apts.\nCc: michaelb@amhms.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: michaelb@amhms.com\nX-From: ""Simpkins, Michelle"" <MSimpkins@winstead.com>@ENRON <IMCEANOTES-+22Simpkins+2C+20Michelle+22+20+3CMSimpkins+40winstead+2Ecom+3E+40ENRON@ENRON.com>\nX-To: 'pallen@enron.com', 'pallen70@hotmail.com'\nX-cc: 'michaelb@amhms.com'\nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\n <<3M@X05!.DOC>>\nPhillip,\n\nEnclosed is a draft of the revised Escrow Agreement based on the Lender's\ncomments. I am coordinating with the Lender regarding the reimbursement\nprovisions in the event Agape fails to receive reimbursement from Ryland.\nWe may do a separate agreement between you, AMHP and McCall in the event\nAgape fails to receive reimbursement. Please contact me at (512) 370-2836\nor Michael Bobinchuck with any questions or concerns. Thanks.\n\nMichelle L. Simpkins\nWinstead Sechrest & Minick P.C.\n100 Congress Avenue, Suite 800\nAustin, Texas 78701\n(512) 370-2836\n(512) 370-2850 Fax\nmsimpkins@winstead.com\n\n\n - 3M@X05!.DOC",14009873.1075858634205.JavaMail.evans@thyme,"Mon, 13 Aug 2001 08:47:16 -0700 (PDT)",msimpkins@winstead.com,"pallen@enron.com, pallen70@hotmail.com",Revised Utility Construction Escrow Agreement - Lakeline Apts.,"<<3M@X05!.DOC>>\nPhillip,\n\nEnclosed is a draft of the revised Escrow Agreement based on the Lender's\ncomments. I am coordinating with the Lender regarding the reimbursement\nprovisions in the event Agape fails to receive reimbursement from Ryland.\nWe may do a separate agreement between you, AMHP and McCall in the event\nAgape fails to receive reimbursement. Please contact me at (512) 370-2836\nor Michael Bobinchuck with any questions or concerns. Thanks.\n\nMichelle L. Simpkins\nWinstead Sechrest & Minick P.C.\n100 Congress Avenue, Suite 800\nAustin, Texas 78701\n(512) 370-2836\n(512) 370-2850 Fax\nmsimpkins@winstead.com\n\n\n - 3M@X05!.DOC","Phillip, Enclosed is a draft of the revised Escrow Agreement based on the Lender's comments. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement. Please contact me at [PHONE] or Michael Bobinchuck with any questions or concerns. Thanks. Michelle L. Simpkins Winstead Sechrest & Minick P.C. 100 Congress Avenue, Suite 800 Austin, Texas 78701 [PHONE] [PHONE] Fax [EMAIL] - 3M@X05!.DOC","[Phillip, Enclosed is a draft of the revised Escrow Agreement based on the Lender's comments. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement. Please contact me at [PHONE] or Michael Bobinchuck with any questions or concerns. Thanks. Michelle L. Simpkins Winstead Sechrest & Minick P.C. 100 Congress Avenue, Suite 800 Austin, Texas 78701 [PHONE] [PHONE] Fax [EMAIL] - 3M@X05!.DOC]"


### Conclusion
- We have now cleaned our data enough to go through summarization without any problem and confusing our model.

## Feature Exploration
- We initialized and generated our scores and tokens
- We carried an intial extraction of TF-IDF features from the sample of our dataset to get insights
- After a editing our pre processing function, we generate a function that loop through the rows in our dataframe and gets the top idf scores and tokens
- Also we trained our tf idf matrix on other pre processed text for better extraction during our summary

In [148]:
###preprocessing for features
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()
tokenizer = RegexpTokenizer(r"\w+")

In [149]:
sample_df["nltk_processed_text"] = sample_df["cleaned_text"].apply(nltk_preprocess)
sample_df.head(1)

Unnamed: 0,file,message,message_id,date,from,to,subject,body,cleaned_text,email_parts,nltk_processed_text
0,allen-p/deleted_items/195.,"Message-ID: <5558637.1075858635446.JavaMail.evans@thyme>\nDate: Wed, 24 Oct 2001 17:30:02 -0700 (PDT)\nFrom: arsystem@mailman.enron.com\nTo: k..allen@enron.com\nSubject: Your Approval is Overdue: Access Request for matt.smith@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: ARSystem <ARSystem@mailman.enron.com>@ENRON\nX-To: Allen, Phillip K. </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PALLEN>\nX-cc: \nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\nThis request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications\n\n\n\n",5558637.1075858635446.JavaMail.evans@thyme,"Wed, 24 Oct 2001 17:30:02 -0700 (PDT)",arsystem@mailman.enron.com,k..allen@enron.com,Your Approval is Overdue: Access Request for matt.smith@enron.com,This request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications,This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications,[This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications],request pending approval day please click link review act upon request request id phone request create date requested email resource name risk acceptance form local admin right permanent resource type application


In [150]:
###initialising
vectorizer = fit_vectorizer(sample_df["nltk_processed_text"])
tfidf_matrix = vectorizer.transform(sample_df["nltk_processed_text"])

In [151]:
###TF IDF feature extraction
feature_names = vectorizer.get_feature_names_out()
row = tfidf_matrix[0].toarray().flatten()
try:
    top_n = 10
    top_indices = row.argsort()[::-1][:top_n]
    print("\nTop terms in first email:")
    for idx in top_indices:
        print(f"{feature_names[idx]}: {row[idx]:.4f}")
except Exception as e:
    print(f"Error during extractive summarization: {e}")


Top terms in first email:
request: 0.5991
resource: 0.3093
admin: 0.1964
pending: 0.1881
acceptance: 0.1846
permanent: 0.1753
act: 0.1739
local: 0.1739
approval: 0.1643
upon: 0.1602


In [152]:
top_terms_list = []
for i in range(tfidf_matrix.shape[0]):
    top_terms = get_top_tfidf_terms(tfidf_matrix[i], feature_names)
    top_terms_list.append(top_terms)
sample_df["top_keywords"] = top_terms_list
sample_df.head(1)

Unnamed: 0,file,message,message_id,date,from,to,subject,body,cleaned_text,email_parts,nltk_processed_text,top_keywords
0,allen-p/deleted_items/195.,"Message-ID: <5558637.1075858635446.JavaMail.evans@thyme>\nDate: Wed, 24 Oct 2001 17:30:02 -0700 (PDT)\nFrom: arsystem@mailman.enron.com\nTo: k..allen@enron.com\nSubject: Your Approval is Overdue: Access Request for matt.smith@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: ARSystem <ARSystem@mailman.enron.com>@ENRON\nX-To: Allen, Phillip K. </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PALLEN>\nX-cc: \nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\nThis request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications\n\n\n\n",5558637.1075858635446.JavaMail.evans@thyme,"Wed, 24 Oct 2001 17:30:02 -0700 (PDT)",arsystem@mailman.enron.com,k..allen@enron.com,Your Approval is Overdue: Access Request for matt.smith@enron.com,This request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications,This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications,[This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications],request pending approval day please click link review act upon request request id phone request create date requested email resource name risk acceptance form local admin right permanent resource type application,"[request, resource, admin, pending, acceptance, permanent, act, local, approval, upon]"


### Insights(Particulaly from the features names)
- A suprisingly large amount of top ranked features were numeric code and timestamp fragments(000, 03pm, 10457)
- This bit of feature extraction shows us that we could further refine our cleaning by removing low value numeric patterns(we went back to our nlkt main preprocessing function)
- Although we arent really training a model this bit helped us to like evaluate the effectiveness of our cleaning and also the noise left in our dataset
- Final note(after impleting a numeric filter), decided to leave the ones that had passed because they might hold semantic value in some context

In [153]:
###extractive summary
sample_df_subset = sample_df.head(500).copy()
sample_df_subset["extractive_summary"] = sample_df_subset["cleaned_text"].apply(lambda x: summarize_email(x, vectorizer, top_n=3))

In [154]:
from IPython.display import display
display(sample_df_subset[["file", "extractive_summary"]].head())

Unnamed: 0,file,extractive_summary
0,allen-p/deleted_items/195.,This request has been pending your approval for 10 days. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications Please click [LINK] to review and act upon this request.
1,allen-p/all_documents/377.,"(Desktop, Laptop, Both) Do you have a PDA? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? Do you have access to a shared calendar?"
2,allen-p/deleted_items/153.,"Simpkins Winstead Sechrest & Minick P.C. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement."
3,allen-p/deleted_items/137.,Winning tickets will be drawn at 2:00 p.m. JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. This event will be a blast.
4,allen-p/discussion_threads/226.,


In [155]:
sample_df = sample_df.head(200).copy()
sample_df["summary"] = sample_df["cleaned_text"].apply(lambda x: summarize_email(x, vectorizer))

In [156]:
print(sample_df[["file", "summary"]].head())

                              file  \
0       allen-p/deleted_items/195.   
1       allen-p/all_documents/377.   
2       allen-p/deleted_items/153.   
3       allen-p/deleted_items/137.   
4  allen-p/discussion_threads/226.   

                                                                                                                                                                                                                                                                                                            summary  
0  This request has been pending your approval for 10 days. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications Please click [LINK] to review and act upon this request.  
1                                                                                                                                     (Desktop, Laptop,

In [157]:
display(sample_df[["cleaned_text", "summary"]].head())
sample_df[["message", "cleaned_text", "summary"]].to_csv("email_summaries_sample.csv", index=False)

Unnamed: 0,cleaned_text,summary
0,This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications,This request has been pending your approval for 10 days. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications Please click [LINK] to review and act upon this request.
1,"02:14 PM --------------------------- Outlook Migration Team@ENRON 04/27/2001 01:00 PM Bangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles Philpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris Tull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, Donald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna Morrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn Schultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia Barrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj Perubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea Yowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne Castellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann Matson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick Johnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A Hope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald Fain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna Harris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi Monson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John Stabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James Gramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer Johnson/Contractor/Enron Communications@Enron Communications, Jim Little/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald Martin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin Mitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley Pearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, Natalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A Richardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary Simmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David Upton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel Click/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy Gross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny Jones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar Ponce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance Stanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M Tholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, Matthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique Sanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, Tori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne Wukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike Potter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne Calcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia Franklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan Collins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, Lia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert Allwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna Boudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara Carter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron Communications@Enron Communications, Jack Netek/Enron Communications@Enron Communications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, Craig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy Gagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth Balladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA Current Notes User: To ensure that you experience a successful migration from Notes to Outlook, it is necessary to gather individual user information prior to your date of migration. Please take a few minutes to completely fill out the following survey. When you finish, simply click on the 'Reply' button then hit 'Send' Your survey will automatically be sent to the Outlook 2000 Migration Mailbox. Thank you. Outlook 2000 Migration Team ------------------------------------------------------------------------------ -------------------------------------------------------------- Full Name: Login ID: Extension: Office Location: What type of computer do you have? (Desktop, Laptop, Both) Do you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, Jornada) Do you have permission to access anyone's Email/Calendar? If yes, who? Does anyone have permission to access your Email/Calendar? If yes, who? Are you responsible for updating anyone else's address book? If yes, who? Is anyone else responsible for updating your address book? If yes, who? Do you have access to a shared calendar? If yes, which shared calendar? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? If yes, please list here: Please list all Notes databases applications that you currently use: In our efforts to plan the exact date/time of your migration, we also will need to know: Will you be out of the office in the near future for vacation, leave, etc? If so, when? From (MM/DD/YY): To (MM/DD/YY):","(Desktop, Laptop, Both) Do you have a PDA? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? Do you have access to a shared calendar?"
2,"Phillip, Enclosed is a draft of the revised Escrow Agreement based on the Lender's comments. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement. Please contact me at [PHONE] or Michael Bobinchuck with any questions or concerns. Thanks. Michelle L. Simpkins Winstead Sechrest & Minick P.C. 100 Congress Avenue, Suite 800 Austin, Texas 78701 [PHONE] [PHONE] Fax [EMAIL] - 3M@X05!.DOC","Simpkins Winstead Sechrest & Minick P.C. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement."
3,"This Sunday, October 28th, is the Juvenile Diabetes Research Foundation (JDRF) Walk to Cure Diabetes at Greenspoint Mall at 8:00 a.m. In preparation for the big event, we have several fun activities scheduled to take place this week as detailed below. JDRF Cyber Auction - The Cyber Auction will take place this Wednesday, October 24th, through Thursday, October 25th. For details please go to the Enron home page and click on JDRF Cyber Auction or click [LINK] for the direct link. The Auction this year is hosted by EGS. Big E Caf? - This Friday, October 26th, 11:30 - 1:00 p.m. on Andrews Street in front of the Enron Center North building. Lunch - Fajita lunch with all the trimmings provided by Taquera del Sol for $5.00. Entertainment - Live entertainment provided by Mango Punch. JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. Raffle tickets for two roundtrip British Airways tickets for $10.00 each. Winning tickets will be drawn at 2:00 p.m. on Friday, October 26th. JDRF Bake Sale - Cakes, cookies and Halloween treats will be available for purchase. JDRF T-shirt Sale - Enron/JDRF T-shirts will be available for a $25 donation. JDRF Sneaker (paper) Sales - The competition continues between business units - sneakers will sale for $5.00 each. For those of you that have signed up to join us for the walk, please continue to collect donations and watch your email this week for further information regarding the Walk. For those of you that have not signed up, please join us for the Walk. Although we have only a few days remaining until the walk, it is not too late to sign up and join us for this great event. It only takes a moment to fill out a walk form and you will get an Enron/JDRF T-shirt for collect or donating $25 or more, and will join hundreds of Enron employees and several thousand Houstonians on the Walk. This event will be a blast. The Enron tent will be great with lots of good food and entertainment and everyone will have a fun time. Parking at the walk site will be free. If you cannot attend the Walk, please support one of your local walkers, participate in the cyber auction or join us for the Big E Caf? on Friday to participate in some of our other great fundraising activities. We want to keep our standing as the number one walk team in the Gulf Coast area, Texas, and the entire Southern Region of the U.S., as well as in the top 10 nationally. Please contact Janice Riedel at X-37507 or Cathy Phillips at X-36898 to sign up as a walker, make a donation, or ask any questions you may have. Come join the fun. Thank you for your support and generosity. Mike McConnell",Winning tickets will be drawn at 2:00 p.m. JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. This event will be a blast.
4,"Transwestern Pipeline Co. posted new notice(s) since our last check at 12/13/2000 4:00:01 PM, the newest notice looks like: Capacity Constraint, Dec 13 2000 4:03PM, Dec 14 2000 9:00AM, Dec 15 2000 8:59AM, 2241, Allocation - San Juan Lateral Please click the following to go to the web site for detail. [LINK] =60",


In [158]:
summarizer = pipeline("summarization", model="Falconsai/text_summarization")

Device set to use cpu


In [159]:
# sample_df["extractive_summary"] = sample_df["cleaned_text"].apply(lambda x: summarize_email(x, vectorizer))
# sample_df["abstractive_summary"] = sample_df["extractive_summary"].apply(summarize_abstractive)

In [160]:
sample_df["extractive_summary"] = sample_df["cleaned_text"].apply(lambda x: summarize_email(x, vectorizer))
sample_df["abstractive_summary"] = sample_df["extractive_summary"].apply(summarize_abstractive)

# Now take the subset
sample_df_subset = sample_df.sample(n=10, random_state=42)

Both `max_new_tokens` (=256) and `max_length`(=30) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=18) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=27) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=16) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both

In [161]:
sample_df_subset = sample_df.sample(n=10, random_state=42)

In [162]:
sample_df.head()

Unnamed: 0,file,message,message_id,date,from,to,subject,body,cleaned_text,email_parts,nltk_processed_text,top_keywords,summary,extractive_summary,abstractive_summary
0,allen-p/deleted_items/195.,"Message-ID: <5558637.1075858635446.JavaMail.evans@thyme>\nDate: Wed, 24 Oct 2001 17:30:02 -0700 (PDT)\nFrom: arsystem@mailman.enron.com\nTo: k..allen@enron.com\nSubject: Your Approval is Overdue: Access Request for matt.smith@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: ARSystem <ARSystem@mailman.enron.com>@ENRON\nX-To: Allen, Phillip K. </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PALLEN>\nX-cc: \nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\nThis request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications\n\n\n\n",5558637.1075858635446.JavaMail.evans@thyme,"Wed, 24 Oct 2001 17:30:02 -0700 (PDT)",arsystem@mailman.enron.com,k..allen@enron.com,Your Approval is Overdue: Access Request for matt.smith@enron.com,This request has been pending your approval for 10 days. Please click http://itcapps.corp.enron.com/srrs/auth/emailLink.asp?ID=000000000067320&Page=Approval to review and act upon this request.\n\n\n\n\n\nRequest ID : 000000000067320\nRequest Create Date : 10/11/01 10:24:53 AM\nRequested For : matt.smith@enron.com\nResource Name : Risk Acceptance Forms Local Admin Rights - Permanent\nResource Type : Applications,This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications,[This request has been pending your approval for 10 days. Please click [LINK] to review and act upon this request. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications],request pending approval day please click link review act upon request request id phone request create date requested email resource name risk acceptance form local admin right permanent resource type application,"[request, resource, admin, pending, acceptance, permanent, act, local, approval, upon]",This request has been pending your approval for 10 days. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications Please click [LINK] to review and act upon this request.,This request has been pending your approval for 10 days. Request ID : [PHONE]67320 Request Create Date : 10/11/01 10:24:53 AM Requested For : [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications Please click [LINK] to review and act upon this request.,Request ID : [PHONE]67320 Request Create Date: 10/11/01 10:24:53 AM Requested For: [EMAIL] Resource Name : Risk Acceptance Forms Local Admin Rights - Permanent Resource Type : Applications Please click [LINK] to review and act upon this request.
1,allen-p/all_documents/377.,"Message-ID: <227662.1075855694115.JavaMail.evans@thyme>\nDate: Tue, 1 May 2001 07:14:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: ina.rangel@enron.com\nSubject: 2- SURVEY/INFORMATION EMAIL\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Ina Rangel\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\All documents\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\n---------------------- Forwarded by Phillip K Allen/HOU/ECT on 05/01/2001 \n02:14 PM ---------------------------\n\n\nOutlook Migration Team@ENRON\n04/27/2001 01:00 PM\nTo: Allison Horton/NA/Enron@ENRON, Amir Baig/NA/Enron@ENRON, Brandon \nBangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles \nPhilpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris \nTull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, \nDonald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna \nMorrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn \nSchultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia \nBarrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj \nPerubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea \nYowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne \nCastellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann \nMatson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick \nJohnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A \nHope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald \nFain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna \nHarris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi \nMonson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John \nStabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James \nGramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer \nJohnson/Contractor/Enron Communications@Enron Communications, Jim \nLittle/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald \nMartin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin \nMitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley \nPearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, \nNatalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A \nRichardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary \nSimmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David \nUpton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel \nClick/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy \nGross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny \nJones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar \nPonce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance \nStanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M \nTholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, \nMatthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique \nSanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, \nTori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne \nWukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike \nPotter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne \nCalcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia \nFranklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan \nCollins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, \nLia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert \nAllwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna \nBoudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara \nCarter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron \nCommunications@Enron Communications, Jack Netek/Enron Communications@Enron \nCommunications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, \nCraig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy \nGagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth \nBalladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA\ncc: \nSubject: 2- SURVEY/INFORMATION EMAIL\n\nCurrent Notes User: \n\nTo ensure that you experience a successful migration from Notes to Outlook, \nit is necessary to gather individual user information prior to your date of \nmigration. Please take a few minutes to completely fill out the following \nsurvey. When you finish, simply click on the 'Reply' button then hit 'Send' \nYour survey will automatically be sent to the Outlook 2000 Migration Mailbox.\n\nThank you.\n\nOutlook 2000 Migration Team\n\n------------------------------------------------------------------------------\n--------------------------------------------------------------\n\nFull Name: \n\nLogin ID: \n\nExtension: \n\nOffice Location: \n\nWhat type of computer do you have? (Desktop, Laptop, Both) \n\nDo you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, \nJornada) \n\nDo you have permission to access anyone's Email/Calendar? \n If yes, who? \n\nDoes anyone have permission to access your Email/Calendar? \n If yes, who? \n\nAre you responsible for updating anyone else's address book? \n If yes, who? \n\nIs anyone else responsible for updating your address book? \n If yes, who? \n\nDo you have access to a shared calendar? \n If yes, which shared calendar? \n\nDo you have any Distribution Groups that Messaging maintains for you (for \nmass mailings)? \n If yes, please list here: \n\nPlease list all Notes databases applications that you currently use: \n\nIn our efforts to plan the exact date/time of your migration, we also will \nneed to know:\n\nWhat are your normal work hours? From: To: \n\nWill you be out of the office in the near future for vacation, leave, etc?\n If so, when? From (MM/DD/YY): To (MM/DD/YY): \n\n\n",227662.1075855694115.JavaMail.evans@thyme,"Tue, 1 May 2001 07:14:00 -0700 (PDT)",phillip.allen@enron.com,ina.rangel@enron.com,2- SURVEY/INFORMATION EMAIL,"---------------------- Forwarded by Phillip K Allen/HOU/ECT on 05/01/2001 \n02:14 PM ---------------------------\n\n\nOutlook Migration Team@ENRON\n04/27/2001 01:00 PM\nTo: Allison Horton/NA/Enron@ENRON, Amir Baig/NA/Enron@ENRON, Brandon \nBangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles \nPhilpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris \nTull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, \nDonald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna \nMorrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn \nSchultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia \nBarrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj \nPerubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea \nYowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne \nCastellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann \nMatson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick \nJohnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A \nHope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald \nFain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna \nHarris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi \nMonson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John \nStabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James \nGramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer \nJohnson/Contractor/Enron Communications@Enron Communications, Jim \nLittle/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald \nMartin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin \nMitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley \nPearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, \nNatalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A \nRichardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary \nSimmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David \nUpton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel \nClick/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy \nGross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny \nJones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar \nPonce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance \nStanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M \nTholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, \nMatthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique \nSanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, \nTori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne \nWukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike \nPotter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne \nCalcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia \nFranklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan \nCollins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, \nLia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert \nAllwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna \nBoudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara \nCarter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron \nCommunications@Enron Communications, Jack Netek/Enron Communications@Enron \nCommunications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, \nCraig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy \nGagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth \nBalladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA\ncc: \nSubject: 2- SURVEY/INFORMATION EMAIL\n\nCurrent Notes User: \n\nTo ensure that you experience a successful migration from Notes to Outlook, \nit is necessary to gather individual user information prior to your date of \nmigration. Please take a few minutes to completely fill out the following \nsurvey. When you finish, simply click on the 'Reply' button then hit 'Send' \nYour survey will automatically be sent to the Outlook 2000 Migration Mailbox.\n\nThank you.\n\nOutlook 2000 Migration Team\n\n------------------------------------------------------------------------------\n--------------------------------------------------------------\n\nFull Name: \n\nLogin ID: \n\nExtension: \n\nOffice Location: \n\nWhat type of computer do you have? (Desktop, Laptop, Both) \n\nDo you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, \nJornada) \n\nDo you have permission to access anyone's Email/Calendar? \n If yes, who? \n\nDoes anyone have permission to access your Email/Calendar? \n If yes, who? \n\nAre you responsible for updating anyone else's address book? \n If yes, who? \n\nIs anyone else responsible for updating your address book? \n If yes, who? \n\nDo you have access to a shared calendar? \n If yes, which shared calendar? \n\nDo you have any Distribution Groups that Messaging maintains for you (for \nmass mailings)? \n If yes, please list here: \n\nPlease list all Notes databases applications that you currently use: \n\nIn our efforts to plan the exact date/time of your migration, we also will \nneed to know:\n\nWhat are your normal work hours? From: To: \n\nWill you be out of the office in the near future for vacation, leave, etc?\n If so, when? From (MM/DD/YY): To (MM/DD/YY):","02:14 PM --------------------------- Outlook Migration Team@ENRON 04/27/2001 01:00 PM Bangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles Philpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris Tull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, Donald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna Morrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn Schultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia Barrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj Perubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea Yowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne Castellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann Matson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick Johnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A Hope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald Fain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna Harris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi Monson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John Stabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James Gramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer Johnson/Contractor/Enron Communications@Enron Communications, Jim Little/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald Martin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin Mitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley Pearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, Natalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A Richardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary Simmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David Upton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel Click/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy Gross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny Jones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar Ponce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance Stanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M Tholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, Matthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique Sanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, Tori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne Wukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike Potter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne Calcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia Franklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan Collins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, Lia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert Allwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna Boudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara Carter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron Communications@Enron Communications, Jack Netek/Enron Communications@Enron Communications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, Craig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy Gagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth Balladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA Current Notes User: To ensure that you experience a successful migration from Notes to Outlook, it is necessary to gather individual user information prior to your date of migration. Please take a few minutes to completely fill out the following survey. When you finish, simply click on the 'Reply' button then hit 'Send' Your survey will automatically be sent to the Outlook 2000 Migration Mailbox. Thank you. Outlook 2000 Migration Team ------------------------------------------------------------------------------ -------------------------------------------------------------- Full Name: Login ID: Extension: Office Location: What type of computer do you have? (Desktop, Laptop, Both) Do you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, Jornada) Do you have permission to access anyone's Email/Calendar? If yes, who? Does anyone have permission to access your Email/Calendar? If yes, who? Are you responsible for updating anyone else's address book? If yes, who? Is anyone else responsible for updating your address book? If yes, who? Do you have access to a shared calendar? If yes, which shared calendar? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? If yes, please list here: Please list all Notes databases applications that you currently use: In our efforts to plan the exact date/time of your migration, we also will need to know: Will you be out of the office in the near future for vacation, leave, etc? If so, when? From (MM/DD/YY): To (MM/DD/YY):","[02:14 PM --------------------------- Outlook Migration Team@ENRON 04/27/2001 01:00 PM Bangerter/NA/Enron@Enron, Brian Ellis/Corp/Enron@Enron, Charles Philpott/HR/Corp/Enron@ENRON, Chris P Wood/NA/Enron@Enron, Chris Tull/HOU/ECT@ECT, Dale Smith/Corp/Enron@ENRON, Dave June/NA/Enron@ENRON, Donald Sutton/NA/Enron@Enron, Felicia Buenrostro/HR/Corp/Enron@ENRON, Johnna Morrison/Corp/Enron@ENRON, Joe Dorn/Corp/Enron@ENRON, Kathryn Schultea/HR/Corp/Enron@ENRON, Leon McDowell/NA/Enron@ENRON, Leticia Barrios/Corp/Enron@ENRON, Milton Brown/HR/Corp/Enron@ENRON, Raj Perubhatla/Corp/Enron@Enron, Shekar Komatireddy/NA/Enron@Enron, Andrea Yowman/Corp/Enron@ENRON, Angie O'Brian/HR/Corp/Enron@ENRON, Bonne Castellano/HR/Corp/Enron@ENRON, Gwynn Gorsuch/NA/Enron@ENRON, Jo Ann Matson/Corp/Enron@ENRON, LaQuitta Washington/HR/Corp/Enron@ENRON, Rick Johnson/HR/Corp/Enron@ENRON, Sandra Lighthill/HR/Corp/Enron@ENRON, Valeria A Hope/HOU/ECT@ECT, Charlotte Brown/HR/Corp/Enron@ENRON, Ronald Fain/HR/Corp/Enron@ENRON, Gary Fitch/HR/Corp/Enron@Enron, Anna Harris/HR/Corp/Enron@ENRON, Keith Jones/HR/Corp/Enron@ENRON, Kristi Monson/NA/Enron@Enron, Bobbie McNiel/HR/Corp/Enron@ENRON, John Stabler/HR/Corp/Enron@ENRON, Michelle Prince/NA/Enron@Enron, James Gramke/NA/Enron@ENRON, Blair Hicks/NA/Enron@ENRON, Jennifer Johnson/Contractor/Enron Communications@Enron Communications, Jim Little/Enron@EnronXGate, Dale Lukert/NA/Enron@ENRON, Donald Martin/NA/Enron@ENRON, Andrew Mattei/NA/Enron@ENRON, Darvin Mitchell/NA/Enron@ENRON, Mark Oldham/NA/Enron@ENRON, Wesley Pearson/NA/Enron@ENRON, Ramon Pizarro/ENRON_DEVELOPMENT@ENRON_DEVELOPMENT, Natalie Rau/NA/Enron@ENRON, William Redick/NA/Enron@ENRON, Mark A Richardson/NA/Enron@ENRON, Joseph Schnieders/NA/Enron@ENRON, Gary Simmons/NA/Enron@Enron, Delaney Trimble/NA/Enron@ENRON, David Upton/NA/Enron@ENRON, Mike Boegler/HR/Corp/Enron@ENRON, Lyndel Click/HR/Corp/Enron@ENRON, Gabriel Franco/NA/Enron@Enron, Randy Gross/HR/Corp/Enron@Enron, Arthur Johnson/HR/Corp/Enron@Enron, Danny Jones/HR/Corp/Enron@ENRON, John Ogden/Houston/Eott@Eott, Edgar Ponce/NA/Enron@Enron, Tracy Pursifull/HR/Corp/Enron@ENRON, Lance Stanley/HR/Corp/Enron@ENRON, Frank Ermis/HOU/ECT@ECT, Jane M Tholt/HOU/ECT@ECT, Jay Reitmeyer/HOU/ECT@ECT, Keith Holst/HOU/ECT@ect, Matthew Lenhart/HOU/ECT@ECT, Mike Grigsby/HOU/ECT@ECT, Monique Sanchez/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT, Randall L Gay/HOU/ECT@ECT, Tori Kuykendall/HOU/ECT@ECT, Brenda H Fletcher/HOU/ECT@ECT, Jeanne Wukasch/Corp/Enron@ENRON, Mary Theresa Franklin/HOU/ECT@ECT, Mike Potter/NA/Enron@Enron, Natalie Baker/HOU/ECT@ECT, Suzanne Calcagno/NA/Enron@Enron, Alvin Thompson/Corp/Enron@Enron, Cynthia Franklin/Corp/Enron@ENRON, Jesse Villarreal/HOU/ECT@ECT, Joan Collins/HOU/EES@EES, Joe A Casas/HOU/ECT@ECT, Kelly Loocke/ENRON@enronXgate, Lia Halstead/NA/Enron@ENRON, Meredith Homco/HOU/ECT@ECT, Robert Allwein/HOU/ECT@ECT, Scott Loving/NA/Enron@ENRON, Shanna Boudreaux/ENRON@enronXgate, Steve Gillespie/Corp/Enron@ENRON, Tamara Carter/NA/Enron@ENRON, Tracy Wood/NA/Enron@ENRON, Gabriel Fuzat/Enron Communications@Enron Communications, Jack Netek/Enron Communications@Enron Communications, Lam Nguyen/NA/Enron@Enron, Camille Gerard/Corp/Enron@ENRON, Craig Taylor/HOU/ECT@ECT, Jessica Hangach/NYC/MGUSA@MGUSA, Kathy Gagel/NYC/MGUSA@MGUSA, Lisa Goulart/NYC/MGUSA@MGUSA, Ruth Balladares/NYC/MGUSA@MGUSA, Sid Strutt/NYC/MGUSA@MGUSA Current Notes User: To ensure that you experience a successful migration from Notes to Outlook, it is necessary to gather individual user information prior to your date of migration. Please take a few minutes to completely fill out the following survey. When you finish, simply click on the 'Reply' button then hit 'Send' Your survey will automatically be sent to the Outlook 2000 Migration Mailbox. Thank you. Outlook 2000 Migration Team ------------------------------------------------------------------------------ -------------------------------------------------------------- Full Name: Login ID: Extension: Office Location: What type of computer do you have? (Desktop, Laptop, Both) Do you have a PDA? If yes, what type do you have: (None, IPAQ, Palm Pilot, Jornada) Do you have permission to access anyone's Email/Calendar? If yes, who? Does anyone have permission to access your Email/Calendar? If yes, who? Are you responsible for updating anyone else's address book? If yes, who? Is anyone else responsible for updating your address book? If yes, who? Do you have access to a shared calendar? If yes, which shared calendar? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? If yes, please list here: Please list all Notes databases applications that you currently use: In our efforts to plan the exact date/time of your migration, we also will need to know: Will you be out of the office in the near future for vacation, leave, etc? If so, when? From (MM/DD/YY): To (MM/DD/YY):]",pm outlook migration team enron pm bangerter na enron enron brian elli corp enron enron charles philpott hr corp enron enron chris p wood na enron enron chris tull hou ect ect dale smith corp enron enron dave june na enron enron donald sutton na enron enron felicia buenrostro hr corp enron enron johnna morrison corp enron enron joe dorn corp enron enron kathryn schultea hr corp enron enron leon mcdowell na enron enron leticia barrio corp enron enron milton brown hr corp enron enron raj perubhatla corp enron enron shekar komatireddy na enron enron andrea yowman corp enron enron angie brian hr corp enron enron bonne castellano hr corp enron enron gwynn gorsuch na enron enron jo ann matson corp enron enron laquitta washington hr corp enron enron rick johnson hr corp enron enron sandra lighthill hr corp enron enron valeria hope hou ect ect charlotte brown hr corp enron enron ronald fain hr corp enron enron gary fitch hr corp enron enron anna harris hr corp enron enron keith jones hr corp enron enron kristi monson na enron enron bobbie mcniel hr corp enron enron john stabler hr corp enron enron michelle prince na enron enron james gramke na enron enron blair hick na enron enron jennifer johnson contractor enron communication enron communication jim little enron enronxgate dale lukert na enron enron donald martin na enron enron andrew mattei na enron enron darvin mitchell na enron enron mark oldham na enron enron wesley pearson na enron enron ramon pizarro enron_development enron_development natalie rau na enron enron william redick na enron enron mark richardson na enron enron joseph schnieders na enron enron gary simmons na enron enron delaney trimble na enron enron david upton na enron enron mike boegler hr corp enron enron lyndel click hr corp enron enron gabriel franco na enron enron randy gross hr corp enron enron arthur johnson hr corp enron enron danny jones hr corp enron enron john ogden houston eott eott edgar ponce na enron enron tracy pursifull hr corp enron enron lance stanley hr corp enron enron frank ermis hou ect ect jane tholt hou ect ect jay reitmeyer hou ect ect keith holst hou ect ect matthew lenhart hou ect ect mike grigsby hou ect ect monique sanchez hou ect ect phillip k allen hou ect ect randall l gay hou ect ect torus kuykendall hou ect ect brenda h fletcher hou ect ect jeanne wukasch corp enron enron mary theresa franklin hou ect ect mike potter na enron enron natalie baker hou ect ect suzanne calcagno na enron enron alvin thompson corp enron enron cynthia franklin corp enron enron jesse villarreal hou ect ect joan collins hou ee ee joe casas hou ect ect kelly loocke enron enronxgate lia halstead na enron enron meredith homco hou ect ect robert allwein hou ect ect scott loving na enron enron shanna boudreaux enron enronxgate steve gillespie corp enron enron tamara carter na enron enron tracy wood na enron enron gabriel fuzat enron communication enron communication jack netek enron communication enron communication lam nguyen na enron enron camille gerard corp enron enron craig taylor hou ect ect jessica hangach nyc mgusa mgusa kathy gagel nyc mgusa mgusa lisa goulart nyc mgusa mgusa ruth balladares nyc mgusa mgusa sid strutt nyc mgusa mgusa current note user ensure experience successful migration note outlook necessary gather individual user information prior date migration please take minute completely fill following survey finish simply click reply button hit send survey automatically sent outlook migration mailbox thank outlook migration team full name login id extension office location type computer desktop laptop pda yes type none ipaq palm pilot jornada permission access anyone email calendar yes anyone permission access email calendar yes responsible updating anyone else address book yes anyone else responsible updating address book yes access shared calendar yes shared calendar distribution group messaging maintains mass mailing yes please list please list note database application currently use effort plan exact date time migration also need know office near future vacation leave etc mm dd yy mm dd yy,"[enron, ect, na, corp, hr, hou, mgusa, yes, migration, nyc]","(Desktop, Laptop, Both) Do you have a PDA? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? Do you have access to a shared calendar?","(Desktop, Laptop, Both) Do you have a PDA? Do you have any Distribution Groups that Messaging maintains for you (for mass mailings)? Do you have access to a shared calendar?",Do you have a PDA? Have you any Distribution Groups that Messaging maintains for you (for mass mailings) or have access to a shared calendar?
2,allen-p/deleted_items/153.,"Message-ID: <14009873.1075858634205.JavaMail.evans@thyme>\nDate: Mon, 13 Aug 2001 08:47:16 -0700 (PDT)\nFrom: msimpkins@winstead.com\nTo: pallen@enron.com, pallen70@hotmail.com\nSubject: Revised Utility Construction Escrow Agreement - Lakeline Apts.\nCc: michaelb@amhms.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: michaelb@amhms.com\nX-From: ""Simpkins, Michelle"" <MSimpkins@winstead.com>@ENRON <IMCEANOTES-+22Simpkins+2C+20Michelle+22+20+3CMSimpkins+40winstead+2Ecom+3E+40ENRON@ENRON.com>\nX-To: 'pallen@enron.com', 'pallen70@hotmail.com'\nX-cc: 'michaelb@amhms.com'\nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\n <<3M@X05!.DOC>>\nPhillip,\n\nEnclosed is a draft of the revised Escrow Agreement based on the Lender's\ncomments. I am coordinating with the Lender regarding the reimbursement\nprovisions in the event Agape fails to receive reimbursement from Ryland.\nWe may do a separate agreement between you, AMHP and McCall in the event\nAgape fails to receive reimbursement. Please contact me at (512) 370-2836\nor Michael Bobinchuck with any questions or concerns. Thanks.\n\nMichelle L. Simpkins\nWinstead Sechrest & Minick P.C.\n100 Congress Avenue, Suite 800\nAustin, Texas 78701\n(512) 370-2836\n(512) 370-2850 Fax\nmsimpkins@winstead.com\n\n\n - 3M@X05!.DOC",14009873.1075858634205.JavaMail.evans@thyme,"Mon, 13 Aug 2001 08:47:16 -0700 (PDT)",msimpkins@winstead.com,"pallen@enron.com, pallen70@hotmail.com",Revised Utility Construction Escrow Agreement - Lakeline Apts.,"<<3M@X05!.DOC>>\nPhillip,\n\nEnclosed is a draft of the revised Escrow Agreement based on the Lender's\ncomments. I am coordinating with the Lender regarding the reimbursement\nprovisions in the event Agape fails to receive reimbursement from Ryland.\nWe may do a separate agreement between you, AMHP and McCall in the event\nAgape fails to receive reimbursement. Please contact me at (512) 370-2836\nor Michael Bobinchuck with any questions or concerns. Thanks.\n\nMichelle L. Simpkins\nWinstead Sechrest & Minick P.C.\n100 Congress Avenue, Suite 800\nAustin, Texas 78701\n(512) 370-2836\n(512) 370-2850 Fax\nmsimpkins@winstead.com\n\n\n - 3M@X05!.DOC","Phillip, Enclosed is a draft of the revised Escrow Agreement based on the Lender's comments. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement. Please contact me at [PHONE] or Michael Bobinchuck with any questions or concerns. Thanks. Michelle L. Simpkins Winstead Sechrest & Minick P.C. 100 Congress Avenue, Suite 800 Austin, Texas 78701 [PHONE] [PHONE] Fax [EMAIL] - 3M@X05!.DOC","[Phillip, Enclosed is a draft of the revised Escrow Agreement based on the Lender's comments. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement. Please contact me at [PHONE] or Michael Bobinchuck with any questions or concerns. Thanks. Michelle L. Simpkins Winstead Sechrest & Minick P.C. 100 Congress Avenue, Suite 800 Austin, Texas 78701 [PHONE] [PHONE] Fax [EMAIL] - 3M@X05!.DOC]",phillip enclosed draft revised escrow agreement based lender comment coordinating lender regarding reimbursement provision event agape fails receive reimbursement ryland may separate agreement amhp mccall event agape fails receive reimbursement please contact phone michael bobinchuck question concern thanks michelle l simpkins winstead sechrest minick p c congress avenue suite austin texas phone phone fax email 3m x05 doc,"[reimbursement, agape, fails, lender, event, agreement, coordinating, ryland, x05, mccall]","Simpkins Winstead Sechrest & Minick P.C. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement.","Simpkins Winstead Sechrest & Minick P.C. I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall in the event Agape fails to receive reimbursement.","I am coordinating with the Lender regarding the reimbursement provisions in the event Agape fails to receive reimbursement from Ryland. We may do a separate agreement between you, AMHP and McCall ."
3,allen-p/deleted_items/137.,"Message-ID: <31173707.1075858633755.JavaMail.evans@thyme>\nDate: Mon, 22 Oct 2001 20:16:30 -0700 (PDT)\nFrom: no.address@enron.com\nSubject: JDRF Cyber Auction & Update Information\nMime-Version: 1.0\nContent-Type: text/plain; charset=ANSI_X3.4-1968\nContent-Transfer-Encoding: 7bit\nX-From: EGM Office of the Chairman@ENRON\nX-To: All Enron Houston@ENRON <??SAll Enron Houston@ENRON>\nX-cc: \nX-bcc: \nX-Folder: \PALLEN (Non-Privileged)\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: PALLEN (Non-Privileged).pst\n\nThis Sunday, October 28th, is the Juvenile Diabetes Research Foundation (JDRF) Walk to Cure Diabetes at Greenspoint Mall at 8:00 a.m. In preparation for the big event, we have several fun activities scheduled to take place this week as detailed below.\n\nJDRF Cyber Auction - The Cyber Auction will take place this Wednesday, October 24th, through Thursday, October 25th. For details please go to the Enron home page and click on JDRF Cyber Auction or click http://ecpdxapps01.enron.net/apps/auction.nsf for the direct link. The Auction this year is hosted by EGS. \n\nBig E Caf? - This Friday, October 26th, 11:30 - 1:00 p.m. on Andrews Street in front of the Enron Center North building.\nLunch - Fajita lunch with all the trimmings provided by Taquera del Sol for $5.00.\nEntertainment - Live entertainment provided by Mango Punch.\nJDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. Raffle tickets for two roundtrip British Airways tickets for $10.00 each. Winning tickets will be drawn at 2:00 p.m. on Friday, October 26th. \nJDRF Bake Sale - Cakes, cookies and Halloween treats will be available for purchase.\nJDRF T-shirt Sale - Enron/JDRF T-shirts will be available for a $25 donation.\nJDRF Sneaker (paper) Sales - The competition continues between business units - sneakers will sale for $5.00 each.\n\nFor those of you that have signed up to join us for the walk, please continue to collect donations and watch your email this week for further information regarding the Walk. For those of you that have not signed up, please join us for the Walk. Although we have only a few days remaining until the walk, it is not too late to sign up and join us for this great event. It only takes a moment to fill out a walk form and you will get an Enron/JDRF T-shirt for collect or donating $25 or more, and will join hundreds of Enron employees and several thousand Houstonians on the Walk. This event will be a blast. The Enron tent will be great with lots of good food and entertainment and everyone will have a fun time. Parking at the walk site will be free.\n\nIf you cannot attend the Walk, please support one of your local walkers, participate in the cyber auction or join us for the Big E Caf? on Friday to participate in some of our other great fundraising activities. We want to keep our standing as the number one walk team in the Gulf Coast area, Texas, and the entire Southern Region of the U.S., as well as in the top 10 nationally.\n\nPlease contact Janice Riedel at X-37507 or Cathy Phillips at X-36898 to sign up as a walker, make a donation, or ask any questions you may have. Come join the fun.\n\nThank you for your support and generosity.\n\nMike McConnell",31173707.1075858633755.JavaMail.evans@thyme,"Mon, 22 Oct 2001 20:16:30 -0700 (PDT)",no.address@enron.com,All Enron Houston@ENRON <??SAll Enron Houston@ENRON>,JDRF Cyber Auction & Update Information,"This Sunday, October 28th, is the Juvenile Diabetes Research Foundation (JDRF) Walk to Cure Diabetes at Greenspoint Mall at 8:00 a.m. In preparation for the big event, we have several fun activities scheduled to take place this week as detailed below.\n\nJDRF Cyber Auction - The Cyber Auction will take place this Wednesday, October 24th, through Thursday, October 25th. For details please go to the Enron home page and click on JDRF Cyber Auction or click http://ecpdxapps01.enron.net/apps/auction.nsf for the direct link. The Auction this year is hosted by EGS. \n\nBig E Caf? - This Friday, October 26th, 11:30 - 1:00 p.m. on Andrews Street in front of the Enron Center North building.\nLunch - Fajita lunch with all the trimmings provided by Taquera del Sol for $5.00.\nEntertainment - Live entertainment provided by Mango Punch.\nJDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. Raffle tickets for two roundtrip British Airways tickets for $10.00 each. Winning tickets will be drawn at 2:00 p.m. on Friday, October 26th. \nJDRF Bake Sale - Cakes, cookies and Halloween treats will be available for purchase.\nJDRF T-shirt Sale - Enron/JDRF T-shirts will be available for a $25 donation.\nJDRF Sneaker (paper) Sales - The competition continues between business units - sneakers will sale for $5.00 each.\n\nFor those of you that have signed up to join us for the walk, please continue to collect donations and watch your email this week for further information regarding the Walk. For those of you that have not signed up, please join us for the Walk. Although we have only a few days remaining until the walk, it is not too late to sign up and join us for this great event. It only takes a moment to fill out a walk form and you will get an Enron/JDRF T-shirt for collect or donating $25 or more, and will join hundreds of Enron employees and several thousand Houstonians on the Walk. This event will be a blast. The Enron tent will be great with lots of good food and entertainment and everyone will have a fun time. Parking at the walk site will be free.\n\nIf you cannot attend the Walk, please support one of your local walkers, participate in the cyber auction or join us for the Big E Caf? on Friday to participate in some of our other great fundraising activities. We want to keep our standing as the number one walk team in the Gulf Coast area, Texas, and the entire Southern Region of the U.S., as well as in the top 10 nationally.\n\nPlease contact Janice Riedel at X-37507 or Cathy Phillips at X-36898 to sign up as a walker, make a donation, or ask any questions you may have. Come join the fun.\n\nThank you for your support and generosity.\n\nMike McConnell","This Sunday, October 28th, is the Juvenile Diabetes Research Foundation (JDRF) Walk to Cure Diabetes at Greenspoint Mall at 8:00 a.m. In preparation for the big event, we have several fun activities scheduled to take place this week as detailed below. JDRF Cyber Auction - The Cyber Auction will take place this Wednesday, October 24th, through Thursday, October 25th. For details please go to the Enron home page and click on JDRF Cyber Auction or click [LINK] for the direct link. The Auction this year is hosted by EGS. Big E Caf? - This Friday, October 26th, 11:30 - 1:00 p.m. on Andrews Street in front of the Enron Center North building. Lunch - Fajita lunch with all the trimmings provided by Taquera del Sol for $5.00. Entertainment - Live entertainment provided by Mango Punch. JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. Raffle tickets for two roundtrip British Airways tickets for $10.00 each. Winning tickets will be drawn at 2:00 p.m. on Friday, October 26th. JDRF Bake Sale - Cakes, cookies and Halloween treats will be available for purchase. JDRF T-shirt Sale - Enron/JDRF T-shirts will be available for a $25 donation. JDRF Sneaker (paper) Sales - The competition continues between business units - sneakers will sale for $5.00 each. For those of you that have signed up to join us for the walk, please continue to collect donations and watch your email this week for further information regarding the Walk. For those of you that have not signed up, please join us for the Walk. Although we have only a few days remaining until the walk, it is not too late to sign up and join us for this great event. It only takes a moment to fill out a walk form and you will get an Enron/JDRF T-shirt for collect or donating $25 or more, and will join hundreds of Enron employees and several thousand Houstonians on the Walk. This event will be a blast. The Enron tent will be great with lots of good food and entertainment and everyone will have a fun time. Parking at the walk site will be free. If you cannot attend the Walk, please support one of your local walkers, participate in the cyber auction or join us for the Big E Caf? on Friday to participate in some of our other great fundraising activities. We want to keep our standing as the number one walk team in the Gulf Coast area, Texas, and the entire Southern Region of the U.S., as well as in the top 10 nationally. Please contact Janice Riedel at X-37507 or Cathy Phillips at X-36898 to sign up as a walker, make a donation, or ask any questions you may have. Come join the fun. Thank you for your support and generosity. Mike McConnell","[This Sunday, October 28th, is the Juvenile Diabetes Research Foundation (JDRF) Walk to Cure Diabetes at Greenspoint Mall at 8:00 a.m. In preparation for the big event, we have several fun activities scheduled to take place this week as detailed below. JDRF Cyber Auction - The Cyber Auction will take place this Wednesday, October 24th, through Thursday, October 25th. For details please go to the Enron home page and click on JDRF Cyber Auction or click [LINK] for the direct link. The Auction this year is hosted by EGS. Big E Caf? - This Friday, October 26th, 11:30 - 1:00 p.m. on Andrews Street in front of the Enron Center North building. Lunch - Fajita lunch with all the trimmings provided by Taquera del Sol for $5.00. Entertainment - Live entertainment provided by Mango Punch. JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. Raffle tickets for two roundtrip British Airways tickets for $10.00 each. Winning tickets will be drawn at 2:00 p.m. on Friday, October 26th. JDRF Bake Sale - Cakes, cookies and Halloween treats will be available for purchase. JDRF T-shirt Sale - Enron/JDRF T-shirts will be available for a $25 donation. JDRF Sneaker (paper) Sales - The competition continues between business units - sneakers will sale for $5.00 each. For those of you that have signed up to join us for the walk, please continue to collect donations and watch your email this week for further information regarding the Walk. For those of you that have not signed up, please join us for the Walk. Although we have only a few days remaining until the walk, it is not too late to sign up and join us for this great event. It only takes a moment to fill out a walk form and you will get an Enron/JDRF T-shirt for collect or donating $25 or more, and will join hundreds of Enron employees and several thousand Houstonians on the Walk. This event will be a blast. The Enron tent will be great with lots of good food and entertainment and everyone will have a fun time. Parking at the walk site will be free. If you cannot attend the Walk, please support one of your local walkers, participate in the cyber auction or join us for the Big E Caf? on Friday to participate in some of our other great fundraising activities. We want to keep our standing as the number one walk team in the Gulf Coast area, Texas, and the entire Southern Region of the U.S., as well as in the top 10 nationally. Please contact Janice Riedel at X-37507 or Cathy Phillips at X-36898 to sign up as a walker, make a donation, or ask any questions you may have. Come join the fun. Thank you for your support and generosity. Mike McConnell]",sunday october 28th juvenile diabetes research foundation jdrf walk cure diabetes greenspoint mall preparation big event several fun activity scheduled take place week detailed jdrf cyber auction cyber auction take place wednesday october 24th thursday october 25th detail please go enron home page click jdrf cyber auction click link direct link auction year hosted egs big e caf friday october 26th p andrew street front enron center north building lunch fajita lunch trimming provided taquera del sol entertainment live entertainment provided mango punch jdrf raffle raffle ticket two roundtrip continental airline ticket raffle ticket two roundtrip british airway ticket winning ticket drawn p friday october 26th jdrf bake sale cake cooky halloween treat available purchase jdrf shirt sale enron jdrf shirt available donation jdrf sneaker paper sale competition continues business unit sneaker sale signed join u walk please continue collect donation watch email week information regarding walk signed please join u walk although day remaining walk late sign join u great event take moment fill walk form get enron jdrf shirt collect donating join hundred enron employee several thousand houstonians walk event blast enron tent great lot good food entertainment everyone fun time parking walk site free cannot attend walk please support one local walker participate cyber auction join u big e caf friday participate great fundraising activity want keep standing number one walk team gulf coast area texas entire southern region u well top nationally please contact janice riedel x cathy phillips x sign walker make donation ask question may come join fun thank support generosity mike mcconnell,"[jdrf, walk, join, ticket, auction, cyber, shirt, raffle, october, donation]",Winning tickets will be drawn at 2:00 p.m. JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. This event will be a blast.,Winning tickets will be drawn at 2:00 p.m. JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each. This event will be a blast.,JDRF Raffle - Raffle tickets for two roundtrip Continental Airline tickets for $5.00 each . This event will be a blast .
4,allen-p/discussion_threads/226.,"Message-ID: <6965475.1075855678439.JavaMail.evans@thyme>\nDate: Wed, 13 Dec 2000 08:34:00 -0800 (PST)\nFrom: critical.notice@enron.com\nTo: ywang@enron.com, patti.sullivan@enron.com, phillip.k.allen@enron.com, \n\tjane.m.tholt@enron.com, mike.grigsby@enron.com\nSubject: New Notice from Transwestern Pipeline Co.\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: critical.notice@Enron.com\nX-To: ywang@Enron.com, Patti.Sullivan@Enron.com, Phillip.K.Allen@Enron.com, jane.m.tholt@Enron.com, Mike.Grigsby@Enron.com\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\Discussion threads\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nTranswestern Pipeline Co. posted new notice(s) since our last check at \n12/13/2000 4:00:01 PM, the newest notice looks like:\n\n Capacity Constraint, Dec 13 2000 4:03PM, Dec 14 2000 9:00AM, Dec 15 2000 \n8:59AM, 2241, Allocation - San Juan Lateral\n\nPlease click the following to go to the web site for detail.\n\nhttp://ios.ets.enron.com/infoPostings/shared/et_noncritical_notice.asp?company\n=60",6965475.1075855678439.JavaMail.evans@thyme,"Wed, 13 Dec 2000 08:34:00 -0800 (PST)",critical.notice@enron.com,"ywang@enron.com, patti.sullivan@enron.com, phillip.k.allen@enron.com,",New Notice from Transwestern Pipeline Co.,"Transwestern Pipeline Co. posted new notice(s) since our last check at \n12/13/2000 4:00:01 PM, the newest notice looks like:\n\n Capacity Constraint, Dec 13 2000 4:03PM, Dec 14 2000 9:00AM, Dec 15 2000 \n8:59AM, 2241, Allocation - San Juan Lateral\n\nPlease click the following to go to the web site for detail.\n\nhttp://ios.ets.enron.com/infoPostings/shared/et_noncritical_notice.asp?company\n=60","Transwestern Pipeline Co. posted new notice(s) since our last check at 12/13/2000 4:00:01 PM, the newest notice looks like: Capacity Constraint, Dec 13 2000 4:03PM, Dec 14 2000 9:00AM, Dec 15 2000 8:59AM, 2241, Allocation - San Juan Lateral Please click the following to go to the web site for detail. [LINK] =60","[Transwestern Pipeline Co. posted new notice(s) since our last check at 12/13/2000 4:00:01 PM, the newest notice looks like: Capacity Constraint, Dec 13 2000 4:03PM, Dec 14 2000 9:00AM, Dec 15 2000 8:59AM, 2241, Allocation - San Juan Lateral Please click the following to go to the web site for detail. [LINK] =60]",transwestern pipeline co posted new notice since last check pm newest notice look like capacity constraint dec 03pm dec 00am dec 59am allocation san juan lateral please click following go web site detail link,"[dec, notice, 03pm, 59am, lateral, constraint, transwestern, allocation, 00am, newest]",,,


In [163]:
if "abstractive_summary" in row:
    abstractive = row["abstractive_summary"]
else:
    print(f"No abstractive_summary found in row {idx}")


No abstractive_summary found in row 8449


In [164]:
extractive_scores = []
abstractive_scores = []

for idx, row in sample_df_subset.iterrows():
    ref = row["cleaned_text"]
    extractive = row["extractive_summary"]
    abstractive = row["abstractive_summary"]
    
    extractive_scores.append(compute_rouge_scores(ref, extractive))
    abstractive_scores.append(compute_rouge_scores(ref, abstractive))


In [165]:
extractive_df = pd.DataFrame(extractive_scores).add_prefix("extractive_")
abstractive_df = pd.DataFrame(abstractive_scores).add_prefix("abstractive_")

evaluated_df = pd.concat([sample_df_subset, extractive_df, abstractive_df], axis=1)
evaluated_df.head()

Unnamed: 0,file,message,message_id,date,from,to,subject,body,cleaned_text,email_parts,...,top_keywords,summary,extractive_summary,abstractive_summary,extractive_rouge1,extractive_rouge2,extractive_rougeL,abstractive_rouge1,abstractive_rouge2,abstractive_rougeL
95,allen-p/all_documents/623.,"Message-ID: <33197631.1075855699841.JavaMail.evans@thyme>\nDate: Thu, 14 Dec 2000 06:15:00 -0800 (PST)\nFrom: phillip.allen@enron.com\nTo: stagecoachmama@hotmail.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: stagecoachmama@hotmail.com\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\All documents\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nLucy,\n\nHere is a new file for 12/15.\n\n\n\n\nFor the rentroll for 12/08 here are my questions:\n\n #23 & #24 did not pay. Just late or moving?\n\n #25 & #33 Both paid 130 on 12/01 and $0 on 12/08. What is the deal?\n\n #11 Looks like she is caught up. When is she due again?\n\n\nPlease email the answers.\n\nPhillip\n",33197631.1075855699841.JavaMail.evans@thyme,"Thu, 14 Dec 2000 06:15:00 -0800 (PST)",phillip.allen@enron.com,stagecoachmama@hotmail.com,Mime-Version: 1.0,"Lucy,\n\nHere is a new file for 12/15.\n\n\n\n\nFor the rentroll for 12/08 here are my questions:\n\n #23 & #24 did not pay. Just late or moving?\n\n #25 & #33 Both paid 130 on 12/01 and $0 on 12/08. What is the deal?\n\n #11 Looks like she is caught up. When is she due again?\n\n\nPlease email the answers.\n\nPhillip","Lucy, Here is a new file for 12/15. For the rentroll for 12/08 here are my questions: #23 & #24 did not pay. Just late or moving? #25 & #33 Both paid 130 on 12/01 and $0 on 12/08. What is the deal? #11 Looks like she is caught up. When is she due again? Please email the answers. Phillip","[Lucy, Here is a new file for 12/15. For the rentroll for 12/08 here are my questions: #23 & #24 did not pay. Just late or moving? #25 & #33 Both paid 130 on 12/01 and $0 on 12/08. What is the deal? #11 Looks like she is caught up. When is she due again? Please email the answers. Phillip]",...,"[caught, moving, rentroll, answer, late, pay, paid, lucy, due, deal]",#25 & #33 Both paid 130 on 12/01 and $0 on 12/08. For the rentroll for 12/08 here are my questions: #23 & #24 did not pay. #11 Looks like she is caught up.,#25 & #33 Both paid 130 on 12/01 and $0 on 12/08. For the rentroll for 12/08 here are my questions: #23 & #24 did not pay. #11 Looks like she is caught up.,#23 & #24 did not pay for 12/08 . #11 Looks like she is caught up in the rentroll .,,,,,,
15,allen-p/discussion_threads/523.,"Message-ID: <5902357.1075855711087.JavaMail.evans@thyme>\nDate: Thu, 19 Apr 2001 05:03:00 -0700 (PDT)\nFrom: gthorse@keyad.com\nTo: phillip.k.allen@enron.com\nSubject: Bishops Corner\nMime-Version: 1.0\nContent-Type: text/plain; charset=ANSI_X3.4-1968\nContent-Transfer-Encoding: 7bit\nX-From: ""Greg Thorse"" <gthorse@keyad.com>\nX-To: <Phillip.K.Allen@enron.com>\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\Discussion threads\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nPhillip & Kieth;\n?\nI completed the following documents last night and I forgot to get them \ne-mailed to you, sorry.\n?\nPlease call me later today.\n?\nGreg\n - MapApplicationTeam Budget.xls\n - Phillip Allen 4.18.01.doc",5902357.1075855711087.JavaMail.evans@thyme,"Thu, 19 Apr 2001 05:03:00 -0700 (PDT)",gthorse@keyad.com,phillip.k.allen@enron.com,Bishops Corner,"Phillip & Kieth;\n?\nI completed the following documents last night and I forgot to get them \ne-mailed to you, sorry.\n?\nPlease call me later today.\n?\nGreg\n - MapApplicationTeam Budget.xls\n - Phillip Allen 4.18.01.doc","Phillip & Kieth; ? I completed the following documents last night and I forgot to get them e-mailed to you, sorry. ? Please call me later today. ? Greg - MapApplicationTeam Budget.xls - Phillip Allen 4.18.01.doc","[Phillip & Kieth; ? I completed the following documents last night and I forgot to get them e-mailed to you, sorry. ? Please call me later today. ? Greg - MapApplicationTeam Budget.xls - Phillip Allen 4.18.01.doc]",...,"[kieth, mapapplicationteam, mailed, forgot, sorry, night, budget, completed, greg, xl]","I completed the following documents last night and I forgot to get them e-mailed to you, sorry. Greg - MapApplicationTeam Budget.xls - Phillip Allen 4.18.01.doc Please call me later today.","I completed the following documents last night and I forgot to get them e-mailed to you, sorry. Greg - MapApplicationTeam Budget.xls - Phillip Allen 4.18.01.doc Please call me later today.",Greg - MapApplicationTeam Budget.xls - Phillip Allen 4.18.01.doc Please call me later today .,,,,,,
30,allen-p/_sent_mail/286.,"Message-ID: <26482479.1075855691493.JavaMail.evans@thyme>\nDate: Wed, 22 Mar 2000 01:27:00 -0800 (PST)\nFrom: phillip.allen@enron.com\nTo: stephane.brodeur@enron.com\nSubject: Re: Maps\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Stephane Brodeur\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nStephane,\n\n Can you create an e-mail list to distribute your reports everyday to the \nwest desk?\nOr put them on a common drive? We can do the same with our reports. List \nshould include:\n\n Phillip Allen\n Mike Grigsby\n Keith Holst\n Frank Ermis\n Steve South\n Janie Tholt\n Tory Kuykendall\n Matt Lenhart\n Randy Gay\n\nThanks.\n\nPhillip",26482479.1075855691493.JavaMail.evans@thyme,"Wed, 22 Mar 2000 01:27:00 -0800 (PST)",phillip.allen@enron.com,stephane.brodeur@enron.com,Re: Maps,"Stephane,\n\n Can you create an e-mail list to distribute your reports everyday to the \nwest desk?\nOr put them on a common drive? We can do the same with our reports. List \nshould include:\n\n Phillip Allen\n Mike Grigsby\n Keith Holst\n Frank Ermis\n Steve South\n Janie Tholt\n Tory Kuykendall\n Matt Lenhart\n Randy Gay\n\nThanks.\n\nPhillip","Stephane, Can you create an e-mail list to distribute your reports everyday to the west desk? Or put them on a common drive? We can do the same with our reports. List should include: Phillip Allen Mike Grigsby Keith Holst Frank Ermis Steve South Janie Tholt Tory Kuykendall Matt Lenhart Randy Gay Thanks. Phillip","[Stephane, Can you create an e-mail list to distribute your reports everyday to the west desk? Or put them on a common drive? We can do the same with our reports. List should include: Phillip Allen Mike Grigsby Keith Holst Frank Ermis Steve South Janie Tholt Tory Kuykendall Matt Lenhart Randy Gay Thanks. Phillip]",...,"[stephane, report, distribute, list, tory, everyday, janie, common, gay, kuykendall]","We can do the same with our reports. Or put them on a common drive? Stephane, Can you create an e-mail list to distribute your reports everyday to the west desk?","We can do the same with our reports. Or put them on a common drive? Stephane, Can you create an e-mail list to distribute your reports everyday to the west desk?","Stephane, Can you create an e-mail list to distribute your reports everyday to the west desk?",,,,,,
158,allen-p/discussion_threads/537.,"Message-ID: <11592223.1075855711614.JavaMail.evans@thyme>\nDate: Thu, 3 May 2001 03:57:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: rlehmann@yahoo.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: rlehmann@yahoo.com\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_June2001\Notes Folders\Discussion threads\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\nReagan,\n\nJust wanted to give you an update. I have changed the unit mix to include \nsome 1 bedrooms and reduced the number of buildings to 12. Kipp Flores is \nworking on the construction drawings. At the same time I am pursuing FHA \nfinancing. Once the construction drawings are complete I will send them to \nyou for a revised bid. Your original bid was competitive and I am still \nattracted to your firm because of your strong local presence and contacts.\n\nPhillip",11592223.1075855711614.JavaMail.evans@thyme,"Thu, 3 May 2001 03:57:00 -0700 (PDT)",phillip.allen@enron.com,rlehmann@yahoo.com,Mime-Version: 1.0,"Reagan,\n\nJust wanted to give you an update. I have changed the unit mix to include \nsome 1 bedrooms and reduced the number of buildings to 12. Kipp Flores is \nworking on the construction drawings. At the same time I am pursuing FHA \nfinancing. Once the construction drawings are complete I will send them to \nyou for a revised bid. Your original bid was competitive and I am still \nattracted to your firm because of your strong local presence and contacts.\n\nPhillip","Reagan, Just wanted to give you an update. I have changed the unit mix to include some 1 bedrooms and reduced the number of buildings to 12. Kipp Flores is working on the construction drawings. At the same time I am pursuing FHA financing. Once the construction drawings are complete I will send them to you for a revised bid. Your original bid was competitive and I am still attracted to your firm because of your strong local presence and contacts. Phillip","[Reagan, Just wanted to give you an update. I have changed the unit mix to include some 1 bedrooms and reduced the number of buildings to 12. Kipp Flores is working on the construction drawings. At the same time I am pursuing FHA financing. Once the construction drawings are complete I will send them to you for a revised bid. Your original bid was competitive and I am still attracted to your firm because of your strong local presence and contacts. Phillip]",...,"[drawing, bid, construction, presence, attracted, competitive, mix, fha, pursuing, flores]",Your original bid was competitive and I am still attracted to your firm because of your strong local presence and contacts. At the same time I am pursuing FHA financing. I have changed the unit mix to include some 1 bedrooms and reduced the number of buildings to 12.,Your original bid was competitive and I am still attracted to your firm because of your strong local presence and contacts. At the same time I am pursuing FHA financing. I have changed the unit mix to include some 1 bedrooms and reduced the number of buildings to 12.,Your original bid was competitive and I am still attracted to your firm because of your strong local presence and contacts . At the same time I am pursuing FHA financing .,,,,,,
128,allen-p/deleted_items/6.,"Message-ID: <23787693.1075855374494.JavaMail.evans@thyme>\nDate: Fri, 28 Dec 2001 05:36:21 -0800 (PST)\nFrom: subscriptions@intelligencepress.com\nTo: pallen@enron.com\nSubject: NGI Publications - Friday, December 28th 2001\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: subscriptions@intelligencepress.com@ENRON\nX-To: pallen@enron.com\nX-cc: \nX-bcc: \nX-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\Deleted Items\nX-Origin: Allen-P\nX-FileName: pallen (Non-Privileged).pst\n\nDear phillip,\n\n\nThis e-mail is automated notification of the availability of your\ncurrent Natural Gas Intelligence Newsletter(s). Please use your\nusername of ""pallen"" and your password to access\n\n NGI's Daily Gas Price Index\n\nhttp://intelligencepress.com/subscribers/index.html\n\nIf you have forgotten your password please visit\n http://intelligencepress.com/password.html\nand we will send it to you.\n\nIf you would like to stop receiving e-mail notifications when your\npublications are available, please reply to this message with\nREMOVE E-MAIL in the subject line.\n\nThank you for your subscription.\n\nFor information about other Intelligence Press products and services,\nincluding maps and glossaries visit our web site at\nhttp://intelligencepress.com or call toll-free (800) 427-5747.\n\nALL RIGHTS RESERVED. (c) 2001, Intelligence Press, Inc.\n---\n",23787693.1075855374494.JavaMail.evans@thyme,"Fri, 28 Dec 2001 05:36:21 -0800 (PST)",subscriptions@intelligencepress.com,pallen@enron.com,"NGI Publications - Friday, December 28th 2001","Dear phillip,\n\n\nThis e-mail is automated notification of the availability of your\ncurrent Natural Gas Intelligence Newsletter(s). Please use your\nusername of ""pallen"" and your password to access\n\n NGI's Daily Gas Price Index\n\nhttp://intelligencepress.com/subscribers/index.html\n\nIf you have forgotten your password please visit\n http://intelligencepress.com/password.html\nand we will send it to you.\n\nIf you would like to stop receiving e-mail notifications when your\npublications are available, please reply to this message with\nREMOVE E-MAIL in the subject line.\n\nThank you for your subscription.\n\nFor information about other Intelligence Press products and services,\nincluding maps and glossaries visit our web site at\nhttp://intelligencepress.com or call toll-free (800) 427-5747.\n\nALL RIGHTS RESERVED. (c) 2001, Intelligence Press, Inc.\n---","Dear phillip, This e-mail is automated notification of the availability of your current Natural Gas Intelligence Newsletter(s). Please use your username of ""pallen"" and your password to access NGI's Daily Gas Price Index [LINK] If you have forgotten your password please visit [LINK] and we will send it to you. If you would like to stop receiving e-mail notifications when your publications are available, please reply to this message with REMOVE E-MAIL in the subject line. Thank you for your subscription. For information about other Intelligence Press products and services, including maps and glossaries visit our web site at [LINK] or call toll-free [PHONE]. ALL RIGHTS RESERVED. (c) 2001, Intelligence Press, Inc. ---","[Dear phillip, This e-mail is automated notification of the availability of your current Natural Gas Intelligence Newsletter(s). Please use your username of ""pallen"" and your password to access NGI's Daily Gas Price Index [LINK] If you have forgotten your password please visit [LINK] and we will send it to you. If you would like to stop receiving e-mail notifications when your publications are available, please reply to this message with REMOVE E-MAIL in the subject line. Thank you for your subscription. For information about other Intelligence Press products and services, including maps and glossaries visit our web site at [LINK] or call toll-free [PHONE]. ALL RIGHTS RESERVED. (c) 2001, Intelligence Press, Inc. ---]",...,"[intelligence, notification, press, mail, link, password, visit, glossary, forgotten, automated]","(c) 2001, Intelligence Press, Inc. Dear phillip, This e-mail is automated notification of the availability of your current Natural Gas Intelligence Newsletter(s). Please use your username of ""pallen"" and your password to access NGI's Daily Gas Price Index [LINK] If you have forgotten your password please visit [LINK] and we will send it to you.","(c) 2001, Intelligence Press, Inc. Dear phillip, This e-mail is automated notification of the availability of your current Natural Gas Intelligence Newsletter(s). Please use your username of ""pallen"" and your password to access NGI's Daily Gas Price Index [LINK] If you have forgotten your password please visit [LINK] and we will send it to you.","Please use your username of ""pallen"" and your password to access NGI's Daily Gas Price Index . If you have forgotten your password please visit [LINK] and we will send it to you .",,,,,,


In [166]:
extractive_cols = ["extractive_rouge1", "extractive_rouge2", "extractive_rougeL"]
abstractive_cols = ["abstractive_rouge1", "abstractive_rouge2", "abstractive_rougeL"]

extractive_avg = evaluated_df[extractive_cols].mean()
abstractive_avg = evaluated_df[abstractive_cols].mean()

summary_df = pd.DataFrame({
    "Extractive": extractive_avg.values,
    "Abstractive": abstractive_avg.values
}, index=["ROUGE-1", "ROUGE-2", "ROUGE-L"])

print("\nAverage ROUGE Scores:\n")
display(summary_df.round(4))



Average ROUGE Scores:



Unnamed: 0,Extractive,Abstractive
ROUGE-1,0.5809,0.375
ROUGE-2,0.5504,0.3457
ROUGE-L,0.4128,0.3259


In [167]:
extractive_mean = extractive_avg.mean()
abstractive_mean = abstractive_avg.mean()

print("\nWhich performed better on average?")
if extractive_mean > abstractive_mean:
    print(f"Extractive summarization performed better overall ({extractive_mean:.3f} vs {abstractive_mean:.3f})")
elif abstractive_mean > extractive_mean:
    print(f"Abstractive summarization performed better overall ({abstractive_mean:.3f} vs {extractive_mean:.3f})")
else:
    print(f"Both performed equally well on average ({extractive_mean:.3f})")



Which performed better on average?
Extractive summarization performed better overall (0.515 vs 0.349)


In [168]:
avg_extractive = evaluated_df[["extractive_rouge1", "extractive_rouge2", "extractive_rougeL"]].mean()
avg_abstractive = evaluated_df[["abstractive_rouge1", "abstractive_rouge2", "abstractive_rougeL"]].mean()

fig, ax = plt.subplots()
avg_extractive.plot(kind="bar", color="blue", alpha=0.6, label="Extractive", ax=ax)
avg_abstractive.plot(kind="bar", color="green", alpha=0.6, label="Abstractive", ax=ax)

plt.title("Average ROUGE Scores")
plt.ylabel("F1 Score")
plt.legend()
plt.show()


NameError: name 'plt' is not defined

In [None]:
evaluated_df.to_csv("evaluated_summaries_with_rouge.csv", index=False)

In [None]:
run_email_summarizer_gui()
