# Enron emails

## Import files

In [1]:
# from kaggle.api.kaggle_api_extended import KaggleApi
# api = KaggleApi()
# api.authenticate()
# api.dataset_download_files("wcukierski/enron-email-dataset")

In [2]:
import pandas as pd
import numpy as np
import pymongo

In [3]:
emails_df = pd.read_csv("emails.csv", nrows=300)

In [4]:
print(emails_df.shape)
emails_df.head(3)

(300, 2)


Unnamed: 0,file,message
0,allen-p/_sent_mail/1.,Message-ID: <18782981.1075855378110.JavaMail.e...
1,allen-p/_sent_mail/10.,Message-ID: <15464986.1075855378456.JavaMail.e...
2,allen-p/_sent_mail/100.,Message-ID: <24216240.1075855687451.JavaMail.e...


In [5]:
print(emails_df['message'][101])

Message-ID: <13302421.1075855689330.JavaMail.evans@thyme>
Date: Wed, 16 Aug 2000 03:59:00 -0700 (PDT)
From: phillip.allen@enron.com
To: ina.rangel@enron.com
Subject: ENA Management Committee
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Phillip K Allen
X-To: Ina Rangel
X-cc: 
X-bcc: 
X-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail
X-Origin: Allen-P
X-FileName: pallen.nsf

---------------------- Forwarded by Phillip K Allen/HOU/ECT on 08/16/2000 
10:58 AM ---------------------------
   
	
	
	From:  David W Delainey                           08/15/2000 01:28 PM
	

Sent by: Kay Chapman
To: Tim Belden/HOU/ECT@ECT, Kevin M Presto/HOU/ECT@ECT, John 
Arnold/HOU/ECT@ECT, Scott Neal/HOU/ECT@ECT, Hunter S Shively/HOU/ECT@ECT, 
Phillip K Allen/HOU/ECT@ECT, Janet R Dietrich/HOU/ECT@ECT, Christopher F 
Calger/PDX/ECT@ECT, W David Duran/HOU/ECT@ECT, Raymond Bowen/HOU/ECT@ECT, 
Jeff Donahue/HOU/ECT@ECT, Brian Redmond/HOU/ECT@ECT, C John

In [6]:
emails_df.drop_duplicates(subset="message", inplace=True)
emails_df.dropna(axis=1, inplace=True)
print(emails_df.shape)

(300, 2)


### A few figures

In [7]:
print("Number of 'na' values' in dataset messages:")
print(pd.isna(emails_df["message"]).sum())

print("Number of duplicates in dataset messages:")
print(emails_df.duplicated(subset="message").sum())

print("Average email length (medata included)")
print(emails_df["message"].apply(len).mean())
print("Median email length (medata included)")
print(emails_df["message"].apply(len).median())


Number of 'na' values' in dataset messages:
0
Number of duplicates in dataset messages:
0
Average email length (medata included)
1285.76
Median email length (medata included)
749.5


## Let's put all this into a MongoDB database!

In [8]:
client = pymongo.MongoClient(host="127.0.0.1",
                     port=27017)

db = client["simplon"]
col = db["raw_mail"]

In [9]:
# make a Python list with all items
mongo_docs = [{"filename": emails_df["file"].iloc[row],
               "message": emails_df["message"].iloc[row]} for row in range(emails_df.shape[0])]

# mongo_docs[0]

In [10]:
# insert the list of documents into the Mongo DB
result = col.insert_many(mongo_docs)
print(len(result.inserted_ids), "inserted documents.")
print("An example:", col.find_one())

300 inserted documents.
An example: {'_id': ObjectId('5e831695c3a1888d5f8650fa'), 'filename': 'allen-p/_sent_mail/1.', 'message': "Message-ID: <18782981.1075855378110.JavaMail.evans@thyme>\nDate: Mon, 14 May 2001 16:39:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: tim.belden@enron.com\nSubject: \nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Tim Belden <Tim Belden/Enron@EnronXGate>\nX-cc: \nX-bcc: \nX-Folder: \\Phillip_Allen_Jan2002_1\\Allen, Phillip K.\\'Sent Mail\nX-Origin: Allen-P\nX-FileName: pallen (Non-Privileged).pst\n\nHere is our forecast\n\n "}


## A little of cleaning

(We do the cleaning on the dataframe)

### Pure Python - didn't work

In [11]:
def get_text(row_idx):
    '''Returns the content from an email string object'''
    filename = emails_df["file"].iloc[row_idx]
    msg = emails_df["message"].iloc[row_idx]
    msg_list = msg.split("\n")
    dic = {"filename": filename}
    
    for row in range(len(msg_list)):
        str_row = msg_list[row]

        if str_row != "":
            sep = str_row.find(": ")
            k = str_row[:sep]
            v = str_row[sep+2 :]
            if v != "":
                dic[k] = v
        else:
            dic["body"] = "\n".join(msg_list[row + 1:-1])
            break
    return dic

# test example
#print(get_text(0).keys())

# Check if all the format is correct
def keys_ok(any_dic):
    for key in any_dic.keys():
        if key.find(".") != -1:
#            print(any_dic)
            return False
    return True

all([keys_ok(get_text(i)) for i in range(emails_df.shape[0])])

False

### Regex cleaning

In [12]:
import re

In [13]:
print(emails_df['message'][101])

Message-ID: <13302421.1075855689330.JavaMail.evans@thyme>
Date: Wed, 16 Aug 2000 03:59:00 -0700 (PDT)
From: phillip.allen@enron.com
To: ina.rangel@enron.com
Subject: ENA Management Committee
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Phillip K Allen
X-To: Ina Rangel
X-cc: 
X-bcc: 
X-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail
X-Origin: Allen-P
X-FileName: pallen.nsf

---------------------- Forwarded by Phillip K Allen/HOU/ECT on 08/16/2000 
10:58 AM ---------------------------
   
	
	
	From:  David W Delainey                           08/15/2000 01:28 PM
	

Sent by: Kay Chapman
To: Tim Belden/HOU/ECT@ECT, Kevin M Presto/HOU/ECT@ECT, John 
Arnold/HOU/ECT@ECT, Scott Neal/HOU/ECT@ECT, Hunter S Shively/HOU/ECT@ECT, 
Phillip K Allen/HOU/ECT@ECT, Janet R Dietrich/HOU/ECT@ECT, Christopher F 
Calger/PDX/ECT@ECT, W David Duran/HOU/ECT@ECT, Raymond Bowen/HOU/ECT@ECT, 
Jeff Donahue/HOU/ECT@ECT, Brian Redmond/HOU/ECT@ECT, C John

In [14]:
def msg_to_dic(cmplte_msg):
    meta_and_body = re.split(r'\n\n', cmplte_msg, maxsplit=1)
    meta = meta_and_body[0]
    body = meta_and_body[1]
    meta_keys = re.findall(r'(.+): .+', meta)
    meta_vals = re.findall(r'.+: (.+)', meta)

    dic = dict(zip(meta_keys, meta_vals))
    dic["body"] = body
    return dic

msg_to_dic(emails_df['message'][101])

{'Message-ID': '<13302421.1075855689330.JavaMail.evans@thyme>',
 'Date': 'Wed, 16 Aug 2000 03:59:00 -0700 (PDT)',
 'From': 'phillip.allen@enron.com',
 'To': 'ina.rangel@enron.com',
 'Subject': 'ENA Management Committee',
 'Mime-Version': '1.0',
 'Content-Type': 'text/plain; charset=us-ascii',
 'Content-Transfer-Encoding': '7bit',
 'X-From': 'Phillip K Allen',
 'X-To': 'Ina Rangel',
 'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
 'X-Origin': 'Allen-P',
 'X-FileName': 'pallen.nsf',
 'body': '---------------------- Forwarded by Phillip K Allen/HOU/ECT on 08/16/2000 \n10:58 AM ---------------------------\n   \n\t\n\t\n\tFrom:  David W Delainey                           08/15/2000 01:28 PM\n\t\n\nSent by: Kay Chapman\nTo: Tim Belden/HOU/ECT@ECT, Kevin M Presto/HOU/ECT@ECT, John \nArnold/HOU/ECT@ECT, Scott Neal/HOU/ECT@ECT, Hunter S Shively/HOU/ECT@ECT, \nPhillip K Allen/HOU/ECT@ECT, Janet R Dietrich/HOU/ECT@ECT, Christopher F \nCalger/PDX/ECT@ECT, W David Duran/HOU/ECT@E

In [17]:
col2 = db["parsed_mail"]

# Erase the collection's record - we should insert into a blank database
if col2.count() > 0:
    col2.drop()

# create the list of documents to be created
mongo_docs = [{"filename": emails_df["file"][row],
               "message": msg_to_dic(emails_df["message"][row])} for row in range(len(emails_df))]

  after removing the cwd from sys.path.


In [18]:
# insert the list of documents into the Mongo DB
result = col2.insert_many(mongo_docs)
print(len(result.inserted_ids), "inserted documents.")

300 inserted documents.


In [19]:
print("An example:", col2.find_one())

An example: {'_id': ObjectId('5e8361ab981c5fffc40cf5cb'), 'filename': 'allen-p/_sent_mail/1.', 'message': {'Message-ID': '<18782981.1075855378110.JavaMail.evans@thyme>', 'Date': 'Mon, 14 May 2001 16:39:00 -0700 (PDT)', 'From': 'phillip.allen@enron.com', 'To': 'tim.belden@enron.com', 'Mime-Version': '1.0', 'Content-Type': 'text/plain; charset=us-ascii', 'Content-Transfer-Encoding': '7bit', 'X-From': 'Phillip K Allen', 'X-To': 'Tim Belden <Tim Belden/Enron@EnronXGate>', 'X-Folder': "\\Phillip_Allen_Jan2002_1\\Allen, Phillip K.\\'Sent Mail", 'X-Origin': 'Allen-P', 'X-FileName': 'pallen (Non-Privileged).pst', 'body': 'Here is our forecast\n\n '}}


Seems to be working! :-)

## Querying MongoDB in Python

In [21]:
import pprint

In [22]:
pprint.pprint(col2.find_one({"filename": "allen-p/_sent_mail/1."}))

{'_id': ObjectId('5e8361ab981c5fffc40cf5cb'),
 'filename': 'allen-p/_sent_mail/1.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Mon, 14 May 2001 16:39:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<18782981.1075855378110.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'To': 'tim.belden@enron.com',
             'X-FileName': 'pallen (Non-Privileged).pst',
             'X-Folder': "\\Phillip_Allen_Jan2002_1\\Allen, Phillip K.\\'Sent "
                         'Mail',
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Tim Belden <Tim Belden/Enron@EnronXGate>',
             'body': 'Here is our forecast\n\n '}}


In [23]:
for mail in col2.find():
    pprint.pprint(mail)

{'_id': ObjectId('5e8361ab981c5fffc40cf5cb'),
 'filename': 'allen-p/_sent_mail/1.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Mon, 14 May 2001 16:39:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<18782981.1075855378110.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'To': 'tim.belden@enron.com',
             'X-FileName': 'pallen (Non-Privileged).pst',
             'X-Folder': "\\Phillip_Allen_Jan2002_1\\Allen, Phillip K.\\'Sent "
                         'Mail',
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Tim Belden <Tim Belden/Enron@EnronXGate>',
             'body': 'Here is our forecast\n\n '}}
{'_id': ObjectId('5e8361ab981c5fffc40cf5cc'),
 'filename': 'allen-p/_sent_mail/10.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain

             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Tue, 3 Oct 2000 09:15:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<29177675.1075855687692.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: Meeting re': 'Storage Strategies in the West',
             'To': 'ina.rangel@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Ina Rangel',
             'body': '---------------------- Forwarded by Phillip K '
                     'Allen/HOU/ECT on 10/03/2000 \n'
                     '04:13 PM ---------------------------\n'
                     '\n'
                     '\n'
                     'Nancy Hall@ENRON\n'
                     '10/02/2000 06:42 AM\n'
                     'To: Mark Whitt/NA/Enron@E

 'filename': 'allen-p/_sent_mail/119.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Tue, 26 Sep 2000 07:01:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<10523086.1075855687873.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'To': 'ina.rangel@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Ina Rangel',
             'body': '---------------------- Forwarded by Phillip K '
                     'Allen/HOU/ECT on 09/26/2000 \n'
                     '02:00 PM ---------------------------\n'
                     '\n'
                     '\n'
                     '\tReschedule\n'
                     'Chairperson: Richard Burchfield\n'
                  

                     '\n'
                     ' Ownership Structure\n'
                     '\n'
                     ' What will be the ownership structure? Limited '
                     'partnership? General partner?\n'
                     '\n'
                     ' What are all the legal entities that will be involved '
                     'and in what \n'
                     'capacity(regarding ownership and \n'
                     ' liabilities)?\n'
                     '\n'
                     ' Who owns the land? improvements?\n'
                     '\n'
                     ' Who holds the various loans?\n'
                     '\n'
                     ' Is the land collateral?\n'
                     '\n'
                     ' Investment\n'
                     ' \n'
                     ' What happens to initial investment?\n'
                     '\n'
                     ' Is it used to purchase land for cash?Secure future '
                     'loans?\n'
      

                     'Please respond to <cbpres@austin.rr.com>\n'
                     'To: "Phillip Allen" <pallen@enron.com>\n'
                     'cc: "Larry Lewter" <retwell@mail.sanmarcos.net>, "Diana '
                     'Zuniga" \n'
                     '<invest@bga.com> \n'
                     'Subject: Sagewood Town Homes\n'
                     '\n'
                     '\n'
                     'I was aware that Regan Lehman, the lot developer for the '
                     'entire 70 lot\n'
                     "duplex project, was selling his units in the $180's,  He "
                     'does have a much\n'
                     'lower basis in the lots than anyone else, but the prime '
                     'differences are due\n'
                     'to a) he is selling them during construction and b) they '
                     'are smaller units.\n'
                     'We do not know the exact size of each of his units, but '
                     'we believe o

 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Tue, 5 Sep 2000 06:51:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<17954197.1075855688641.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: RE': 'Receipt of Team Selection Form - Executive Impact '
                            '& Influence',
             'To': 'ina.rangel@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Ina Rangel',
             'body': '---------------------- Forwarded by Phillip K '
                     'Allen/HOU/ECT on 09/05/2000 \n'
                     '01:50 PM ---------------------------\n'
                     '\n'
                     '\n'
                     '"Christi

{'_id': ObjectId('5e8361ab981c5fffc40cf61a'),
 'filename': 'allen-p/_sent_mail/166.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Mon, 28 Aug 2000 09:30:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<9552654.1075855688898.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'To': 'bs_stone@yahoo.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'bs_stone@yahoo.com',
             'body': 'Brenda\n'
                     '\n'
                     ' Can you send me your address in College Station.\n'
                     '\n'
                     'Phillip'}}
{'_id': ObjectId('5e8361ab981c5fffc40cf61b'),
 'filename': 'allen-p/_sent_mail/167.',
 'message': {'Content-

                     'for you to practice \n'
                     'excel.\n'}}
{'_id': ObjectId('5e8361ab981c5fffc40cf629'),
 'filename': 'allen-p/_sent_mail/18.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Mon, 30 Apr 2001 14:22:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<22040365.1075855378763.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject': 'Request from Steve Kean',
             'To': 'keith.holst@enron.com',
             'X-FileName': 'pallen (Non-Privileged).pst',
             'X-Folder': "\\Phillip_Allen_Jan2002_1\\Allen, Phillip K.\\'Sent "
                         'Mail',
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Keith Holst <Keith Holst/HOU/ECT@ect>',
             'body': '\n'
                     '---------------------- Forwarded by Phillip K '


             'To': 'stagecoachmama@hotmail.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'stagecoachmama@hotmail.com',
             'body': 'Lucy,\n'
                     '\n'
                     ' Now that #44 is rented and you have settled in for a '
                     'couple of months, we \n'
                     'need to focus on expenses and recordkeeping.\n'
                     '\n'
                     ' First, I want to implement the following changes:\n'
                     '\n'
                     '  1.  No Overtime without my written (or email) '
                     'instructions.\n'
                     '  2.  Daily timesheets for you and Wade faxed to me '
                     'daily\n'
                     '  3.  Paychecks will be issued each Friday by me at the '
                     'St

{'_id': ObjectId('5e8361ab981c5fffc40cf64d'),
 'filename': 'allen-p/_sent_mail/211.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Wed, 19 Jul 2000 03:39:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<20376172.1075855689875.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject': 'Interactive Information Resource',
             'To': 'hunter.shively@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Hunter S Shively',
             'body': '---------------------- Forwarded by Phillip K '
                     'Allen/HOU/ECT on 07/19/2000 \n'
                     '10:39 AM ---------------------------\n'
                     '\n'
                     '\

             'X-To': 'Gary Taylor',
             'body': 'gary,\n\n thanks for the info.'}}
{'_id': ObjectId('5e8361ab981c5fffc40cf663'),
 'filename': 'allen-p/_sent_mail/231.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Fri, 7 Jul 2000 06:41:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<23290954.1075855690306.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: Re': 'Memory',
             'To': 'felix.buitron@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Felix Buitron',
             'body': 'Anytime after 3 p.m.'}}
{'_id': ObjectId('5e8361ab981c5fffc40cf664'),
 'filename': 'allen-p/_sent_mail/232.',
 'message': {'Content-Transfer-Encodin

             'Message-ID': '<12786049.1075855378908.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: Re': "FERC's Prospective Mitigation and Monitoring Plan "
                            'for CA',
             'To': 'ray.alvarez@enron.com',
             'X-FileName': 'pallen (Non-Privileged).pst',
             'X-Folder': "\\Phillip_Allen_Jan2002_1\\Allen, Phillip K.\\'Sent "
                         'Mail',
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Ray Alvarez <Ray Alvarez/NA/Enron@ENRON>',
             'body': 'Ray,\n'
                     '\n'
                     'Is there any detail on the gas cost proxy.  Which '
                     'delivery points from which publication will be used?  '
                     'Basically, can you help us get any clarification on the '
                     'language "the average daily cost of gas for all delivery '
                     'points in California

                     '($3,199)          \n'
                     '($2,769)   ($273)\n'
                     'Rec:   Ques/Clay Basin/0184   NWPL/Opal 543  NWPL/Opal   '
                     'Sumas      NWPL/S of \n'
                     'Gr Rvr\n'
                     'Del:   NWPL/S of Green River/Clay   Ques/Clay '
                     'Basin/0852   Ques/Clay \n'
                     'Basin/0852   Ques/Clay Basin    Ques/Clay Basin\n'
                     '1        329       8,738\n'
                     '2     1,500\n'
                     '3         2,974    11,362\n'
                     '4         6,741    12,349       1,439\n'
                     '5       19,052          3,183\n'
                     '9            333\n'
                     '13  30,863       2,680\n'
                     '14  30,451    \n'
                     '15  35,226    \n'
                     '16         6,979              235 \n'
                     '17  17,464     \n'
                     '18    9,294 

             'Subject': 'Western Strategy Session Materials',
             'To': 'hector.campos@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Hector Campos',
             'body': '---------------------- Forwarded by Phillip K '
                     'Allen/HOU/ECT on 04/26/2000 \n'
                     '08:36 AM ---------------------------\n'
                     '\n'
                     '\n'
                     'TIM HEIZENRADER\n'
                     '04/25/2000 11:43 AM\n'
                     'To: Jim Fallon/HOU/ECT@ECT, Phillip K Allen/HOU/ECT@ECT\n'
                     'cc: Tim Belden/HOU/ECT@ECT \n'
                     'Subject: Western Strategy Session Materials\n'
                     '\n'
                     "Today's charts are attached: \n"}}
{'_id': ObjectId('5e8361ab981c5fffc40cf68d'),


{'_id': ObjectId('5e8361ab981c5fffc40cf6a2'),
 'filename': 'allen-p/_sent_mail/289.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Mon, 20 Mar 2000 00:58:00 -0800 (PST)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<22191798.1075855691557.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject': 'Re:',
             'To': 'maryrichards7@hotmail.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': '"mary richards" <maryrichards7@hotmail.com> @ ENRON',
             'body': 'Mary \n'
                     '\n'
                     'I was out of the office on friday.\n'
                     '\n'
                     'I will call you about wade later today\n'
                     

             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Matthew Lenhart',
             'body': '---------------------- Forwarded by Phillip K '
                     'Allen/HOU/ECT on 03/03/2000 \n'
                     '08:36 AM ---------------------------\n'
                     '\n'
                     '\n'
                     '"the shockwave.com team" '
                     '<shockwave.com@shockwave.m0.net> on 03/03/2000 \n'
                     '12:29:38 AM\n'
                     'Please respond to shockwave.com@shockwave.m0.net\n'
                     'To: pallen@enron.com\n'
                     'cc:  \n'
                     'Subject: Just Released! Exclusive new animation from '
                     'Stan Lee\n'
                     '\n'
                     '\n'
                     '\n'
                     'Dear Phillip, \n'
                     '\n'


                     ' Can you please call my credit desk at 713-853-1803.  '
                     'They have not received \n'
                     'any financials for ACN Power.  \n'
                     '\n'
                     'Thanks,\n'
                     '\n'
                     'Phillip Allen'}}
{'_id': ObjectId('5e8361ab981c5fffc40cf6c7'),
 'filename': 'allen-p/_sent_mail/322.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Mon, 7 Feb 2000 07:39:00 -0800 (PST)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<17093085.1075855692300.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: Re': 'January El paso invoice',
             'To': 'kimberly.olinger@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
        

{'_id': ObjectId('5e8361ab981c5fffc40cf6dc'),
 'filename': 'allen-p/_sent_mail/342.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Thu, 6 Jan 2000 07:19:00 -0800 (PST)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<12810713.1075855692730.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: Re': 'receipts',
             'To': 'maryrichards7@hotmail.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': '"mary richards" <maryrichards7@hotmail.com> @ ENRON',
             'body': 'received the file.  It worked.  Good job.'}}
{'_id': ObjectId('5e8361ab981c5fffc40cf6dd'),
 'filename': 'allen-p/_sent_mail/343.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Con

 'filename': 'allen-p/_sent_mail/354.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Fri, 4 May 2001 05:18:00 -0700 (PDT)',
             'From': 'phillip.allen@enron.com',
             'Message-ID': '<14983560.1075855723797.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: Re': 'Western Wholesale Activities - Gas & Power Conf. '
                            'Call',
             'To': 'mike.grigsby@enron.com, keith.holst@enron.com, '
                   'frank.ermis@enron.com, ',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_June2001\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Mike Grigsby, Keith Holst, Frank Ermis, Jane M Tholt, '
                     'Jay Reitmeyer, Tori Kuykendall, Matthew Lenhart',
             'body': '---------------------- For

             'Message-ID': '<6235729.1075855723988.JavaMail.evans@thyme>',
             'Mime-Version': '1.0',
             'Subject: Re': 'Request from Steve Kean',
             'To': 'alan.comnes@enron.com',
             'X-FileName': 'pallen.nsf',
             'X-Folder': "\\Phillip_Allen_June2001\\Notes Folders\\'sent mail",
             'X-From': 'Phillip K Allen',
             'X-Origin': 'Allen-P',
             'X-To': 'Alan Comnes',
             'body': 'Alan, \n'
                     '\n'
                     'You should have received updated numbers from Keith '
                     'Holst.  Call me if you \n'
                     'did not receive them.\n'
                     '\n'
                     'Phillip'}}
{'_id': ObjectId('5e8361ab981c5fffc40cf6f2'),
 'filename': 'allen-p/_sent_mail/363.',
 'message': {'Content-Transfer-Encoding': '7bit',
             'Content-Type': 'text/plain; charset=us-ascii',
             'Date': 'Mon, 30 Apr 2001 04:22:00 -0700 (PDT)',
       