# Enron Email Analysis

In [1]:
import requests
import pandas as pd
from dateutil import parser
import json

# Base URL of the Elasticsearch node. Request paths are appended directly to it,
# so the trailing slash is required.
host = 'http://18.188.56.207:9200/'

# Verify connection and check if 'enron' index exists.
# The timeout stops the cell from hanging forever on an unreachable host, and
# network-level failures (refused connection, DNS error, timeout) are routed to
# the same failure message as a bad status code instead of ending in a traceback.
try:
    response = requests.get(
        host + '_cat/indices/enron',
        headers={'Content-Type': 'application/json'},
        timeout=10,
    )
    index_available = response.status_code == 200 and 'enron' in response.text
    connection_error = None
except requests.exceptions.RequestException as exc:
    index_available = False
    connection_error = exc

if index_available:
    print("Successfully connected to the 'enron' index.")
else:
    print("Failed to connect to the 'enron' index. Please check the host URL and index name.")
    if connection_error is not None:
        # Surface the underlying cause so the failure is diagnosable.
        print(f"Details: {connection_error}")

Successfully connected to the 'enron' index.


In [2]:
def elasticsearch_results_to_df(results):
    '''
    Converts Elasticsearch results to a pandas DataFrame.

    Parameters
    ----------
    results : object with a ``json()`` method
        The search response. ``results.json()['hits']['hits']`` must be a list
        of hits, each carrying an ``'_id'`` and a ``'_source'`` mapping.

    Returns
    -------
    pandas.DataFrame
        One row per hit, indexed by the hit's ``'_id'``, with one column per
        ``'_source'`` field. When a ``'date'`` column is present its values are
        parsed with ``dateutil.parser.parse``. A search with no hits yields an
        empty DataFrame instead of raising ``KeyError``.
    '''
    hits = results.json()['hits']['hits']
    sources = [hit['_source'] for hit in hits]
    doc_ids = [hit['_id'] for hit in hits]
    data = pd.DataFrame(sources, index=doc_ids)
    # With zero hits the frame has no columns at all, so touching data['date']
    # unconditionally would raise KeyError before callers can check `.empty`.
    if 'date' in data.columns:
        data['date'] = data['date'].apply(parser.parse)
    return data

def print_df_row(row):
    '''
    Writes one email to stdout as a labelled block framed by separator lines.

    `row` only needs a dict-style ``get(key, default)``; a field that is
    missing is printed as an empty string. Returns None.
    '''
    separator = '____________________'
    # (label, field) pairs in the order they are printed.
    labelled_fields = (
        ('RE: ', 'subject'),
        ('At: ', 'date'),
        ('From: ', 'sender'),
        ('To: ', 'recipients'),
        ('CC: ', 'cc'),
        ('BCC: ', 'bcc'),
        ('Body:\n', 'text'),  # the body starts on its own line
    )

    print(separator)
    for label, field in labelled_fields:
        print(f"{label}{row.get(field, '')}")
    print(separator)

## Match Financial Keywords

In [4]:
# Elasticsearch query for the 20 highest-scoring emails that combine financial
# vocabulary with a sensitive context (confidentiality cues, executives, or
# departments), restricted to 1998-2000 and stripped of noise and forwards.

def _text_clauses(kind, phrases):
    """Build one `{kind: {"text": phrase}}` clause per phrase, keeping their order."""
    return [{kind: {"text": phrase}} for phrase in phrases]


# Financial vocabulary: at least two of these clauses must match.
# NOTE(review): per the Elasticsearch docs a `match` on a multi-word string ORs
# its terms by default, so several clauses below overlap (e.g. the ones sharing
# "earnings") and "revenue recognition" appears as both match and match_phrase.
# Confirm that a single keyword satisfying two clauses is intended.
_financial_clauses = (
    _text_clauses("match", [
        "off balance sheet",
        "Special Purpose Entity",
        "Special Purpose Vehicle",
        "SPE",
        "hide debt",
        "inflate earnings",
        "additional earnings",
        "creative accounting",
        "generate earnings",
        "earnings management",
        "revenue recognition",
        "mark-to-market",
        "conceal liabilities",
    ])
    + _text_clauses("match_phrase", [
        "financial manipulation",
        "balance sheet",
        "income statement",
        "revenue recognition",
        "earnings forecast",
        "balance sheet change",
        "financial statement",
    ])
    + _text_clauses("match", [
        "pro forma earnings",
        "deferred revenue",
    ])
    + _text_clauses("match_phrase", [
        "deferred expenses",
    ])
)

# Sensitive context: at least one of these clauses must match.
_context_clauses = _text_clauses("match", [
    "confidential",
    "urgent matter",
    "strategy meeting",
    "financial review",
    "need approval",
    "Ken Lay",
    "Jeff Skilling",
    "Andrew Fastow",
    "Rebecca Mark",
    "David Duncan",
    "Frank Keane",
    "Finance Department",
    "Accounting Department",
    "Legal Department",
    "Risk Management",
    "Compliance Office",
])

# Exclusions: personal or social chatter, job postings, press items, forwards.
_excluded_clauses = (
    _text_clauses("match", [
        "personal",
        "holiday",
        "lunch",
        "jobs",
        "opening",
        "WSJ",
        "Seminar",
        "meeting agenda",
    ])
    + _text_clauses("match_phrase", [
        "social event",
        "team building",
        "your consideration",
        "TEXAS JOURNAL",
        "Wall Street Journal",
        "forwarded by",
        "forwarded message",
    ])
    + _text_clauses("match", [
        "forward",
        "fw:",
        "fwd:",
    ])
    + _text_clauses("match_phrase", [
        "----- forwarded by",
        "please see the forwarded message",
        "forward this email",
    ])
)

# Date window; according to the original author's note it is there to avoid
# returning news.
_date_window = {"range": {"date": {"gte": "1998-01-01", "lte": "2000-12-31"}}}

doc_top20 = {
    "query": {
        "bool": {
            "must": [
                {"bool": {"should": _financial_clauses, "minimum_should_match": 2}},
                {"bool": {"should": _context_clauses, "minimum_should_match": 1}},
            ],
            "filter": [_date_window],
            "must_not": _excluded_clauses,
        }
    },
    "from": 0,
    "size": 20,  # number of hits returned
}

# Execute the top-20 search query.
# The timeout keeps the cell from blocking indefinitely if the cluster stops
# responding; requests applies no timeout unless one is given.
r_top20 = requests.get(
    host + 'enron/_search',
    data=json.dumps(doc_top20),
    headers={'Content-Type': 'application/json'},
    timeout=30,
)

# Raise an exception if the request was unsuccessful
r_top20.raise_for_status()

# Extract and display the total number of matching messages.
# Depending on the Elasticsearch version, hits.total is either a bare integer
# or an object carrying the count under 'value'; both shapes are handled.
total_hits_top20 = r_top20.json()['hits']['total']
if isinstance(total_hits_top20, dict):
    total_matches_top20 = total_hits_top20.get('value', 0)
else:
    total_matches_top20 = total_hits_top20
print(f"Found {total_matches_top20} messages matching the top 20 query.")

# Convert the results to a DataFrame (at most `size` rows, i.e. 20 here)
df_top20 = elasticsearch_results_to_df(r_top20)
print(f"Returned {df_top20.shape[0]} messages.")


Found 4340 messages matching the top 20 query.
Returned 20 messages.


In [5]:
# Print every email returned by the top-20 query, or a notice when the result
# set is empty.
if not df_top20.empty:
    for doc_id, email_row in df_top20.iterrows():
        print_df_row(email_row)
else:
    print("No matching emails found with the current query parameters.")


____________________
RE: 2001 Goals and Objectives
At: 2000-12-12 17:32:00+00:00
From: james.centilli@enron.com
To: rod.hayslett@enron.com
CC: dave.waymire@enron.com
BCC: dave.waymire@enron.com
Body:
1.Develop creative and cost saving ideas to generate additional earnings. 

2.  Coordinate administrative and analytical support for the monetization of 
a significant level of non-strategic assets.   Include in the analysis all 
appropriate contacts within ET&S, Arthur Andersen experts and corporate 
accounting contacts.

3.  Perform economic analysis of projects and asset sales  to provide the 
management team a complete risk assess evaluation on all projects.   Provide 
financial support to Marketing in developing financing structures and 
evaluation process for structured products.  Update evaluation as factors 
change, and  provide information to appropriate contacts within ET&S.

Other Ideas for goals and objectives:

Developing Economic Analysis utilizing the Revenue Management info