# Starter code for exploring the Enron dataset (emails + finances)

This notebook loads the dataset, which is stored as a pickled dict of dicts.

The dataset has the form: `enron_data["LASTNAME FIRSTNAME MIDDLEINITIAL"] = { features_dict }`

`{features_dict}` is a dictionary of features associated with that person.

You should explore `features_dict` as part of the mini-project, but here's an example to get you started:

`enron_data["SKILLING JEFFREY K"]["bonus"] = 5600000`

In [4]:
import pickle

# Load the pickled dataset into `enron_data`. The file is opened in binary
# mode ('rb') because pickle reads bytes, and the `with` block closes it
# as soon as loading finishes.
# NOTE(review): unpickling can execute arbitrary code, so this path should
# only ever point at a trusted dataset file.
with open('../17-final-project/final_project_dataset_unix.pkl', 'rb') as f:
    enron_data = pickle.load(f)

## ↓↓↓ Your Code Goes Here ↓↓↓

In [5]:
# Number of people in the dataset: the top-level dict has one key per person.
people_count = len(enron_data)
print(people_count)

146


In [6]:
# Number of features available for a person, measured on one sample record
# (Kenneth Lay's feature dict).
lay_features = enron_data["LAY KENNETH L"]
print(len(lay_features))

21


In [7]:
# Collect the names of all persons of interest and print how many there are.
# A record counts when its "poi" value compares equal to 1 (True does).
poi = [
    name
    for name, features in enron_data.items()
    if features["poi"] == 1
]
print(len(poi))

18


In [8]:
# Value of stock belonging to James Prentice: print his full record and read
# the answer from the stock entries (presumably "total_stock_value").
prentice_record = enron_data["PRENTICE JAMES"]
print(prentice_record)

{'salary': 'NaN', 'to_messages': 'NaN', 'deferral_payments': 564348, 'total_payments': 564348, 'loan_advances': 'NaN', 'bonus': 'NaN', 'email_address': 'james.prentice@enron.com', 'restricted_stock_deferred': 'NaN', 'deferred_income': 'NaN', 'total_stock_value': 1095040, 'expenses': 'NaN', 'from_poi_to_this_person': 'NaN', 'exercised_stock_options': 886231, 'from_messages': 'NaN', 'other': 'NaN', 'from_this_person_to_poi': 'NaN', 'poi': False, 'long_term_incentive': 'NaN', 'shared_receipt_with_poi': 'NaN', 'restricted_stock': 208809, 'director_fees': 'NaN'}


In [9]:
# Messages from Wesley Colwell to POIs: print his full record and read the
# answer from the "from_this_person_to_poi" entry.
colwell_record = enron_data["COLWELL WESLEY"]
print(colwell_record)

{'salary': 288542, 'to_messages': 1758, 'deferral_payments': 27610, 'total_payments': 1490344, 'loan_advances': 'NaN', 'bonus': 1200000, 'email_address': 'wes.colwell@enron.com', 'restricted_stock_deferred': 'NaN', 'deferred_income': -144062, 'total_stock_value': 698242, 'expenses': 16514, 'from_poi_to_this_person': 240, 'exercised_stock_options': 'NaN', 'from_messages': 40, 'other': 101740, 'from_this_person_to_poi': 11, 'poi': True, 'long_term_incentive': 'NaN', 'shared_receipt_with_poi': 1132, 'restricted_stock': 698242, 'director_fees': 'NaN'}


In [10]:
# Value of stock options exercised by Jeffrey K Skilling: print his full
# record and read the answer from the "exercised_stock_options" entry.
skilling_record = enron_data["SKILLING JEFFREY K"]
print(skilling_record)

{'salary': 1111258, 'to_messages': 3627, 'deferral_payments': 'NaN', 'total_payments': 8682716, 'loan_advances': 'NaN', 'bonus': 5600000, 'email_address': 'jeff.skilling@enron.com', 'restricted_stock_deferred': 'NaN', 'deferred_income': 'NaN', 'total_stock_value': 26093672, 'expenses': 29336, 'from_poi_to_this_person': 88, 'exercised_stock_options': 19250000, 'from_messages': 108, 'other': 22122, 'from_this_person_to_poi': 30, 'poi': True, 'long_term_incentive': 1920000, 'shared_receipt_with_poi': 2042, 'restricted_stock': 6843672, 'director_fees': 'NaN'}


In [15]:
# Of Lay, Skilling, and Fastow, who took home the most money? Print each
# person's "total_payments" feature, one labelled line per person, so the
# three figures can be compared.
for label, person in (
    ("lay:", "LAY KENNETH L"),
    ("skilling:", "SKILLING JEFFREY K"),
    ("fastow:", "FASTOW ANDREW S"),
):
    print(label, enron_data[person]["total_payments"])

lay: 103559793
skilling: 8682716
fastow: 2424083


In [17]:
# People with a known salary. Missing values are stored as the string "NaN",
# so a value is "known" when it differs from that marker.
known_salaries = sum(
    1 for features in enron_data.values() if features["salary"] != "NaN"
)
print("salaries:", known_salaries)

# People with a known email address (same "NaN" marker convention).
known_emails = sum(
    1 for features in enron_data.values() if features["email_address"] != "NaN"
)
print("emails:", known_emails)

salaries: 95
emails: 111


In [18]:
# How many people in the E+F dataset (as it currently exists) have "NaN" for
# their total payments? Missing values are stored as the string "NaN".
missing_payments = sum(
    1 for features in enron_data.values() if features["total_payments"] == "NaN"
)
print("payments missing:", missing_payments)

# What share of the whole dataset is this? The value is printed as a
# fraction between 0 and 1 (it is not multiplied by 100). The denominator is
# the actual dataset size rather than a hard-coded 146, so the figure stays
# correct if records are added or removed.
print(missing_payments / len(enron_data))

payments missing: 21
0.14383561643835616


In [19]:
# How many POIs in the E+F dataset have "NaN" for their total payments?
# A record is counted when its "poi" flag equals True and its
# "total_payments" is the "NaN" marker string.
poi_missing_payments = sum(
    1
    for features in enron_data.values()
    if features["poi"] == True and features["total_payments"] == "NaN"
)
print("poi payments missing:", poi_missing_payments)

# What share of all POIs is this? Printed as a fraction between 0 and 1.
total_pois = sum(
    1 for features in enron_data.values() if features["poi"] == True
)
print(poi_missing_payments / total_pois)

poi payments missing: 0
0.0
