In [1]:
import pandas as pd

The following are the Annual Summary Reports for the years 2014-2019:

In [2]:
# One annual summary CSV per year, 2014 through 2019 inclusive.
file_list = [
    f"../datasets/raw_data/{year}_Annual_Summary.csv"
    for year in range(2014, 2020)
]

The function below reads each yearly file and combines the results into a single DataFrame. We keep only outages whose event type is "Severe Weather" and whose affected area includes Massachusetts.

In [3]:
def get_outage_data(filenames):
    """Load yearly outage CSVs and keep severe-weather outages affecting Massachusetts.

    Each file is read with ``pd.read_csv``. If the parsed header has no
    "Month" column, the file is read again using its second line as the
    header. Rows and columns that are entirely empty are removed, then only
    rows whose "Event Type" equals "Severe Weather" and whose "Area Affected"
    contains "Mass" are kept. The per-file results are concatenated and the
    "Demand Loss (MW)" and "Month" columns are dropped when present.

    Parameters
    ----------
    filenames : iterable of str
        Paths of the CSV files to read, in the order they should appear in
        the result.

    Returns
    -------
    pandas.DataFrame
        The filtered rows of all files. Row labels are the ones each file had
        when it was read, so labels can repeat across files.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``filenames`` yields no files.
    KeyError
        If a file has no "Event Type" or "Area Affected" column.
    """
    outage_dfs = []
    for file in filenames:
        outages_year = pd.read_csv(file)

        # Some files start with an extra line before the real header, so the
        # expected column names are missing; in that case the header is on
        # the second line.
        if "Month" not in outages_year.columns:
            outages_year = pd.read_csv(file, header=1)

        # Remove blank rows first, then blank columns (padding read in from
        # the file).
        outages_year = outages_year.dropna(how="all").dropna(axis=1, how="all")

        is_severe_weather = outages_year["Event Type"] == "Severe Weather"
        # na=False: a row with no "Area Affected" value cannot match.
        in_massachusetts = outages_year["Area Affected"].str.contains("Mass", na=False)
        outage_dfs.append(outages_year[is_severe_weather & in_massachusetts])

    # Combine every year into one frame, keeping the column order of the
    # first file that introduces each column.
    outages_combined = pd.concat(outage_dfs, sort=False)

    # These columns are redundant for the analysis; errors="ignore" keeps the
    # function usable on files that never had them.
    return outages_combined.drop(columns=["Demand Loss (MW)", "Month"], errors="ignore")


Our Finalized Data:

In [4]:
# Build the combined severe-weather table from every yearly file, then show it.
outages_data = get_outage_data(filenames=file_list)
outages_data

Unnamed: 0,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Area Affected,NERC Region,Event Type,Number of Customers Affected,Alert Criteria
197,10/22/2014,10:46 PM,10/22/2014,10:47 PM,"New Hampshire, Maine, Massachusetts, Rhode Isl...",NPCC,Severe Weather,66650,
63,6/23/2015,6:30 PM,6/24/2015,5:00 AM,"Connecticut, Maine, Massachusetts, New Hampshi...",NPCC,Severe Weather,62442,"Loss of electric service to more than 50,000 c..."
93,8/4/2015,7:17 AM,8/5/2015,12:52 PM,Massachusetts: Rhode Island:,NPCC,Severe Weather,132000,"Loss of electric service to more than 50,000 c..."
72,7/22/2016,11:50 PM,7/23/2016,9:10 AM,Massachusetts: Connecticut: Rhode Island: New ...,NPCC,Severe Weather,57058,"Loss of electric service to more than 50,000 c..."
74,7/23/2016,7:30 PM,7/24/2016,7:30 AM,Connecticut: Massachusetts: New Hampshire: Ver...,NPCC,Severe Weather,101073,"Loss of electric service to more than 50,000 c..."
100,9/11/2016,12:05 PM,9/11/2016,3:10 PM,Connecticut: Massachusetts: New Hampshire: Rho...,NPCC,Severe Weather,57960,"Loss of electric service to more than 50,000 c..."
13,2/9/2017,4:05 PM,2/10/2017,5:15 AM,Connecticut: Massachusetts: Rhode Island:,NPCC,Severe Weather,11525,"Loss of electric service to more than 50,000 c..."
22,03/02/2017,12:20 PM,03/02/2017,11:45 PM,Connecticut: Maine: Massachusetts: New Hampshi...,NPCC,Severe Weather,54316,"Loss of electric service to more than 50,000 c..."
30,03/14/2017,12:32 PM,Unknown,Unknown,Connecticut: Massachusetts: Rhode Island: New ...,NPCC,Severe Weather,69647,"Loss of electric service to more than 50,000 c..."
126,10/29/2017,11:40 PM,11/01/2017,6:08 PM,Connecticut: Massachusetts: New Hampshire: Mai...,NPCC,Severe Weather,310453,"Loss of electric service to more than 50,000 c..."


We drop rows whose Date of Restoration is "Unknown": without it we cannot determine when the blackout ended, so we cannot tag our data.

In [5]:
# Keep only the outages whose restoration date was actually recorded.
outages_data = outages_data[outages_data['Date of Restoration'].ne('Unknown')]
outages_data

Unnamed: 0,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Area Affected,NERC Region,Event Type,Number of Customers Affected,Alert Criteria
197,10/22/2014,10:46 PM,10/22/2014,10:47 PM,"New Hampshire, Maine, Massachusetts, Rhode Isl...",NPCC,Severe Weather,66650,
63,6/23/2015,6:30 PM,6/24/2015,5:00 AM,"Connecticut, Maine, Massachusetts, New Hampshi...",NPCC,Severe Weather,62442,"Loss of electric service to more than 50,000 c..."
93,8/4/2015,7:17 AM,8/5/2015,12:52 PM,Massachusetts: Rhode Island:,NPCC,Severe Weather,132000,"Loss of electric service to more than 50,000 c..."
72,7/22/2016,11:50 PM,7/23/2016,9:10 AM,Massachusetts: Connecticut: Rhode Island: New ...,NPCC,Severe Weather,57058,"Loss of electric service to more than 50,000 c..."
74,7/23/2016,7:30 PM,7/24/2016,7:30 AM,Connecticut: Massachusetts: New Hampshire: Ver...,NPCC,Severe Weather,101073,"Loss of electric service to more than 50,000 c..."
100,9/11/2016,12:05 PM,9/11/2016,3:10 PM,Connecticut: Massachusetts: New Hampshire: Rho...,NPCC,Severe Weather,57960,"Loss of electric service to more than 50,000 c..."
13,2/9/2017,4:05 PM,2/10/2017,5:15 AM,Connecticut: Massachusetts: Rhode Island:,NPCC,Severe Weather,11525,"Loss of electric service to more than 50,000 c..."
22,03/02/2017,12:20 PM,03/02/2017,11:45 PM,Connecticut: Maine: Massachusetts: New Hampshi...,NPCC,Severe Weather,54316,"Loss of electric service to more than 50,000 c..."
126,10/29/2017,11:40 PM,11/01/2017,6:08 PM,Connecticut: Massachusetts: New Hampshire: Mai...,NPCC,Severe Weather,310453,"Loss of electric service to more than 50,000 c..."
32,03/02/2018,1:51 PM,03/05/2018,1:18 PM,Connecticut: Massachusetts: Rhode Island:,NPCC,Severe Weather,325000,"Loss of electric service to more than 50,000 c..."


In [6]:
# Persist the cleaned table; the row labels are not written to the file.
outages_data.to_csv(path_or_buf="../datasets/outages_since_2014.csv", index=False)