In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [2]:
# Read the raw outage-event CSV; the path is relative to the notebook's
# working directory.
data = pd.read_csv(filepath_or_buffer='data/power_outage_data.csv')
# Bare trailing expression so the notebook renders the loaded frame.
data

Unnamed: 0,Event Description,Year,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags
0,Severe Weather - Thunderstorms,2014,6/30/2014,8:00 PM,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm"
1,Severe Weather - Thunderstorms,2014,6/30/2014,11:20 PM,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm"
2,Severe Weather - Thunderstorms,2014,6/30/2014,5:55 PM,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm"
3,Fuel Supply Emergency - Coal,2014,6/27/2014,1:21 PM,Unknown,Unknown,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal"
4,Physical Attack - Vandalism,2014,6/24/2014,2:54 PM,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical"
...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,4:00 p.m.,3/18/2000,5:10:00 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption
1648,Vandalism,2000,3/14/2000,9:06 p.m.,,,Alliant Energy,Maine,MAIN,,,vandalism
1649,Ice Storm,2000,1/29/2000,10:00 p.m.,2/3/2000,12:00:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm"
1650,Ice Storm,2000,1/24/2000,7:00 p.m.,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm"


## Time and Date Columns Handling

In [3]:
# Summarise the frame: per-column non-null counts and dtypes, plus memory use.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1652 entries, 0 to 1651
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   Event Description             1652 non-null   object
 1   Year                          1652 non-null   int64 
 2   Date Event Began              1652 non-null   object
 3   Time Event Began              1643 non-null   object
 4   Date of Restoration           1638 non-null   object
 5   Time of Restoration           1632 non-null   object
 6   Respondent                    1652 non-null   object
 7   Geographic Areas              1651 non-null   object
 8   NERC Region                   1650 non-null   object
 9   Demand Loss (MW)              1246 non-null   object
 10  Number of Customers Affected  1434 non-null   object
 11  Tags                          1651 non-null   object
dtypes: int64(1), object(11)
memory usage: 155.0+ KB


In [4]:
# Normalise the meridiem marker in the event start time so that dotted
# variants ("a.m." / "p.m.", any case) become plain "AM" / "PM".
#
# The patterns are explicit regexes with escaped dots: the default of `regex`
# differs between pandas versions, and an unescaped '.' would match any
# character when the pattern is interpreted as a regex.
# Surrounding whitespace is stripped as well, matching how
# 'Time of Restoration' is cleaned later in the notebook.
data['Time Event Began'] = (
    data['Time Event Began']
    .str.strip()
    .str.replace(r'a\.m\.', 'AM', case=False, regex=True)
    .str.replace(r'p\.m\.', 'PM', case=False, regex=True)
)
data

Unnamed: 0,Event Description,Year,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags
0,Severe Weather - Thunderstorms,2014,6/30/2014,8:00 PM,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm"
1,Severe Weather - Thunderstorms,2014,6/30/2014,11:20 PM,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm"
2,Severe Weather - Thunderstorms,2014,6/30/2014,5:55 PM,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm"
3,Fuel Supply Emergency - Coal,2014,6/27/2014,1:21 PM,Unknown,Unknown,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal"
4,Physical Attack - Vandalism,2014,6/24/2014,2:54 PM,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical"
...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,4:00 PM,3/18/2000,5:10:00 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption
1648,Vandalism,2000,3/14/2000,9:06 PM,,,Alliant Energy,Maine,MAIN,,,vandalism
1649,Ice Storm,2000,1/29/2000,10:00 PM,2/3/2000,12:00:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm"
1650,Ice Storm,2000,1/24/2000,7:00 PM,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm"


In [5]:
# Combine the date and time text into a single "<date> <time>" string and
# parse it as a 12-hour timestamp. Rows that do not match the format
# (including missing values, which become the text 'nan') are coerced to NaT.
data['Event Datetime'] = pd.to_datetime(
    data['Date Event Began'].astype(str).str.cat(
        data['Time Event Began'].astype(str), sep=' '
    ),
    format='%m/%d/%Y %I:%M %p',
    errors='coerce',
)
data

Unnamed: 0,Event Description,Year,Date Event Began,Time Event Began,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime
0,Severe Weather - Thunderstorms,2014,6/30/2014,8:00 PM,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00
1,Severe Weather - Thunderstorms,2014,6/30/2014,11:20 PM,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00
2,Severe Weather - Thunderstorms,2014,6/30/2014,5:55 PM,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00
3,Fuel Supply Emergency - Coal,2014,6/27/2014,1:21 PM,Unknown,Unknown,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00
4,Physical Attack - Vandalism,2014,6/24/2014,2:54 PM,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,4:00 PM,3/18/2000,5:10:00 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00
1648,Vandalism,2000,3/14/2000,9:06 PM,,,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00
1649,Ice Storm,2000,1/29/2000,10:00 PM,2/3/2000,12:00:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00
1650,Ice Storm,2000,1/24/2000,7:00 PM,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00


In [6]:
# The separate date/time text columns are superseded by 'Event Datetime'.
data = data.drop(columns=['Date Event Began', 'Time Event Began'])
data

Unnamed: 0,Event Description,Year,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime
0,Severe Weather - Thunderstorms,2014,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00
1,Severe Weather - Thunderstorms,2014,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00
2,Severe Weather - Thunderstorms,2014,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00
3,Fuel Supply Emergency - Coal,2014,Unknown,Unknown,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00
4,Physical Attack - Vandalism,2014,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00
...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,5:10:00 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00
1648,Vandalism,2000,,,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00
1649,Ice Storm,2000,2/3/2000,12:00:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00
1650,Ice Storm,2000,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00


In [7]:
# Treat the literal placeholder 'Unknown' in both restoration columns as missing.
data[['Time of Restoration', 'Date of Restoration']] = (
    data[['Time of Restoration', 'Date of Restoration']].replace('Unknown', np.nan)
)
data

Unnamed: 0,Event Description,Year,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime
0,Severe Weather - Thunderstorms,2014,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00
1,Severe Weather - Thunderstorms,2014,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00
2,Severe Weather - Thunderstorms,2014,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00
3,Fuel Supply Emergency - Coal,2014,,,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00
4,Physical Attack - Vandalism,2014,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00
...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,5:10:00 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00
1648,Vandalism,2000,,,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00
1649,Ice Storm,2000,2/3/2000,12:00:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00
1650,Ice Storm,2000,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00


In [8]:
# Bring 'Time of Restoration' into the single "H:MM AM/PM" shape expected by
# the '%I:%M %p' format used when the restoration timestamp is parsed.
#
# Steps, applied in order to each non-missing value:
#   1. trim surrounding whitespace;
#   2. rewrite dotted meridiems ("a.m." / "p.m.", any case) as "AM" / "PM",
#      the same normalisation applied to 'Time Event Began';
#   3. drop a trailing seconds field ("5:10:00 PM" -> "5:10 PM"), whatever
#      its value, since the target format has minute resolution;
#   4. expand hour-only values ("6 PM" -> "6:00 PM").
# All patterns are case-insensitive so lower-case "am"/"pm" is handled too.
data['Time of Restoration'] = (
    data['Time of Restoration']
    .str.strip()
    .str.replace(r'a\.m\.', 'AM', case=False, regex=True)
    .str.replace(r'p\.m\.', 'PM', case=False, regex=True)
    .str.replace(r'^(\d{1,2}:\d{2}):\d{2}\s*(AM|PM)$', r'\1 \2', case=False, regex=True)
    .str.replace(r'^(\d{1,2})\s+(AM|PM)$', r'\1:00 \2', case=False, regex=True)
)
data



Unnamed: 0,Event Description,Year,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime
0,Severe Weather - Thunderstorms,2014,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00
1,Severe Weather - Thunderstorms,2014,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00
2,Severe Weather - Thunderstorms,2014,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00
3,Fuel Supply Emergency - Coal,2014,,,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00
4,Physical Attack - Vandalism,2014,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00
...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,5:10 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00
1648,Vandalism,2000,,,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00
1649,Ice Storm,2000,2/3/2000,12:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00
1650,Ice Storm,2000,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00


In [9]:
# Combine the restoration date and time text into "<date> <time>" and parse it
# as a 12-hour timestamp; anything that does not match the format (including
# missing values, which become the text 'nan') is coerced to NaT.
data['Restoration Datetime'] = pd.to_datetime(
    data['Date of Restoration'].astype(str).str.cat(
        data['Time of Restoration'].astype(str), sep=' '
    ),
    format='%m/%d/%Y %I:%M %p',
    errors='coerce',
)
data

Unnamed: 0,Event Description,Year,Date of Restoration,Time of Restoration,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime,Restoration Datetime
0,Severe Weather - Thunderstorms,2014,7/2/2014,6:30 PM,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00,2014-07-02 18:30:00
1,Severe Weather - Thunderstorms,2014,7/1/2014,5:00 PM,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00,2014-07-01 17:00:00
2,Severe Weather - Thunderstorms,2014,7/1/2014,2:53 AM,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00,2014-07-01 02:53:00
3,Fuel Supply Emergency - Coal,2014,,,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00,NaT
4,Physical Attack - Vandalism,2014,6/24/2014,2:55 PM,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00,2014-06-24 14:55:00
...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,3/18/2000,5:10 PM,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00,2000-03-18 17:10:00
1648,Vandalism,2000,,,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00,NaT
1649,Ice Storm,2000,2/3/2000,12:00 PM,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00,2000-02-03 12:00:00
1650,Ice Storm,2000,,,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00,NaT


In [10]:
# The separate restoration date/time text columns are superseded by
# 'Restoration Datetime'. Reassign instead of mutating in place, matching how
# the event date/time columns were dropped earlier in the notebook.
data = data.drop(columns=['Date of Restoration', 'Time of Restoration'])


In [11]:
# Display the frame after the restoration text columns have been removed.
data

Unnamed: 0,Event Description,Year,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime,Restoration Datetime
0,Severe Weather - Thunderstorms,2014,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00,2014-07-02 18:30:00
1,Severe Weather - Thunderstorms,2014,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00,2014-07-01 17:00:00
2,Severe Weather - Thunderstorms,2014,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00,2014-07-01 02:53:00
3,Fuel Supply Emergency - Coal,2014,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00,NaT
4,Physical Attack - Vandalism,2014,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00,2014-06-24 14:55:00
...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00,2000-03-18 17:10:00
1648,Vandalism,2000,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00,NaT
1649,Ice Storm,2000,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00,2000-02-03 12:00:00
1650,Ice Storm,2000,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00,NaT


In [12]:
# Elapsed time from event start to restoration; NaT on either side yields NaT.
data['Outage Duration'] = data['Restoration Datetime'].sub(data['Event Datetime'])
data

Unnamed: 0,Event Description,Year,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime,Restoration Datetime,Outage Duration
0,Severe Weather - Thunderstorms,2014,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00,2014-07-02 18:30:00,1 days 22:30:00
1,Severe Weather - Thunderstorms,2014,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00,2014-07-01 17:00:00,0 days 17:40:00
2,Severe Weather - Thunderstorms,2014,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00,2014-07-01 02:53:00,0 days 08:58:00
3,Fuel Supply Emergency - Coal,2014,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00,NaT,NaT
4,Physical Attack - Vandalism,2014,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00,2014-06-24 14:55:00,0 days 00:01:00
...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00,2000-03-18 17:10:00,0 days 01:10:00
1648,Vandalism,2000,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00,NaT,NaT
1649,Ice Storm,2000,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00,2000-02-03 12:00:00,4 days 14:00:00
1650,Ice Storm,2000,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00,NaT,NaT


In [15]:
# Express the duration as fractional hours (seconds / 3600), rounded to 2 decimals.
data['Outage Duration(in hrs)'] = (
    data['Outage Duration']
    .dt.total_seconds()
    .div(3600)
    .round(2)
)
data

Unnamed: 0,Event Description,Year,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime,Restoration Datetime,Outage Duration,Outage Duration(in hrs)
0,Severe Weather - Thunderstorms,2014,Exelon Corporation/ComEd,Illinois,RFC,Unknown,420000,"severe weather, thunderstorm",2014-06-30 20:00:00,2014-07-02 18:30:00,1 days 22:30:00,46.50
1,Severe Weather - Thunderstorms,2014,Northern Indiana Public Service Company,North Central Indiana,RFC,Unknown,127000,"severe weather, thunderstorm",2014-06-30 23:20:00,2014-07-01 17:00:00,0 days 17:40:00,17.67
2,Severe Weather - Thunderstorms,2014,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00,2014-07-01 02:53:00,0 days 08:58:00,8.97
3,Fuel Supply Emergency - Coal,2014,We Energies,Wisconsin,MRO,Unknown,Unknown,"fuel supply emergency, coal",2014-06-27 13:21:00,NaT,NaT,
4,Physical Attack - Vandalism,2014,Tennessee Valley Authority,"Nashville, Tennessee",SERC,Unknown,Unknown,"vandalism, physical",2014-06-24 14:54:00,2014-06-24 14:55:00,0 days 00:01:00,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00,2000-03-18 17:10:00,0 days 01:10:00,1.17
1648,Vandalism,2000,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00,NaT,NaT,
1649,Ice Storm,2000,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00,2000-02-03 12:00:00,4 days 14:00:00,110.00
1650,Ice Storm,2000,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00,NaT,NaT,


## Handling NaN Values

In [16]:
# Map every textual "missing" placeholder, in any column, to NaN so that
# missing values are counted uniformly.
data = data.replace(
    to_replace=['Unknown', 'unknown', 'NA', 'N/A', ''],
    value=np.nan,
)
data

Unnamed: 0,Event Description,Year,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime,Restoration Datetime,Outage Duration,Outage Duration(in hrs)
0,Severe Weather - Thunderstorms,2014,Exelon Corporation/ComEd,Illinois,RFC,,420000,"severe weather, thunderstorm",2014-06-30 20:00:00,2014-07-02 18:30:00,1 days 22:30:00,46.50
1,Severe Weather - Thunderstorms,2014,Northern Indiana Public Service Company,North Central Indiana,RFC,,127000,"severe weather, thunderstorm",2014-06-30 23:20:00,2014-07-01 17:00:00,0 days 17:40:00,17.67
2,Severe Weather - Thunderstorms,2014,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00,2014-07-01 02:53:00,0 days 08:58:00,8.97
3,Fuel Supply Emergency - Coal,2014,We Energies,Wisconsin,MRO,,,"fuel supply emergency, coal",2014-06-27 13:21:00,NaT,NaT,
4,Physical Attack - Vandalism,2014,Tennessee Valley Authority,"Nashville, Tennessee",SERC,,,"vandalism, physical",2014-06-24 14:54:00,2014-06-24 14:55:00,0 days 00:01:00,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00,2000-03-18 17:10:00,0 days 01:10:00,1.17
1648,Vandalism,2000,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00,NaT,NaT,
1649,Ice Storm,2000,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00,2000-02-03 12:00:00,4 days 14:00:00,110.00
1650,Ice Storm,2000,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00,NaT,NaT,


In [17]:
# Number of missing values in each column.
data.isna().sum(axis='index')

Event Description                 0
Year                              0
Respondent                        0
Geographic Areas                  9
NERC Region                       2
Demand Loss (MW)                614
Number of Customers Affected    364
Tags                             13
Event Datetime                   20
Restoration Datetime            690
Outage Duration                 691
Outage Duration(in hrs)         691
dtype: int64

In [18]:
# Label rows with no geographic area as the explicit category 'Unknown'.
data.loc[data['Geographic Areas'].isna(), 'Geographic Areas'] = 'Unknown'
data

Unnamed: 0,Event Description,Year,Respondent,Geographic Areas,NERC Region,Demand Loss (MW),Number of Customers Affected,Tags,Event Datetime,Restoration Datetime,Outage Duration,Outage Duration(in hrs)
0,Severe Weather - Thunderstorms,2014,Exelon Corporation/ComEd,Illinois,RFC,,420000,"severe weather, thunderstorm",2014-06-30 20:00:00,2014-07-02 18:30:00,1 days 22:30:00,46.50
1,Severe Weather - Thunderstorms,2014,Northern Indiana Public Service Company,North Central Indiana,RFC,,127000,"severe weather, thunderstorm",2014-06-30 23:20:00,2014-07-01 17:00:00,0 days 17:40:00,17.67
2,Severe Weather - Thunderstorms,2014,We Energies,Southeast Wisconsin,MRO,424,120000,"severe weather, thunderstorm",2014-06-30 17:55:00,2014-07-01 02:53:00,0 days 08:58:00,8.97
3,Fuel Supply Emergency - Coal,2014,We Energies,Wisconsin,MRO,,,"fuel supply emergency, coal",2014-06-27 13:21:00,NaT,NaT,
4,Physical Attack - Vandalism,2014,Tennessee Valley Authority,"Nashville, Tennessee",SERC,,,"vandalism, physical",2014-06-24 14:54:00,2014-06-24 14:55:00,0 days 00:01:00,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...
1647,Transmission Line Loss,2000,El Paso Elec. Co.,Texas,MAIN,400,100000,transmission interruption,2000-03-18 16:00:00,2000-03-18 17:10:00,0 days 01:10:00,1.17
1648,Vandalism,2000,Alliant Energy,Maine,MAIN,,,vandalism,2000-03-14 21:06:00,NaT,NaT,
1649,Ice Storm,2000,Duke Power Co.,South Carolina,SERC,300,81000,"severe weather, winter storm",2000-01-29 22:00:00,2000-02-03 12:00:00,4 days 14:00:00,110.00
1650,Ice Storm,2000,Carolina Power & Light,North Carolina & Northern South Carolina,SERC,960,173000,"severe weather, winter storm",2000-01-24 19:00:00,NaT,NaT,


In [20]:
# Label rows with no NERC region as the explicit category 'Unknown'.
data.loc[data['NERC Region'].isna(), 'NERC Region'] = 'Unknown'

In [21]:
# Re-count missing values per column after the categorical fills.
data.isna().sum(axis='index')

Event Description                 0
Year                              0
Respondent                        0
Geographic Areas                  0
NERC Region                       0
Demand Loss (MW)                614
Number of Customers Affected    364
Tags                             13
Event Datetime                   20
Restoration Datetime            690
Outage Duration                 691
Outage Duration(in hrs)         691
dtype: int64

In [23]:
# Distinct geographic-area labels, in order of first appearance.
data['Geographic Areas'].unique().tolist()

['Illinois',
 'North Central Indiana',
 'Southeast  Wisconsin',
 'Wisconsin',
 'Nashville, Tennessee',
 'Washington',
 'Southeast Michigan',
 'Central Minnesota',
 'Somervell County, Texas',
 'Southern Mississippi',
 'Nogales, Arizona',
 'West Virginia',
 'Alberta, Canada',
 'North and Central , Alabama',
 'Texas',
 'Shelby County, Tennessee',
 'West Tennessee',
 'Phoenix, Arizona',
 'British Columbia & Alberta, Canada',
 'North Carolina',
 'Duchesne County, Utah',
 'San Diego & Orange Counties, California',
 'Whiting, Indiana',
 'Layton, Utah',
 'MISO North, Minnesota',
 'Indiana',
 'Imperial Valley, California',
 'Mississippi',
 'Alabama, Florida, Georgia',
 'Mississippi, Alabama',
 'Northeastern Mississippi, Northern Alabama',
 'Albany, Oregon',
 'Delaware',
 'Baton Rouge, Louisiana',
 'Michigan',
 'Western and Central Michigan',
 'Davis, California',
 'Puerto Rico',
 'Central Arkansas',
 'Montana',
 'Iowa',
 'Salt Lake City, Utah',
 'New York',
 'Glendale, Arizona',
 'Boone County,