ada app assessment

q1. Find the month in 2015 where the State of Washington had the largest number of storm events. How many days of storm-free weather occurred in that month?

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the storm-event detail tables for the years 2000 and 2015 (one CSV per year).
storm2000, storm2015 = (
    pd.read_csv('StormEvents_details-ftp_v1.0_d2000_c20160223.csv'),
    pd.read_csv('StormEvents_details-ftp_v1.0_d2015_c20160810.csv'),
)

In [3]:
# Sanity check: list the available columns of the 2015 table.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(storm2015.columns)

Index([u'BEGIN_YEARMONTH', u'BEGIN_DAY', u'BEGIN_TIME', u'END_YEARMONTH',
       u'END_DAY', u'END_TIME', u'EPISODE_ID', u'EVENT_ID', u'STATE',
       u'STATE_FIPS', u'YEAR', u'MONTH_NAME', u'EVENT_TYPE', u'CZ_TYPE',
       u'CZ_FIPS', u'CZ_NAME', u'WFO', u'BEGIN_DATE_TIME', u'CZ_TIMEZONE',
       u'END_DATE_TIME', u'INJURIES_DIRECT', u'INJURIES_INDIRECT',
       u'DEATHS_DIRECT', u'DEATHS_INDIRECT', u'DAMAGE_PROPERTY',
       u'DAMAGE_CROPS', u'SOURCE', u'MAGNITUDE', u'MAGNITUDE_TYPE',
       u'FLOOD_CAUSE', u'CATEGORY', u'TOR_F_SCALE', u'TOR_LENGTH',
       u'TOR_WIDTH', u'TOR_OTHER_WFO', u'TOR_OTHER_CZ_STATE',
       u'TOR_OTHER_CZ_FIPS', u'TOR_OTHER_CZ_NAME', u'BEGIN_RANGE',
       u'BEGIN_AZIMUTH', u'BEGIN_LOCATION', u'END_RANGE', u'END_AZIMUTH',
       u'END_LOCATION', u'BEGIN_LAT', u'BEGIN_LON', u'END_LAT', u'END_LON',
       u'EPISODE_NARRATIVE', u'EVENT_NARRATIVE', u'DATA_SOURCE'],
      dtype='object')


In [4]:
# Keep only the 2015 events whose STATE is exactly 'WASHINGTON'.
storm2015WA = storm2015.loc[storm2015['STATE'] == 'WASHINGTON']

# Sanity check: the first and last rows should all read WASHINGTON.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(storm2015WA['STATE'].head())
print(storm2015WA['STATE'].tail())

547     WASHINGTON
548     WASHINGTON
1682    WASHINGTON
2583    WASHINGTON
2584    WASHINGTON
Name: STATE, dtype: object
57156    WASHINGTON
57353    WASHINGTON
57354    WASHINGTON
57355    WASHINGTON
57356    WASHINGTON
Name: STATE, dtype: object


In [5]:
# Each row is one event, so the month name that appears most often is the
# month with the largest number of storm events. Result: December.
storm2015WA['MONTH_NAME'].value_counts()

December     158
January      111
November      70
August        47
May           37
June          26
July          25
February      19
September     18
October       15
April          7
March          5
Name: MONTH_NAME, dtype: int64

In [6]:
# Keep only the Washington events whose MONTH_NAME is 'December'.
storm2015DEC_WA = storm2015WA.loc[storm2015WA['MONTH_NAME'] == 'December']

# Sanity check: the first and last rows should all read December.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(storm2015DEC_WA['MONTH_NAME'].head())
print(storm2015DEC_WA['MONTH_NAME'].tail())

49516    December
49517    December
49518    December
49519    December
49520    December
Name: MONTH_NAME, dtype: object
57156    December
57353    December
57354    December
57355    December
57356    December
Name: MONTH_NAME, dtype: object


In [7]:
# Only the begin/end day-of-month columns are needed to work out which
# days had a storm.
storm_days = storm2015DEC_WA[['BEGIN_DAY', 'END_DAY']]

# Data at a glance. Single-argument print(...) behaves the same on
# Python 2 and Python 3.
print(storm_days)

       BEGIN_DAY  END_DAY
49516          2        3
49517          2        3
49518          2        3
49519          2        3
49520          4        5
49521          4        5
49522          4        5
49523          4        5
49524          4        5
49525          5        6
49530         17       18
49531         17       18
49532         17       18
49533         17       18
49534         17       18
49535         17       18
49536         17       18
49782          3        3
49783          8        8
49784         10       10
49785         10       10
49786         10       10
49787         10       10
49788         19       20
49789         20       20
49790         12       12
49791         12       12
49792         22       22
50470         15       16
50471         15       16
...          ...      ...
56480         21       21
56481         21       21
56485         10       10
56486          1        2
56487          1        2
56492         12       12
56513       

In [8]:
# BEGIN_DAY is one storm day; END_DAY (when it differs from BEGIN_DAY) is another.
# Many events share the same (BEGIN_DAY, END_DAY) pair, so dropping duplicate
# rows narrows the table down to the distinct day spans that had storms.
# Days covered by a single event = END_DAY - BEGIN_DAY + 1.
# (Use storm_days.duplicated() to inspect which rows are duplicates before dropping.)
storm_days_cleaned = storm_days.drop_duplicates()

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(storm_days_cleaned)

       BEGIN_DAY  END_DAY
49516          2        3
49520          4        5
49525          5        6
49530         17       18
49782          3        3
49783          8        8
49784         10       10
49788         19       20
49789         20       20
49790         12       12
49792         22       22
50470         15       16
50477         21       22
52828          8        9
52829          9        9
53343         17       17
54055         12       13
54062          5        5
54063          6        6
54693         20       21
54761         21       21
55110         22       23
55114         23       24
55316         23       23
55973         13       13
56449          6        7
56455          7        7
56486          1        2
56531         10       11
56668          9       10
57354         22       24


In [9]:
# Order the distinct day spans by BEGIN_DAY, breaking ties with END_DAY
# (row-wise, ascending: the sort_values defaults), so gaps are easy to spot.
storm_days_sorted = storm_days_cleaned.sort_values(['BEGIN_DAY', 'END_DAY'])

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(storm_days_sorted)

       BEGIN_DAY  END_DAY
56486          1        2
49516          2        3
49782          3        3
49520          4        5
54062          5        5
49525          5        6
54063          6        6
56449          6        7
56455          7        7
49783          8        8
52828          8        9
52829          9        9
56668          9       10
49784         10       10
56531         10       11
49790         12       12
54055         12       13
55973         13       13
50470         15       16
53343         17       17
49530         17       18
49788         19       20
49789         20       20
54693         20       21
54761         21       21
50477         21       22
49792         22       22
55110         22       23
57354         22       24
55316         23       23
55114         23       24


In [10]:
# Count the storm-free days, accounting for days covered by several events
# and for days no event covers.
# Sanity check via hand count: days 1-24 all appear except day 14, so 23 days
# had storms and 31 - 23 = 8 days were storm-free.

# Every day-of-month covered by at least one event (duplicates allowed here).
storming = []

# Walk the (BEGIN_DAY, END_DAY) pairs of the sorted table. Both columns are read
# from the same frame, so the result does not rely on another frame sharing its
# index labels.
for begin_day, end_day in zip(storm_days_sorted['BEGIN_DAY'],
                              storm_days_sorted['END_DAY']):
    # np.arange excludes its stop value, hence the +1 to include END_DAY itself.
    # NOTE(review): a span with END_DAY < BEGIN_DAY yields no days at all here;
    # none appear in the table above - confirm if this is reused on other data.
    storming.extend(np.arange(begin_day, end_day + 1))

# December has 31 days. Subtract the number of distinct storm days to get the
# storm-free days; taking the set of the list removes the duplicates.
DAYS_IN_DECEMBER = 31
DAYS_IN_DECEMBER - len(set(storming))


8