In [1]:
import pandas as pd
from pprint import pprint as pp
import numpy as np
import matplotlib.pyplot as plt

In [128]:
pd.set_option('display.max_rows', 20)
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_columns', 20)

%matplotlib inline
plt.style.use('ggplot')

In [18]:
df = pd.read_csv('datasets/storms.csv.gz')

In [19]:
df = df.set_index('REFNUM')

In [20]:
# take a peak at the data
df.head()

Unnamed: 0_level_0,STATE__,BGN_DATE,BGN_TIME,TIME_ZONE,COUNTY,...,LATITUDE,LONGITUDE,LATITUDE_E,LONGITUDE_,REMARKS
REFNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1.0,1.0,4/18/1950 0:00:00,130,CST,97.0,...,3040.0,8812.0,3051.0,8806.0,
2.0,1.0,4/18/1950 0:00:00,145,CST,3.0,...,3042.0,8755.0,0.0,0.0,
3.0,1.0,2/20/1951 0:00:00,1600,CST,57.0,...,3340.0,8742.0,0.0,0.0,
4.0,1.0,6/8/1951 0:00:00,900,CST,89.0,...,3458.0,8626.0,0.0,0.0,
5.0,1.0,11/15/1951 0:00:00,1500,CST,43.0,...,3412.0,8642.0,0.0,0.0,


In [21]:
# what are the columns?
pp([column for column in df.columns])

['STATE__',
 'BGN_DATE',
 'BGN_TIME',
 'TIME_ZONE',
 'COUNTY',
 'COUNTYNAME',
 'STATE',
 'EVTYPE',
 'BGN_RANGE',
 'BGN_AZI',
 'BGN_LOCATI',
 'END_DATE',
 'END_TIME',
 'COUNTY_END',
 'COUNTYENDN',
 'END_RANGE',
 'END_AZI',
 'END_LOCATI',
 'LENGTH',
 'WIDTH',
 'F',
 'MAG',
 'FATALITIES',
 'INJURIES',
 'PROPDMG',
 'PROPDMGEXP',
 'CROPDMG',
 'CROPDMGEXP',
 'WFO',
 'STATEOFFIC',
 'ZONENAMES',
 'LATITUDE',
 'LONGITUDE',
 'LATITUDE_E',
 'LONGITUDE_',
 'REMARKS']


In [22]:
# what's the shape?
print "There are %d rows and %d columns." % df.shape

There are 902297 rows and 36 columns.


In [23]:
# what is in 'EVTYPE'
event_types = [event for event in df['EVTYPE'].unique()]
pp(event_types)

['TORNADO',
 'TSTM WIND',
 'HAIL',
 'FREEZING RAIN',
 'SNOW',
 'ICE STORM/FLASH FLOOD',
 'SNOW/ICE',
 'WINTER STORM',
 'HURRICANE OPAL/HIGH WINDS',
 'THUNDERSTORM WINDS',
 'RECORD COLD',
 'HURRICANE ERIN',
 'HURRICANE OPAL',
 'HEAVY RAIN',
 'LIGHTNING',
 'THUNDERSTORM WIND',
 'DENSE FOG',
 'RIP CURRENT',
 'THUNDERSTORM WINS',
 'FLASH FLOOD',
 'FLASH FLOODING',
 'HIGH WINDS',
 'FUNNEL CLOUD',
 'TORNADO F0',
 'THUNDERSTORM WINDS LIGHTNING',
 'THUNDERSTORM WINDS/HAIL',
 'HEAT',
 'WIND',
 'LIGHTING',
 'HEAVY RAINS',
 'LIGHTNING AND HEAVY RAIN',
 'FUNNEL',
 'WALL CLOUD',
 'FLOODING',
 'THUNDERSTORM WINDS HAIL',
 'FLOOD',
 'COLD',
 'HEAVY RAIN/LIGHTNING',
 'FLASH FLOODING/THUNDERSTORM WI',
 'WALL CLOUD/FUNNEL CLOUD',
 'THUNDERSTORM',
 'WATERSPOUT',
 'EXTREME COLD',
 'HAIL 1.75)',
 'LIGHTNING/HEAVY RAIN',
 'HIGH WIND',
 'BLIZZARD',
 'BLIZZARD WEATHER',
 'WIND CHILL',
 'BREAKUP FLOODING',
 'HIGH WIND/BLIZZARD',
 'RIVER FLOOD',
 'HEAVY SNOW',
 'FREEZE',
 'COASTAL FLOOD',
 'HIGH WIND AND HIGH TI

In [24]:
print "There are %d unique event types." % len(event_types)

There are 985 unique event types.


In [25]:
a = df.isnull().sum()

In [26]:
for column in df.columns:
    print "{:20s} {:2.1f}".format(column, 100. * a[column]/len(df))

STATE__              0.0
BGN_DATE             0.0
BGN_TIME             0.0
TIME_ZONE            0.0
COUNTY               0.0
COUNTYNAME           0.2
STATE                0.0
EVTYPE               0.0
BGN_RANGE            0.0
BGN_AZI              60.7
BGN_LOCATI           31.9
END_DATE             27.0
END_TIME             26.5
COUNTY_END           0.0
COUNTYENDN           100.0
END_RANGE            0.0
END_AZI              80.3
END_LOCATI           55.3
LENGTH               0.0
WIDTH                0.0
F                    93.5
MAG                  0.0
FATALITIES           0.0
INJURIES             0.0
PROPDMG              0.0
PROPDMGEXP           51.6
CROPDMG              0.0
CROPDMGEXP           68.5
WFO                  15.7
STATEOFFIC           27.6
ZONENAMES            65.8
LATITUDE             0.0
LONGITUDE            0.0
LATITUDE_E           0.0
LONGITUDE_           0.0
REMARKS              31.9


In [27]:
# drop any column that has a NA -- 
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.dropna.html
print "Without dropping any rows or columns with NA, the shape is: {}".format(df.shape)
print "Dropping any rows with NA yields the following shape: {}".format(df.dropna().shape)
print "Dropping columns with NA yields the following shape: {}".format(df.dropna(axis=1).shape)

Without dropping any rows or columns with NA, the shape is: (902297, 36)
Dropping any rows with NA yields the following shape: (0, 36)
Dropping columns with NA yields the following shape: (902297, 19)


In [30]:
# Are there any duplicates? (We set the index to REFNUM)
df.duplicated().sum()

3143

In [31]:
# .duplicated returns a BOOLEAN -- 
df[df.duplicated()]

Unnamed: 0_level_0,STATE__,BGN_DATE,BGN_TIME,TIME_ZONE,COUNTY,...,LATITUDE,LONGITUDE,LATITUDE_E,LONGITUDE_,REMARKS
REFNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
19.0,1.0,5/10/1952 0:00:00,900,CST,57.0,...,3344.0,8745.0,0.0,0.0,
20.0,1.0,5/10/1952 0:00:00,900,CST,57.0,...,3344.0,8745.0,0.0,0.0,
260.0,1.0,7/4/1960 0:00:00,1750,CST,13.0,...,3139.0,8636.0,0.0,0.0,
603.0,1.0,5/14/1967 0:00:00,2330,CST,77.0,...,3453.0,8730.0,0.0,0.0,
604.0,1.0,5/14/1967 0:00:00,2330,CST,77.0,...,3453.0,8730.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...
900771.0,38.0,10/7/2011 0:00:00,03:00:00 PM,CST,7.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: A strong early October low ...
900806.0,27.0,10/7/2011 0:00:00,04:00:00 PM,CST,1.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: A strong early October low ...
900807.0,36.0,10/29/2011 0:00:00,01:00:00 PM,EST,57.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: An early season winter stor...
900808.0,38.0,10/24/2011 0:00:00,03:03:00 PM,CST,7.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: Seasonably warm and dry con...


In [None]:
# can call .duplicated on the entire dataframe or on a subset of the dataframe
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.duplicated.html


In [47]:
df.EVTYPE.value_counts()

HAIL                              288661
TSTM WIND                         219940
THUNDERSTORM WIND                  82563
TORNADO                            60652
FLASH FLOOD                        54277
                                   ...  
EXCESSIVE                              1
LANDSLUMP                              1
NORTHERN LIGHTS                        1
HURRICANE EMILY                        1
THUNDERSTORM WINDS/FLASH FLOOD         1
Name: EVTYPE, dtype: int64

In [58]:
test_df = pd.DataFrame(df.EVTYPE.value_counts() > 10)

In [60]:
test_df

Unnamed: 0,EVTYPE
HAIL,True
TSTM WIND,True
THUNDERSTORM WIND,True
TORNADO,True
FLASH FLOOD,True
...,...
EXCESSIVE,False
LANDSLUMP,False
NORTHERN LIGHTS,False
HURRICANE EMILY,False


In [None]:
test_df

In [63]:
df[ ['EVTYPE', 'FATALITIES'] ].groupby('EVTYPE').sum().sort(['FATALITIES'], ascending = [0])

  if __name__ == '__main__':


Unnamed: 0_level_0,FATALITIES
EVTYPE,Unnamed: 1_level_1
TORNADO,5633.0
EXCESSIVE HEAT,1903.0
FLASH FLOOD,978.0
HEAT,937.0
LIGHTNING,816.0
...,...
HIGH WINDS 82,0.0
HIGH WINDS AND WIND CHILL,0.0
HIGH WINDS DUST STORM,0.0
HIGH WINDS HEAVY RAINS,0.0


In [65]:
# are there any unreported property damages
print "Number of unreported property damages: {}.".format(df.PROPDMG.isnull().sum())

Number of unreported property damages: 0.


In [70]:
df = df[(df.FATALITIES > 0) | (df.INJURIES > 0) | (df.PROPDMGEXP.notnull()) | (df.CROPDMGEXP.notnull())]

In [71]:
# we now have reduced the number of event types for 985 to 491
len(df.EVTYPE.unique())

491

In [72]:
df.EVTYPE.value_counts()

HAIL                          94476
THUNDERSTORM WIND             81960
TSTM WIND                     65588
TORNADO                       52205
FLASH FLOOD                   33223
                              ...  
LIGHTNING FIRE                    1
TORNADOES, TSTM WIND, HAIL        1
TORNDAO                           1
NON-SEVERE WIND DAMAGE            1
RECORD WARMTH                     1
Name: EVTYPE, dtype: int64

In [80]:
# looks like there are a lot of EVTYPEs with count of 1 -- how many exactly?
print "There are {} EVTYPEs with only 1 occurance.".format((df.EVTYPE.value_counts() == 1).sum())

There are 247 EVTYPEs with only 1 occurance.


In [84]:
df['EVTYPE'] = df.EVTYPE.apply(lambda evt: evt.strip().upper())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [85]:
(df.EVTYPE.value_counts() == 1).sum()

224

In [86]:
official_event_types = [
   'Astronomical Low Tide', 'Avalanche',
   'Blizzard', 'Coastal Flood', 'Cold/Wind Chill', 'Debris Flow',
   'Dense Fog', 'Dense Smoke', 'Drought', 'Dust Devil', 'Dust Storm',
   'Excessive Heat', 'Extreme Cold/Wind Chill', 'Flash Flood', 'Flood',
   'Frost/Freeze', 'Funnel Cloud', 'Freezing Fog', 'Hail', 'Heat',
   'Heavy Rain', 'Heavy Snow', 'High Surf', 'High Wind',
   'Hurricane (Typhoon)', 'Ice Storm', 'Lake-Effect Snow',
   'Lakeshore Flood', 'Lightning', 'Marine Hail', 'Marine High Wind',
   'Marine Strong Wind', 'Marine Thunderstorm Wind', 'Rip Current',
   'Seiche', 'Sleet', 'Storm Surge/Tide', 'Strong Wind',
   'Thunderstorm Wind', 'Tornado', 'Tropical Depression',
   'Tropical Storm', 'Tsunami', 'Volcanic Ash', 'Waterspout', 'Wildfire',
   'Winter Storm', 'Winter Weather',

   # We are also adding 'OTHER'...
   'OTHER'
]

official_event_types = map(lambda type: type.upper(), official_event_types)

In [87]:
official_event_types

['ASTRONOMICAL LOW TIDE',
 'AVALANCHE',
 'BLIZZARD',
 'COASTAL FLOOD',
 'COLD/WIND CHILL',
 'DEBRIS FLOW',
 'DENSE FOG',
 'DENSE SMOKE',
 'DROUGHT',
 'DUST DEVIL',
 'DUST STORM',
 'EXCESSIVE HEAT',
 'EXTREME COLD/WIND CHILL',
 'FLASH FLOOD',
 'FLOOD',
 'FROST/FREEZE',
 'FUNNEL CLOUD',
 'FREEZING FOG',
 'HAIL',
 'HEAT',
 'HEAVY RAIN',
 'HEAVY SNOW',
 'HIGH SURF',
 'HIGH WIND',
 'HURRICANE (TYPHOON)',
 'ICE STORM',
 'LAKE-EFFECT SNOW',
 'LAKESHORE FLOOD',
 'LIGHTNING',
 'MARINE HAIL',
 'MARINE HIGH WIND',
 'MARINE STRONG WIND',
 'MARINE THUNDERSTORM WIND',
 'RIP CURRENT',
 'SEICHE',
 'SLEET',
 'STORM SURGE/TIDE',
 'STRONG WIND',
 'THUNDERSTORM WIND',
 'TORNADO',
 'TROPICAL DEPRESSION',
 'TROPICAL STORM',
 'TSUNAMI',
 'VOLCANIC ASH',
 'WATERSPOUT',
 'WILDFIRE',
 'WINTER STORM',
 'WINTER WEATHER',
 'OTHER']

In [104]:
print "{}% of the EVTYPEs are official EVTYPEs".format(100. * df.EVTYPE.isin(official_event_types).sum() / len(df))

81.1455005369% of the EVTYPEs are official EVTYPEs


In [102]:
subset_df = df[df.EVTYPE.isin(official_event_types)]
df[~ df.index.isin(subset_df.index) ].shape

(84451, 36)

In [103]:
todo_df = df

Unnamed: 0_level_0,STATE__,BGN_DATE,BGN_TIME,TIME_ZONE,COUNTY,...,LATITUDE,LONGITUDE,LATITUDE_E,LONGITUDE_,REMARKS
REFNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1.0,1.0,4/18/1950 0:00:00,130,CST,97.0,...,3040.0,8812.0,3051.0,8806.0,
2.0,1.0,4/18/1950 0:00:00,145,CST,3.0,...,3042.0,8755.0,0.0,0.0,
3.0,1.0,2/20/1951 0:00:00,1600,CST,57.0,...,3340.0,8742.0,0.0,0.0,
4.0,1.0,6/8/1951 0:00:00,900,CST,89.0,...,3458.0,8626.0,0.0,0.0,
5.0,1.0,11/15/1951 0:00:00,1500,CST,43.0,...,3412.0,8642.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...
902293.0,56.0,11/30/2011 0:00:00,10:30:00 PM,MST,7.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: A strong cold front moved s...
902294.0,30.0,11/10/2011 0:00:00,02:48:00 PM,MST,9.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: A strong westerly flow alof...
902295.0,2.0,11/8/2011 0:00:00,02:58:00 PM,AKS,213.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: A 960 mb low over the south...
902296.0,2.0,11/9/2011 0:00:00,10:21:00 AM,AKS,202.0,...,0.0,0.0,0.0,0.0,EPISODE NARRATIVE: A 960 mb low over the south...


In [105]:
unrecognized_event_types_to_recognized_event_types = {
   'COLD': 'COLD/WIND CHILL',
   'COLD AND SNOW': 'COLD/WIND CHILL',
   'COLD WEATHER': 'COLD/WIND CHILL',
   'EXTREME COLD': 'EXTREME COLD/WIND CHILL',
   'EXTREME HEAT': 'EXCESSIVE HEAT',
   'EXTREME WINDCHILL': 'EXTREME COLD/WIND CHILL',
   'FLASH FLOOD/FLOOD': 'FLASH FLOOD',
   'FLASH FLOODING': 'FLASH FLOOD',
   'FLASH FLOODING/FLOOD': 'FLASH FLOOD',
   'FLOOD/FLASH FLOOD': 'FLASH FLOOD',
   'FLOODING': 'FLOOD',
   'FOG': 'DENSE FOG',
   'FREEZE': 'FROST/FREEZE',
   'FREEZING RAIN': 'HEAVY RAIN',
   'GLAZE': 'OTHER',
   'HEAT WAVE': 'HEAT',
   'HEAT WAVES': 'HEAT',
   'HEAVY RAIN/SEVERE WEATHER': 'HEAVY RAIN',
   'HEAVY SURF': 'HIGH SURF',
   'HEAVY SURF/HIGH SURF': 'HIGH SURF',
   'HIGH SEAS': 'OTHER',
   'HIGH WINDS': 'HIGH WIND',
   'HURRICANE': 'HURRICANE (TYPHOON)',
   'HURRICANE ERIN': 'HURRICANE (TYPHOON)',
   'HURRICANE OPAL': 'HURRICANE (TYPHOON)',
   'HURRICANE/TYPHOON': 'HURRICANE (TYPHOON)',
   'HYPOTHERMIA/EXPOSURE': 'OTHER',
   'ICE': 'ICE STORM',
   'ICY ROADS': 'OTHER',
   'LANDSLIDE': 'OTHER',
   'LOW TEMPERATURE': 'COLD/WIND CHILL',
   'MARINE MISHAP': 'MARINE HAIL',
   'MARINE TSTM WIND': 'MARINE THUNDERSTORM WIND RIP CURRENT',
   'RECORD/EXCESSIVE HEAT': 'EXCESSIVE HEAT',
   'RIP CURRENTS': 'MARINE THUNDERSTORM WIND RIP CURRENT',
   'RIVER FLOOD': 'FLOOD',
   'ROUGH SEAS': 'MARINE STRONG WIND',
   'SEVERE THUNDERSTORM': 'THUNDERSTORM WIND',
   'STORM SURGE': 'STORM SURGE/TIDE',
   'STRONG WINDS': 'STRONG WIND',
   'THUNDERSTORM WINDS': 'THUNDERSTORM WIND',
   'TORNADOES, TSTM WIND, HAIL': 'TORNADO',
   'TROPICAL STORM GORDON': 'TROPICAL STORM',
   'TSTM WIND': 'THUNDERSTORM WIND',
   'TSTM WIND/HAIL': 'THUNDERSTORM WIND',
   'TYPHOON': 'HURRICANE (TYPHOON)',
   'UNSEASONABLY WARM': 'OTHER',
   'UNSEASONABLY WARM AND DRY': 'OTHER',
   'URBAN/SML STREAM FLD': 'FLOOD',
   'WILD FIRES': 'WILDFIRE',
   'WILD/FOREST FIRE': 'WILDFIRE',
   'WIND': 'HIGH WIND',
   'WINTER STORMS': 'WINTER STORM',
   'WINTER WEATHER/MIX': 'WINTER WEATHER'
}

In [112]:
def event_type(event_type):
   if event_type in official_event_types:
       return event_type

   if event_type in unrecognized_event_types_to_recognized_event_types:
       return unrecognized_event_types_to_recognized_event_types[event_type]

   return 'OTHER'

In [113]:
df['EVTYPE'] = df.EVTYPE.apply(event_type)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [115]:
100. * df.EVTYPE.isin(official_event_types).sum() / len(df)

100.0

In [116]:
df.FATALITIES.isnull().sum()

0

In [117]:
df.INJURIES.isnull().sum()

0

In [118]:
df.PROPDMG.head()

REFNUM
1.0    25.0
2.0     2.5
3.0    25.0
4.0     2.5
5.0     2.5
Name: PROPDMG, dtype: float64

In [119]:
df.PROPDMGEXP.head()

REFNUM
1.0    K
2.0    K
3.0    K
4.0    K
5.0    K
Name: PROPDMGEXP, dtype: object

In [121]:
print df.PROPDMGEXP.unique()

['K' 'M' nan 'B' 'm' '+' '0' '5' '6' '?' '4' '2' '3' 'h' '7' 'H' '-' '1'
 '8']


In [129]:
print df.PROPDMGEXP.value_counts()

K    424665
M     11330
0       216
B        40
5        28
1        25
2        13
?         8
m         7
H         6
+         5
7         5
3         4
4         4
6         4
-         1
8         1
h         1
Name: PROPDMGEXP, dtype: int64


In [132]:
subset_propdmg = df[['PROPDMG', 'PROPDMGEXP']]
subset_propdmg

Unnamed: 0_level_0,PROPDMG,PROPDMGEXP
REFNUM,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,25.0,K
2.0,2.5,K
3.0,25.0,K
4.0,2.5,K
5.0,2.5,K
6.0,2.5,K
7.0,2.5,K
8.0,2.5,K
9.0,25.0,K
10.0,25.0,K


In [None]:
# WAS WORKING on what's below but then we went over a different method in class
def convert_property_damage_exp(row):
    if row['PROPDMGEXP'] == 'K':
        row['PROPDMG'] = row['PROPDMG']
    if row['PROPDMGEXP'] == 'M':
        row['PROPDMG'] = row['PROPDMG']/1000.
    if row['PROPDMGEXP'] == 'B':
        row['PROPDMG'] = row['PROPDMG']/1000000.

In [133]:
exponent_to_multiplier = {np.nan: np.nan, '+': np.nan, '-': np.nan, '?': np.nan,
   '0': 10 ** 0,
   '1': 10 ** 1,
   '2': 10 ** 2, 'H': 10 ** 2, 'h': 10 ** 2,
   '3': 10 ** 3,'K': 10 ** 3,'k': 10 ** 3,
   '4': 10 ** 4,
   '5': 10 ** 5,
   '6': 10 ** 6, 'M': 10 ** 6, 'm': 10 ** 6,
   '7': 10 ** 7,
   '8': 10 ** 8,
   'B': 10 ** 9}

In [134]:
df.PROPDMG = df.PROPDMG * df.PROPDMGEXP.map(exponent_to_multiplier)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [135]:
df.PROPDMG

REFNUM
1.0         25000.0
2.0          2500.0
3.0         25000.0
4.0          2500.0
5.0          2500.0
6.0          2500.0
7.0          2500.0
8.0          2500.0
9.0         25000.0
10.0        25000.0
             ...   
902288.0        0.0
902289.0        0.0
902290.0        0.0
902291.0        0.0
902292.0        0.0
902293.0        0.0
902294.0        0.0
902295.0        0.0
902296.0        0.0
902297.0        0.0
Name: PROPDMG, dtype: float64