In [1]:
import pandas as pd

In [2]:
# Base URL that the per-year, per-type CSV file names are appended to.
base_url = "http://www.spc.noaa.gov/wcm/data/"

# One CSV is fetched for every (year, type) combination.
years = [str(year) for year in range(2010, 2017)]
types = ['torn', 'hail', 'wind']

# Column names common to all three file layouts, in file order.
_shared_headings = [
    'om', 'yr', 'mo', 'dy', 'date', 'time', 'tz', 'st', 'stf', 'stn', 'mag',
    'inj', 'fat', 'loss', 'closs', 'slat', 'slon', 'elat', 'elon', 'len',
    'wid', 'ns', 'sn', 'sg', 'f1', 'f2', 'f3', 'f4',
]

# Full column list per event type; 'torn' and 'wind' each carry one extra
# trailing column on top of the shared ones.
source_headings = {
    'torn': _shared_headings + ['fc'],
    'hail': list(_shared_headings),
    'wind': _shared_headings + ['mt'],
}

# Subset of columns kept from every file.
filtered_columns = [
    "yr", "mo", "dy", "date", "time", "st", "mag",
    "inj", "fat", "loss", "closs", "slat", "slon",
]

In [3]:
# Download one CSV per (year, event type), keep the columns of interest and
# stack everything into a single frame.
#
# The per-file frames are collected in a list and concatenated once at the
# end: DataFrame.append copied the whole accumulated frame on every call and
# no longer exists in pandas 2.x.
event_frames = []

for year in years:
    for weather_type in types:
        url = f"{base_url}{year}_{weather_type}.csv"
        # Read without a header so the column names can be assigned from
        # source_headings for this event type.
        df = pd.read_csv(url, header=None)
        df.columns = source_headings[weather_type]
        df = df[filtered_columns]
        # Any header line embedded in the file shows up as a data row whose
        # 'yr' cell is the literal string 'yr'; filter those rows out.
        # Unlike dropping by index label, this does not raise when a file
        # contains no such row.
        df = df[df['yr'] != 'yr'].reset_index(drop=True)
        df['type'] = weather_type
        event_frames.append(df)
        print(df.head())

# Row labels still restart at 0 for every file at this point.
entire_df = pd.concat(event_frames) if event_frames else pd.DataFrame()

     yr mo  dy        date      time  st mag inj fat  loss closs     slat  \
0  2010  1  18  2010-01-18  17:24:00  CA   0   0   0   0.0   0.0    36.73   
1  2010  1  19  2010-01-19  12:32:00  CA   0   0   0   0.0   0.0  34.4248   
2  2010  1  19  2010-01-19  14:55:00  CA   1   0   0   0.5   0.0   33.716   
3  2010  1  20  2010-01-20  15:27:00  LA   0   0   0   0.0   0.0   30.815   
4  2010  1  20  2010-01-20  16:26:00  TX   1   0   0  0.04   0.0  31.5095   

       slon  type  
0   -119.86  torn  
1  -119.877  torn  
2  -118.125  torn  
3  -92.0455  torn  
4  -93.9232  torn  
     yr  mo  dy        date      time  st   mag  inj  fat  loss  closs  \
0  2010  12  31  2010-12-31  22:40:00  MS  1.75    0    0   0.0    0.0   
1  2010  12  31  2010-12-31  22:25:00  MS  1.75    0    0   0.0    0.0   
2  2010  12  31  2010-12-31  22:20:00  MS  1.75    0    0   0.0    0.0   
3  2010  12  31  2010-12-31  22:20:00  MS  1.00    0    0   0.0    0.0   
4  2010  12  31  2010-12-31  21:50:00  AR  1.00

     yr  mo  dy        date      time  st  mag  inj  fat  loss  closs  \
0  2014   1   9  2014-01-09  17:19:00  FL   70    0    0  0.15    0.0   
1  2014   1  11  2014-01-11  05:18:00  AL   50    0    0  0.00    0.0   
2  2014   1  11  2014-01-11  05:53:00  GA   70    0    0  0.11    0.0   
3  2014   1  11  2014-01-11  06:15:00  AL   55    0    0  0.00    0.0   
4  2014   1  11  2014-01-11  06:20:00  AL   50    0    0  0.00    0.0   

      slat     slon  type  
0  27.0717 -80.1382  wind  
1  33.7223 -85.8386  wind  
2  34.0300 -85.2500  wind  
3  32.6100 -86.5500  wind  
4  32.4700 -86.6200  wind  
     yr mo dy        date      time  st mag inj fat   loss closs     slat  \
0  2015  1  3  2015-01-03  14:05:00  MS   0   0   0  0.001   0.0    31.43   
1  2015  1  3  2015-01-03  14:36:00  MS   1   0   0   0.02   0.0    31.68   
2  2015  1  3  2015-01-03  14:39:00  MS   1   0   0   0.04   0.0     31.7   
3  2015  1  3  2015-01-03  15:04:00  MS   2   0   0  0.507   0.0  32.1304   
4  2015 

In [4]:
# Show the (row count, column count) of the combined frame.
entire_df.shape

(197287, 14)

In [5]:
# Every per-file frame was stacked with its own 0..n-1 row labels, so the
# combined frame has repeated labels; replace them with one continuous index.
entire_df = entire_df.reset_index(drop=True)

In [6]:
# Copy the continuous row index into a regular column named 'index'.
# NOTE(review): this cell is not idempotent - running it again inserts a
# further index column (pandas falls back to the name 'level_0'), so it
# should be executed once per kernel session.
entire_df = entire_df.reset_index()

In [7]:
# Columns that must be numeric; values that cannot be parsed become NaN
# (errors="coerce") instead of raising.
numeric_columns = ['yr', 'mo', 'dy', 'mag', 'inj', 'fat', 'loss', 'closs', 'slat', 'slon']
for column_name in numeric_columns:
    entire_df[column_name] = pd.to_numeric(entire_df[column_name], errors="coerce")

# Join the separate 'date' and 'time' strings and parse them into a single
# timestamp column.
entire_df['date_time'] = pd.to_datetime(
    entire_df['date'] + ' ' + entire_df['time'],
    format="%Y-%m-%d %H:%M:%S",
)

entire_df.dtypes

index                 int64
yr                    int64
mo                    int64
dy                    int64
date                 object
time                 object
st                   object
mag                 float64
inj                   int64
fat                   int64
loss                float64
closs               float64
slat                float64
slon                float64
type                 object
date_time    datetime64[ns]
dtype: object

In [8]:
# The separate 'date' and 'time' strings have been combined into
# 'date_time', so remove the two source columns.
entire_df = entire_df.drop(columns=['date', 'time'])

entire_df.head()

Unnamed: 0,index,yr,mo,dy,st,mag,inj,fat,loss,closs,slat,slon,type,date_time
0,0,2010,1,18,CA,0.0,0,0,0.0,0.0,36.73,-119.86,torn,2010-01-18 17:24:00
1,1,2010,1,19,CA,0.0,0,0,0.0,0.0,34.4248,-119.877,torn,2010-01-19 12:32:00
2,2,2010,1,19,CA,1.0,0,0,0.5,0.0,33.716,-118.125,torn,2010-01-19 14:55:00
3,3,2010,1,20,LA,0.0,0,0,0.0,0.0,30.815,-92.0455,torn,2010-01-20 15:27:00
4,4,2010,1,20,TX,1.0,0,0,0.04,0.0,31.5095,-93.9232,torn,2010-01-20 16:26:00


In [9]:
# Count missing values per column. The recorded output is zero for every
# column, i.e. there are no rows that need dropping.
entire_df.isna().sum()

index        0
yr           0
mo           0
dy           0
st           0
mag          0
inj          0
fat          0
loss         0
closs        0
slat         0
slon         0
type         0
date_time    0
dtype: int64

In [10]:
# Write the cleaned frame to AllEvents.csv in the working directory.
# NOTE(review): with pandas' default index=True the row index is written as
# an extra unnamed first column, alongside the existing 'index' column.
entire_df.to_csv('AllEvents.csv')

In [11]:
from sqlalchemy import create_engine,MetaData,Table,Column

# File-backed SQLite database in the working directory.
engine = create_engine('sqlite:///SevereWeather.sqlite')

# Store the cleaned frame as table 'Events'. The database file survives
# kernel restarts, and to_sql's default if_exists='fail' raises as soon as
# the table already exists, so re-running the notebook would stop here.
# 'replace' drops and recreates the table, making the cell re-runnable.
# The frame's row index is still written (default index=True); because an
# 'index' column already exists it ends up as the 'level_0' column.
entire_df.to_sql('Events', engine, if_exists='replace')

In [12]:
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base

In [13]:
# Start from an empty MetaData container of our own.
metadata = MetaData()

# Populate it with the table definitions found in the database behind
# `engine` (reflect() also accepts options such as `only` to restrict which
# tables are loaded; none are used here).
metadata.reflect(bind=engine)


In [14]:
# Create an automap declarative base on top of the already reflected metadata.
Base = automap_base(metadata=metadata)

In [15]:
# Generate mapped classes from the tables held in Base.metadata.
# NOTE(review): automap only maps tables that have a primary key; the
# reflected 'Events' columns show none, so presumably no class is produced
# and the table has to be queried through its Table object - confirm.
Base.prepare()

In [16]:
# Display the tables (with their columns) known to the automap base's metadata.
Base.metadata.tables

immutabledict({'Events': Table('Events', MetaData(bind=None), Column('level_0', BIGINT(), table=<Events>), Column('index', BIGINT(), table=<Events>), Column('yr', BIGINT(), table=<Events>), Column('mo', BIGINT(), table=<Events>), Column('dy', BIGINT(), table=<Events>), Column('st', TEXT(), table=<Events>), Column('mag', FLOAT(), table=<Events>), Column('inj', BIGINT(), table=<Events>), Column('fat', BIGINT(), table=<Events>), Column('loss', FLOAT(), table=<Events>), Column('closs', FLOAT(), table=<Events>), Column('slat', FLOAT(), table=<Events>), Column('slon', FLOAT(), table=<Events>), Column('type', TEXT(), table=<Events>), Column('date_time', DATETIME(), table=<Events>), schema=None)})

In [17]:
from sqlalchemy import inspect,func
# Inspector bound to the engine, used to list the schema straight from the
# database.
inspector = inspect(engine)

# Walk every column of every table and print it as "name,type".
all_columns = (
    column
    for table_name in inspector.get_table_names()
    for column in inspector.get_columns(table_name)
)
for column in all_columns:
    print(f"{column['name']},{column['type']}")

level_0,BIGINT
index,BIGINT
yr,BIGINT
mo,BIGINT
dy,BIGINT
st,TEXT
mag,FLOAT
inj,BIGINT
fat,BIGINT
loss,FLOAT
closs,FLOAT
slat,FLOAT
slon,FLOAT
type,TEXT
date_time,DATETIME


In [18]:
# Table object for 'Events' attached to `metadata`. That metadata was
# reflected from the engine earlier and already lists 'Events', so
# presumably this hands back the already populated table - confirm.
Events = Table('Events',metadata)

In [19]:
# Load the column definitions of 'Events' from the database into the Table
# object (None = no column filter). Inspector.reflecttable was renamed to
# reflect_table in SQLAlchemy 1.4 and the old name was removed in 2.0, so
# prefer the new name and fall back to the old one on earlier releases.
_reflect_table = getattr(inspector, 'reflect_table', None) or inspector.reflecttable
_reflect_table(Events, None)

In [20]:
# Open an ORM session on the SQLite engine.
session = Session(engine)

# Build a query for the 'yr' column limited to 10 rows; nothing is sent to
# the database until the query is iterated.
year_column = Events.c.yr
results = session.query(year_column).limit(10)

In [21]:
# Iterating the query executes it; each row prints like a one-element tuple.
for row in results:
    print(row)

(2010,)
(2010,)
(2010,)
(2010,)
(2010,)
(2010,)
(2010,)
(2010,)
(2010,)
(2010,)


In [22]:
# Builds the same query as the earlier cell (the 'yr' column, limited to 10
# rows). This query object is not iterated in the cells shown here.
results = session.query(Events.columns['yr']).limit(10)

In [23]:
# Display the reflected Column object for 'yr' (name, type and owning table).
Events.columns['yr']

Column('yr', BIGINT(), table=<Events>)

In [24]:
# Sum of 'loss' for every (year, state, month) combination, fetched eagerly
# as a list of rows.
year_col = Events.c.yr
state_col = Events.c.st
month_col = Events.c.mo

results = (
    session.query(year_col, state_col, month_col, func.sum(Events.c.loss))
    .group_by(year_col, state_col, month_col)
    .all()
)