## Exploratory Analysis for all temperature data 2000-2019

### Data sources

location temperature data - 
https://www.ncdc.noaa.gov/cdo-web/search

water temperature data - 
https://coastwatch.glerl.noaa.gov/statistic/statistic.html


### Date

#### Botanical Garden

- DATE - itself
- TOBS - Temperature at time of observation
- TMAX - Maximum Temperature
- TMIN - Minimum Temperature
- AVG - average between the maximum and the minimum daily values
- LOC - Location

#### Ohare Airport

- DATE
- TAVG - Average Temperature
- TMAX - Maximum Temperature
- TMIN - Minimum Temperature
- AVG - average between the maximum and the minimum daily values
- error - TAVG - AVG
- LOC - Location

#### Lake Michigan Temperature

- Year - year observations were taken
- Day - represents the day of the way
- Michigan - the average daily temp of lake michigan (C deg)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
# botanical gardens temp data

garden = pd.read_csv('data/temp_data/garden_temp_2000_2019.csv')
garden.head()

Unnamed: 0,DATE,TOBS,TMAX,TMIN,AVG,LOC
0,2000-01-01,37.0,44.0,28.0,36.0,garden
1,2000-01-02,48.0,49.0,32.0,40.5,garden
2,2000-01-03,37.0,62.0,35.0,48.5,garden
3,2000-01-04,29.0,39.0,25.0,32.0,garden
4,2000-01-05,19.0,30.0,14.0,22.0,garden


In [3]:
# ohare airport data

ohare = pd.read_csv('data/temp_data/ohare_temp_2000_2019.csv')
ohare.head()

Unnamed: 0,DATE,TAVG,TMAX,TMIN,AVG,error,LOC
0,2000-01-01,42.0,48.0,35.0,41.5,0.5,ohare
1,2000-01-02,48.0,60.0,35.0,47.5,0.5,ohare
2,2000-01-03,35.0,38.0,32.0,35.0,0.0,ohare
3,2000-01-04,28.0,33.0,23.0,28.0,0.0,ohare
4,2000-01-05,20.0,26.0,13.0,19.5,0.5,ohare


In [5]:
water = pd.read_csv('data/lake_michigan/lake_michigan_water_temp_1995_2019.csv')
water.head()

Unnamed: 0,Year,Day,Michigan
0,1995,1,5.02
1,1995,2,4.95
2,1995,3,0.2
3,1995,4,0.2
4,1995,5,0.2


In [6]:
garden.shape

(7046, 6)

In [7]:
ohare.shape

(7046, 7)

In [8]:
water.shape

(8766, 3)

In [9]:
water.isnull().sum()

Year        0
Day         0
Michigan    0
dtype: int64

In [10]:
ohare.dtypes

DATE      object
TAVG     float64
TMAX     float64
TMIN     float64
AVG      float64
error    float64
LOC       object
dtype: object

In [11]:
garden.dtypes

DATE     object
TOBS    float64
TMAX    float64
TMIN    float64
AVG     float64
LOC      object
dtype: object

In [12]:
water.dtypes

Year          int64
Day           int64
Michigan    float64
dtype: object

In [13]:
garden.DATE = pd.to_datetime(garden.DATE)

In [14]:
garden.head()

Unnamed: 0,DATE,TOBS,TMAX,TMIN,AVG,LOC
0,2000-01-01,37.0,44.0,28.0,36.0,garden
1,2000-01-02,48.0,49.0,32.0,40.5,garden
2,2000-01-03,37.0,62.0,35.0,48.5,garden
3,2000-01-04,29.0,39.0,25.0,32.0,garden
4,2000-01-05,19.0,30.0,14.0,22.0,garden


In [17]:
garden.tail()

Unnamed: 0_level_0,TOBS,TMAX,TMIN,AVG,LOC
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-04-15,34.0,36.0,30.0,33.0,garden
2019-04-16,54.0,54.0,34.0,44.0,garden
2019-04-17,45.0,72.0,41.0,56.5,garden
2019-04-18,51.0,75.0,45.0,60.0,garden
2019-04-19,40.0,58.0,40.0,49.0,garden


In [15]:
ohare.DATE = pd.to_datetime(ohare.DATE)

In [16]:
garden.set_index(keys='DATE', inplace=True)

In [17]:
ohare.set_index(keys='DATE', inplace=True)

In [18]:
garden.head()

Unnamed: 0_level_0,TOBS,TMAX,TMIN,AVG,LOC
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-01,37.0,44.0,28.0,36.0,garden
2000-01-02,48.0,49.0,32.0,40.5,garden
2000-01-03,37.0,62.0,35.0,48.5,garden
2000-01-04,29.0,39.0,25.0,32.0,garden
2000-01-05,19.0,30.0,14.0,22.0,garden


In [19]:
ohare.head()

Unnamed: 0_level_0,TAVG,TMAX,TMIN,AVG,error,LOC
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-01,42.0,48.0,35.0,41.5,0.5,ohare
2000-01-02,48.0,60.0,35.0,47.5,0.5,ohare
2000-01-03,35.0,38.0,32.0,35.0,0.0,ohare
2000-01-04,28.0,33.0,23.0,28.0,0.0,ohare
2000-01-05,20.0,26.0,13.0,19.5,0.5,ohare


In [20]:
df = pd.concat([ohare, garden], axis=1)

In [21]:
df.shape

(7049, 11)

In [22]:
df.head()

Unnamed: 0_level_0,TAVG,TMAX,TMIN,AVG,error,LOC,TOBS,TMAX,TMIN,AVG,LOC
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-01-01,42.0,48.0,35.0,41.5,0.5,ohare,37.0,44.0,28.0,36.0,garden
2000-01-02,48.0,60.0,35.0,47.5,0.5,ohare,48.0,49.0,32.0,40.5,garden
2000-01-03,35.0,38.0,32.0,35.0,0.0,ohare,37.0,62.0,35.0,48.5,garden
2000-01-04,28.0,33.0,23.0,28.0,0.0,ohare,29.0,39.0,25.0,32.0,garden
2000-01-05,20.0,26.0,13.0,19.5,0.5,ohare,19.0,30.0,14.0,22.0,garden


In [23]:
df.shape

(7049, 11)

# Let's rethink these sets.

In [24]:
data = pd.read_csv('data/chicago/jan012015_oct312017_unclean.csv')
print(data.shape)
data.head()

(106578, 18)


Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,AWND,DAPR,MDPR,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,TOBS,WDF2,WSF2
0,US1ILCK0148,"OAK LAWN 1.9 SE, IL US",41.6936,-87.729,182.3,3/10/15,,,,0.0,0.0,,,,,,,
1,US1ILCK0148,"OAK LAWN 1.9 SE, IL US",41.6936,-87.729,182.3,3/11/15,,,,0.0,0.0,,,,,,,
2,US1ILCK0148,"OAK LAWN 1.9 SE, IL US",41.6936,-87.729,182.3,3/15/15,,,,0.0,0.0,,,,,,,
3,US1ILCK0148,"OAK LAWN 1.9 SE, IL US",41.6936,-87.729,182.3,3/25/15,,,,0.2,,,,,,,,
4,US1ILCK0148,"OAK LAWN 1.9 SE, IL US",41.6936,-87.729,182.3,3/27/15,,,,0.01,,,,,,,,


In [26]:
location = (41.976, -87.648)

In [31]:
data[(data.LATITUDE <= 41.98) & (data.LATITUDE >= 41.95)]

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,AWND,DAPR,MDPR,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,TOBS,WDF2,WSF2
28745,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,4/7/15,,,,0.03,,,,,,,,
28746,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,4/8/15,,,,0.01,,,,,,,,
28747,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,4/9/15,,,,0.30,,,,,,,,
28748,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,4/10/15,,,,0.98,,,,,,,,
28749,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,4/20/15,,,,0.57,,,,,,,,
28750,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,4/25/15,,,,0.22,,,,,,,,
28751,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,5/5/15,,,,0.93,,,,,,,,
28752,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,5/6/15,,,,0.06,,,,,,,,
28753,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,5/9/15,,,,0.75,,,,,,,,
28754,US1ILCK0168,"RAVENSWOOD MANOR 0.1 ESE, IL US",41.9642,-87.6974,178.3,5/10/15,,,,0.09,,,,,,,,


In [65]:
# find the garden data

garden = pd.read_csv('data/temp_data/Botanical_Garden_FULL_2000_2019_temps.csv')
print(garden.shape)
garden.head()

(7046, 13)


Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,MDPR,PRCP,SNOW,SNWD,TMAX,TMIN,TOBS
0,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2000-01-01,,0.0,0.0,0.0,44.0,28.0,37.0
1,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2000-01-02,,0.02,0.0,0.0,49.0,32.0,48.0
2,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2000-01-03,,0.0,0.0,0.0,62.0,35.0,37.0
3,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2000-01-04,,0.26,0.4,0.0,39.0,25.0,29.0
4,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2000-01-05,,0.0,0.0,0.0,30.0,14.0,19.0


In [66]:
garden.dtypes

STATION       object
NAME          object
LATITUDE     float64
LONGITUDE    float64
ELEVATION    float64
DATE          object
MDPR         float64
PRCP         float64
SNOW         float64
SNWD         float64
TMAX         float64
TMIN         float64
TOBS         float64
dtype: object

In [67]:
garden.isnull().sum()

STATION         0
NAME            0
LATITUDE        0
LONGITUDE       0
ELEVATION       0
DATE            0
MDPR         7045
PRCP           25
SNOW          312
SNWD          322
TMAX            9
TMIN           10
TOBS           14
dtype: int64

In [68]:
garden.columns

Index(['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'DATE', 'MDPR',
       'PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN', 'TOBS'],
      dtype='object')

In [72]:
garden = garden[['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'DATE', 'PRCP', 'TMAX', 'TMIN', 'TOBS']]
print(garden.shape)
print(garden.head())
garden.tail()

(7046, 10)
       STATION                           NAME  LATITUDE  LONGITUDE  ELEVATION  \
0  USC00111497  CHICAGO BOTANIC GARDEN, IL US  42.13987  -87.78537      192.0   
1  USC00111497  CHICAGO BOTANIC GARDEN, IL US  42.13987  -87.78537      192.0   
2  USC00111497  CHICAGO BOTANIC GARDEN, IL US  42.13987  -87.78537      192.0   
3  USC00111497  CHICAGO BOTANIC GARDEN, IL US  42.13987  -87.78537      192.0   
4  USC00111497  CHICAGO BOTANIC GARDEN, IL US  42.13987  -87.78537      192.0   

         DATE  PRCP  TMAX  TMIN  TOBS  
0  2000-01-01  0.00  44.0  28.0  37.0  
1  2000-01-02  0.02  49.0  32.0  48.0  
2  2000-01-03  0.00  62.0  35.0  37.0  
3  2000-01-04  0.26  39.0  25.0  29.0  
4  2000-01-05  0.00  30.0  14.0  19.0  


Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TMAX,TMIN,TOBS
7041,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2019-04-15,0.91,36.0,30.0,34.0
7042,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2019-04-16,0.0,54.0,34.0,54.0
7043,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2019-04-17,0.0,72.0,41.0,45.0
7044,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2019-04-18,0.02,75.0,45.0,51.0
7045,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2019-04-19,0.33,58.0,40.0,40.0


In [70]:
garden.dtypes

STATION       object
NAME          object
LATITUDE     float64
LONGITUDE    float64
ELEVATION    float64
DATE          object
PRCP         float64
TMAX         float64
TMIN         float64
TOBS         float64
dtype: object

In [71]:
garden[garden.TMAX.isnull()]

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TMAX,TMIN,TOBS
920,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2002-07-09,0.7,,70.0,77.0
2025,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2005-07-18,0.0,,,
3832,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2010-06-29,0.0,,,
3858,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2010-07-25,0.0,,,
3908,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2010-09-13,0.0,,,
4292,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2011-10-02,0.0,,39.0,
4738,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2012-12-22,0.0,,16.0,18.0
6324,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2017-04-28,0.0,,39.0,45.0
6549,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2017-12-09,0.03,,,


In [89]:
garden_apr012013_apr192019 = garden[garden.DATE >= '2013-04-01']
print(garden_apr012013_apr192019.shape)
garden_apr012013_apr192019.head()

(2208, 10)


Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TMAX,TMIN,TOBS
4838,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-01,0.0,58.0,27.0,31.0
4839,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-02,0.0,41.0,25.0,28.0
4840,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-03,0.0,46.0,21.0,30.0
4841,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-04,0.0,41.0,25.0,33.0
4842,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-05,0.0,60.0,33.0,38.0


In [90]:
garden_apr012013_apr192019.DATE

4838    2013-04-01
4839    2013-04-02
4840    2013-04-03
4841    2013-04-04
4842    2013-04-05
4843    2013-04-06
4844    2013-04-07
4845    2013-04-08
4846    2013-04-09
4847    2013-04-10
4848    2013-04-11
4849    2013-04-12
4850    2013-04-13
4851    2013-04-14
4852    2013-04-15
4853    2013-04-16
4854    2013-04-17
4855    2013-04-18
4856    2013-04-19
4857    2013-04-20
4858    2013-04-21
4859    2013-04-22
4860    2013-04-23
4861    2013-04-24
4862    2013-04-25
4863    2013-04-26
4864    2013-04-27
4865    2013-04-28
4866    2013-04-29
4867    2013-04-30
           ...    
7016    2019-03-21
7017    2019-03-22
7018    2019-03-23
7019    2019-03-24
7020    2019-03-25
7021    2019-03-26
7022    2019-03-27
7023    2019-03-28
7024    2019-03-29
7025    2019-03-30
7026    2019-03-31
7027    2019-04-01
7028    2019-04-02
7029    2019-04-03
7030    2019-04-04
7031    2019-04-05
7032    2019-04-06
7033    2019-04-07
7034    2019-04-08
7035    2019-04-09
7036    2019-04-10
7037    2019

In [91]:
pd.to_datetime(garden_apr012013_apr192019.DATE)

4838   2013-04-01
4839   2013-04-02
4840   2013-04-03
4841   2013-04-04
4842   2013-04-05
4843   2013-04-06
4844   2013-04-07
4845   2013-04-08
4846   2013-04-09
4847   2013-04-10
4848   2013-04-11
4849   2013-04-12
4850   2013-04-13
4851   2013-04-14
4852   2013-04-15
4853   2013-04-16
4854   2013-04-17
4855   2013-04-18
4856   2013-04-19
4857   2013-04-20
4858   2013-04-21
4859   2013-04-22
4860   2013-04-23
4861   2013-04-24
4862   2013-04-25
4863   2013-04-26
4864   2013-04-27
4865   2013-04-28
4866   2013-04-29
4867   2013-04-30
          ...    
7016   2019-03-21
7017   2019-03-22
7018   2019-03-23
7019   2019-03-24
7020   2019-03-25
7021   2019-03-26
7022   2019-03-27
7023   2019-03-28
7024   2019-03-29
7025   2019-03-30
7026   2019-03-31
7027   2019-04-01
7028   2019-04-02
7029   2019-04-03
7030   2019-04-04
7031   2019-04-05
7032   2019-04-06
7033   2019-04-07
7034   2019-04-08
7035   2019-04-09
7036   2019-04-10
7037   2019-04-11
7038   2019-04-12
7039   2019-04-13
7040   201

In [92]:
pd.date_range(start = '1/1/15', end = '10/31/17' ).difference(pd.to_datetime(garden_apr012013_apr192019.DATE))


DatetimeIndex(['2016-11-16', '2017-04-26'], dtype='datetime64[ns]', freq=None)

In [93]:
garden_apr012013_apr192019.isnull().sum()

STATION      0
NAME         0
LATITUDE     0
LONGITUDE    0
ELEVATION    0
DATE         0
PRCP         3
TMAX         2
TMIN         2
TOBS         3
dtype: int64

In [94]:
garden_apr012013_apr192019[garden_apr012013_apr192019.PRCP.isnull() == True]

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TMAX,TMIN,TOBS
4858,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-21,,47.0,33.0,40.0
6221,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2017-01-14,,30.0,10.0,29.0
6224,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2017-01-17,,38.0,31.0,37.0


In [100]:
garden_apr012013_apr192019.to_csv('data/garden/apr012013_apr192019.csv', index=False)

In [101]:
data = pd.read_csv('data/garden/apr012013_apr192019.csv')
print(data.shape)
data.head()

(2208, 10)


Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TMAX,TMIN,TOBS
0,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-01,0.0,58.0,27.0,31.0
1,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-02,0.0,41.0,25.0,28.0
2,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-03,0.0,46.0,21.0,30.0
3,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-04,0.0,41.0,25.0,33.0
4,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-05,0.0,60.0,33.0,38.0


In [102]:
garden_apr012013_apr192019[garden_apr012013_apr192019.DATE < '2019']

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TMAX,TMIN,TOBS
4838,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-01,0.00,58.0,27.0,31.0
4839,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-02,0.00,41.0,25.0,28.0
4840,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-03,0.00,46.0,21.0,30.0
4841,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-04,0.00,41.0,25.0,33.0
4842,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-05,0.00,60.0,33.0,38.0
4843,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-06,0.00,45.0,34.0,43.0
4844,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-07,0.15,69.0,42.0,45.0
4845,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-08,0.51,56.0,36.0,46.0
4846,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-09,0.06,63.0,37.0,39.0
4847,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-10,0.21,49.0,37.0,39.0


In [103]:
# saving the data for 04/01/2013 - 12/31/2018
garden_apr012013_apr192019[garden_apr012013_apr192019.DATE < '2019'].to_csv('data/garden/apr012013_dec312018.csv', index=False)

# There are two dates missing from this set, 2016-11-16 and 2017-04-26.
# There are a few 

In [58]:
ohare = data[data.NAME == 'CHICAGO OHARE INTERNATIONAL AIRPORT, IL US']
print(ohare.shape)
ohare.head()

(1035, 18)


Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,AWND,DAPR,MDPR,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,TOBS,WDF2,WSF2
63108,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",41.995,-87.9336,201.8,1/1/15,14.54,,,0.0,0.0,0.0,20.0,32.0,14.0,,240.0,23.9
63109,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",41.995,-87.9336,201.8,1/2/15,4.7,,,0.0,0.0,0.0,26.0,34.0,15.0,,150.0,12.1
63110,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",41.995,-87.9336,201.8,1/3/15,5.59,,,0.57,0.4,0.0,31.0,34.0,29.0,,140.0,13.0
63111,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",41.995,-87.9336,201.8,1/4/15,15.88,,,0.13,2.2,1.2,29.0,33.0,2.0,,320.0,25.9
63112,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",41.995,-87.9336,201.8,1/5/15,10.29,,,0.15,1.8,2.0,2.0,6.0,-3.0,,310.0,21.9


In [59]:
ohare.isnull().sum()

STATION         0
NAME            0
LATITUDE        0
LONGITUDE       0
ELEVATION       0
DATE            0
AWND            0
DAPR         1035
MDPR         1035
PRCP            0
SNOW            0
SNWD            0
TAVG            0
TMAX            0
TMIN            0
TOBS         1035
WDF2            0
WSF2            0
dtype: int64

In [60]:
ohare.columns

Index(['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'DATE', 'AWND',
       'DAPR', 'MDPR', 'PRCP', 'SNOW', 'SNWD', 'TAVG', 'TMAX', 'TMIN', 'TOBS',
       'WDF2', 'WSF2'],
      dtype='object')