# In this notebook we bring together data for analysis.

### Resource used to determine these day counts:
[timeanddate.com](https://www.timeanddate.com/date/durationresult.html?m1=04&d1=1&y1=2013&m2=12&d2=31&y2=2018&ti=on)

Note: there are 

 - **3377** days in **01/01/2010 - 03/31/2019**, when you include both the first and last days in the set.
 - **2101** days in **04/01/2013 - 12/31/2018**, when you include both the first and last days in the set.
 - **1550** days in **01/01/2015 - 03/31/2019**, when you include both the first and last days in the set.
 - **1461** days in **01/01/2015 - 12/31/2018**, when you include both the first and last days in the set.

### Datasets included in this workbook:
 - buoy data from
  - Fsti2, Foster Beach
  - Jaki2, 63rd St
  - Chii2, Submerged off the coast
  
 - Ohare weather data
  
 - Lake Michigan Daily average temperatures

### We begin by combining all the datasets for the years 2015-2018.

In [1]:
import pandas as pd
import time
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

# Dates: Jan 01, 2015 - Dec 31, 2018, daily averages

#### 1461 days included

### Datasets included:
 - Fsti2, Foster Beach buoy data
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

In [2]:
# buoy FSTI2
buoy = pd.read_csv('data/buoy_FSTI2/jan012015_dec312018_averages.csv')
print(buoy.shape)
buoy.head()

(1461, 4)


Unnamed: 0,DATE,WDIR,WSPD,ATMP
0,2015-01-01,238.333333,9.890476,-5.928571
1,2015-01-02,216.086957,6.230435,-2.204348
2,2015-01-03,181.916667,5.654167,0.395833
3,2015-01-04,313.75,8.779167,-1.545833
4,2015-01-05,262.166667,9.5375,-16.145833


In [3]:
buoy.rename(mapper={'WDIR': 'buoy_wdir',
                    'WSPD': 'buoy_wspd',
                    'ATMP': 'buoy_atmp'}, 
            axis=1, 
            inplace=True)

buoy.head()

Unnamed: 0,DATE,buoy_wdir,buoy_wspd,buoy_atmp
0,2015-01-01,238.333333,9.890476,-5.928571
1,2015-01-02,216.086957,6.230435,-2.204348
2,2015-01-03,181.916667,5.654167,0.395833
3,2015-01-04,313.75,8.779167,-1.545833
4,2015-01-05,262.166667,9.5375,-16.145833


In [4]:
# ohare weather data (Jan 01, 2015 to Dec 31, 2018)

ohare = pd.read_csv('data/ohare/jan012015_dec312018_averages.csv')
print(ohare.shape)
ohare.head()

(1461, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [5]:
# ohare.drop(columns=['WSF2', 'WDF2'], inplace=True)
# ohare.head()

In [6]:
ohare.rename(mapper={'AWND': 'ohare_wspd',
                     'PRCP': 'ohare_prcp',
                     'SNOW': 'ohare_snfall',
                     'SNWD': 'ohare_sndpth',
                     'TAVG': 'ohare_atmp',
                     'TMAX': 'ohare_maxtmp',
                     'TMIN': 'ohare_mintmp',
                     'WDF2': 'ohare_w2dir',
                     'WSF2': 'ohare_w2spd'},
             axis=1,
             inplace=True)

ohare.head()

Unnamed: 0,DATE,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [7]:
# lake michigan daily average temps
lake_mic = pd.read_csv('data/lake_michigan/jan012015_dec312018_averages.csv')
print(lake_mic.shape)
lake_mic.head()

(1461, 2)


Unnamed: 0,DATE,lake_temp
0,2015-01-01,3.53
1,2015-01-02,3.5
2,2015-01-03,3.49
3,2015-01-04,3.36
4,2015-01-05,3.16


In [8]:
data = pd.concat([lake_mic, buoy.drop(columns='DATE'), ohare.drop(columns='DATE')], axis=1)
print(data.shape)
data.head()

(1461, 14)


Unnamed: 0,DATE,lake_temp,buoy_wdir,buoy_wspd,buoy_atmp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,3.53,238.333333,9.890476,-5.928571,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,3.5,216.086957,6.230435,-2.204348,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,3.49,181.916667,5.654167,0.395833,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,3.36,313.75,8.779167,-1.545833,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,3.16,262.166667,9.5375,-16.145833,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [9]:
# saving the data
data.to_csv('data/merged/jan012015_dec312018_averages.csv', index=False)

In [10]:
merged = pd.read_csv('data/merged/jan012015_dec312018_averages.csv')
print(merged.shape)
merged.head()

(1461, 14)


Unnamed: 0,DATE,lake_temp,buoy_wdir,buoy_wspd,buoy_atmp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,3.53,238.333333,9.890476,-5.928571,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,3.5,216.086957,6.230435,-2.204348,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,3.49,181.916667,5.654167,0.395833,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,3.36,313.75,8.779167,-1.545833,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,3.16,262.166667,9.5375,-16.145833,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


# Dates: January 1, 2015 - Dec 31, 2018, all data for buoy FSTI2.

#### 1461 days

### Datasets included:
 - Fsti2, Foster Beach buoy data
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

In [11]:
import pandas as pd

In [12]:
# buoy FSTI2

buoy = pd.read_csv('data/buoy_FSTI2/jan012015_dec312018.csv')
print(buoy.shape)
buoy.head()

(34057, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2015,1,1,0,0,235,7.0,-7.4
1,2015,1,1,1,0,253,9.5,-7.4
2,2015,1,1,2,0,210,8.5,-7.4
3,2015,1,1,3,0,249,10.2,-7.3
4,2015,1,1,4,0,246,8.1,-7.0


In [13]:
buoy['DATE'] = pd.to_datetime(buoy['#YY']*10000 + buoy['MM']*100 + buoy['DD'], format = '%Y%m%d')
print(buoy.shape)
buoy.head()

(34057, 9)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP,DATE
0,2015,1,1,0,0,235,7.0,-7.4,2015-01-01
1,2015,1,1,1,0,253,9.5,-7.4,2015-01-01
2,2015,1,1,2,0,210,8.5,-7.4,2015-01-01
3,2015,1,1,3,0,249,10.2,-7.3,2015-01-01
4,2015,1,1,4,0,246,8.1,-7.0,2015-01-01


In [14]:
buoy.columns

Index(['#YY', 'MM', 'DD', 'hh', 'mm', 'WDIR', 'WSPD', 'ATMP', 'DATE'], dtype='object')

In [15]:
buoy = buoy[['DATE', 'hh', 'WDIR', 'WSPD', 'ATMP']]
print(buoy.shape)
buoy.head()

(34057, 5)


Unnamed: 0,DATE,hh,WDIR,WSPD,ATMP
0,2015-01-01,0,235,7.0,-7.4
1,2015-01-01,1,253,9.5,-7.4
2,2015-01-01,2,210,8.5,-7.4
3,2015-01-01,3,249,10.2,-7.3
4,2015-01-01,4,246,8.1,-7.0


In [16]:
buoy.rename(mapper={'WDIR': 'buoy_wdir',
                    'WSPD': 'buoy_wspd',
                    'ATMP': 'buoy_atmp'}, 
            axis=1, 
            inplace=True)

buoy.head()

Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp
0,2015-01-01,0,235,7.0,-7.4
1,2015-01-01,1,253,9.5,-7.4
2,2015-01-01,2,210,8.5,-7.4
3,2015-01-01,3,249,10.2,-7.3
4,2015-01-01,4,246,8.1,-7.0


In [17]:
# ohare weather data (Jan 01, 2015 to Dec 31, 2018)

ohare = pd.read_csv('data/ohare/jan012015_dec312018_averages.csv')
print(ohare.shape)
ohare.head()

(1461, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [18]:
ohare.rename(mapper={'AWND': 'ohare_wspd',
                     'PRCP': 'ohare_prcp',
                     'SNOW': 'ohare_snfall',
                     'SNWD': 'ohare_sndpth',
                     'TAVG': 'ohare_atmp',
                     'TMAX': 'ohare_maxtmp',
                     'TMIN': 'ohare_mintmp',
                     'WDF2': 'ohare_w2dir',
                     'WSF2': 'ohare_w2spd'},
             axis=1,
             inplace=True)

ohare.head()

Unnamed: 0,DATE,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [19]:
ohare.dtypes

DATE             object
ohare_wspd      float64
ohare_prcp      float64
ohare_snfall    float64
ohare_sndpth    float64
ohare_atmp      float64
ohare_maxtmp      int64
ohare_mintmp      int64
ohare_w2dir       int64
ohare_w2spd     float64
dtype: object

In [20]:
ohare['DATE'] = pd.to_datetime(ohare['DATE'])
ohare.dtypes

DATE            datetime64[ns]
ohare_wspd             float64
ohare_prcp             float64
ohare_snfall           float64
ohare_sndpth           float64
ohare_atmp             float64
ohare_maxtmp             int64
ohare_mintmp             int64
ohare_w2dir              int64
ohare_w2spd            float64
dtype: object

In [21]:
# lake michigan daily average temps

lake_mic = pd.read_csv('data/lake_michigan/jan012015_dec312018_averages.csv')
print(lake_mic.shape)
lake_mic.head()

(1461, 2)


Unnamed: 0,DATE,lake_temp
0,2015-01-01,3.53
1,2015-01-02,3.5
2,2015-01-03,3.49
3,2015-01-04,3.36
4,2015-01-05,3.16


In [22]:
buoy.dtypes

DATE         datetime64[ns]
hh                    int64
buoy_wdir             int64
buoy_wspd           float64
buoy_atmp           float64
dtype: object

In [23]:
lake_mic.dtypes

DATE          object
lake_temp    float64
dtype: object

In [24]:
lake_mic['DATE'] = pd.to_datetime(lake_mic['DATE'])
lake_mic.dtypes

DATE         datetime64[ns]
lake_temp           float64
dtype: object

In [25]:
# df = pd.merge(prices, filings, left_index=True, right_index=True, how='left')

data = pd.merge(buoy, lake_mic, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(34057, 6)


Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp
0,2015-01-01,0,235,7.0,-7.4,3.53
0,2015-01-01,1,253,9.5,-7.4,3.53
0,2015-01-01,2,210,8.5,-7.4,3.53
0,2015-01-01,3,249,10.2,-7.3,3.53
0,2015-01-01,4,246,8.1,-7.0,3.53


In [26]:
data = pd.merge(data, ohare, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(34057, 15)


Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,0,235,7.0,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,1,253,9.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,2,210,8.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,3,249,10.2,-7.3,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,4,246,8.1,-7.0,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9


In [27]:
# saving the data

data.to_csv('data/merged/jan012015_dec312018.csv', index=False)

In [28]:
full_data = pd.read_csv('data/merged/jan012015_dec312018.csv')
print(full_data.shape)
full_data.head()

(34057, 15)


Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,0,235,7.0,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-01,1,253,9.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
2,2015-01-01,2,210,8.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
3,2015-01-01,3,249,10.2,-7.3,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
4,2015-01-01,4,246,8.1,-7.0,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9


In [29]:
full_data.drop(columns=['DATE', 'hh'], inplace=True)
print(full_data.shape)
full_data.head()

(34057, 13)


Unnamed: 0,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,235,7.0,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,253,9.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
2,210,8.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
3,249,10.2,-7.3,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
4,246,8.1,-7.0,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9


In [30]:
# save the uncorrelated data
full_data.to_csv('data/merged/jan012015_dec312018_uncorrelated.csv', index=False)

### Datasets included:
 - Chii2, _____ buoy data
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

# Dates: Apr 01, 2013 - Dec 31, 2018

#### 2101 days

### Datasets included:
 - Botanical Garden
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

In [32]:
import pandas as pd

In [33]:
garden = pd.read_csv('data/garden/apr012013_dec312018.csv')
print(garden.shape)
garden.head()

(2099, 10)


Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TMAX,TMIN,TOBS
0,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-01,0.0,58.0,27.0,31.0
1,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-02,0.0,41.0,25.0,28.0
2,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-03,0.0,46.0,21.0,30.0
3,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-04,0.0,41.0,25.0,33.0
4,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",42.13987,-87.78537,192.0,2013-04-05,0.0,60.0,33.0,38.0


In [37]:
garden = garden.drop(columns=['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION'])
print(garden.shape)
garden.head()

(2099, 5)


Unnamed: 0,DATE,PRCP,TMAX,TMIN,TOBS
0,2013-04-01,0.0,58.0,27.0,31.0
1,2013-04-02,0.0,41.0,25.0,28.0
2,2013-04-03,0.0,46.0,21.0,30.0
3,2013-04-04,0.0,41.0,25.0,33.0
4,2013-04-05,0.0,60.0,33.0,38.0


In [38]:
garden.dtypes

DATE     object
PRCP    float64
TMAX    float64
TMIN    float64
TOBS    float64
dtype: object

In [39]:
garden.isnull().sum()

DATE    0
PRCP    3
TMAX    2
TMIN    2
TOBS    3
dtype: int64

In [44]:
garden.rename(mapper={'PRCP': 'garden_prcp',
                     'TMAX': 'garden_maxtmp',
                     'TMIN': 'garden_mintmp',
                     'TOBS': 'garden_tobs'},
             axis=1,
             inplace=True)

garden.head()

Unnamed: 0,DATE,garden_prcp,garden_maxtmp,garden_mintmp,garden_tobs
0,2013-04-01,0.0,58.0,27.0,31.0
1,2013-04-02,0.0,41.0,25.0,28.0
2,2013-04-03,0.0,46.0,21.0,30.0
3,2013-04-04,0.0,41.0,25.0,33.0
4,2013-04-05,0.0,60.0,33.0,38.0


In [40]:
ohare = pd.read_csv('data/ohare/apr012013_dec312018.csv')
print(ohare.shape)
ohare.head()

(2101, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
0,2013-04-01,11.63,0.0,0.0,0.0,36.0,41,29,310,21.9
1,2013-04-02,8.95,0.0,0.0,0.0,34.0,45,25,320,21.0
2,2013-04-03,6.93,0.0,0.0,0.0,35.0,44,25,60,14.1
3,2013-04-04,8.05,0.0,0.0,0.0,39.0,58,25,240,16.1
4,2013-04-05,11.86,0.0,0.0,0.0,44.0,45,36,20,21.0


In [45]:
ohare.rename(mapper={'AWND': 'ohare_wspd',
                     'PRCP': 'ohare_prcp',
                     'SNOW': 'ohare_snfall',
                     'SNWD': 'ohare_sndpth',
                     'TAVG': 'ohare_atmp',
                     'TMAX': 'ohare_maxtmp',
                     'TMIN': 'ohare_mintmp',
                     'WDF2': 'ohare_w2dir',
                     'WSF2': 'ohare_w2spd'},
             axis=1,
             inplace=True)

ohare.head()

Unnamed: 0,DATE,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2013-04-01,11.63,0.0,0.0,0.0,36.0,41,29,310,21.9
1,2013-04-02,8.95,0.0,0.0,0.0,34.0,45,25,320,21.0
2,2013-04-03,6.93,0.0,0.0,0.0,35.0,44,25,60,14.1
3,2013-04-04,8.05,0.0,0.0,0.0,39.0,58,25,240,16.1
4,2013-04-05,11.86,0.0,0.0,0.0,44.0,45,36,20,21.0


In [41]:
ohare.isnull().sum()

DATE    0
AWND    0
PRCP    0
SNOW    0
SNWD    0
TAVG    0
TMAX    0
TMIN    0
WDF2    0
WSF2    0
dtype: int64

In [42]:
lake_mic = pd.read_csv('data/lake_michigan/apr012013_dec312018.csv')
print(lake_mic.shape)
lake_mic.head()

(2101, 2)


Unnamed: 0,DATE,lake_temp
0,2013-04-01,2.26
1,2013-04-02,2.29
2,2013-04-03,2.36
3,2013-04-04,2.46
4,2013-04-05,2.5


In [43]:
lake_mic.isnull().sum()

DATE         0
lake_temp    0
dtype: int64

In [47]:
garden.DATE = pd.to_datetime(garden.DATE)
garden.dtypes

DATE             datetime64[ns]
garden_prcp             float64
garden_maxtmp           float64
garden_mintmp           float64
garden_tobs             float64
dtype: object

In [48]:
ohare.DATE = pd.to_datetime(ohare.DATE)
ohare.dtypes

DATE            datetime64[ns]
ohare_wspd             float64
ohare_prcp             float64
ohare_snfall           float64
ohare_sndpth           float64
ohare_atmp             float64
ohare_maxtmp             int64
ohare_mintmp             int64
ohare_w2dir              int64
ohare_w2spd            float64
dtype: object

In [49]:
lake_mic.DATE = pd.to_datetime(lake_mic.DATE)
lake_mic.dtypes

DATE         datetime64[ns]
lake_temp           float64
dtype: object

In [50]:
data = pd.merge(garden, lake_mic, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(2099, 6)


Unnamed: 0,DATE,garden_prcp,garden_maxtmp,garden_mintmp,garden_tobs,lake_temp
0,2013-04-01,0.0,58.0,27.0,31.0,2.26
1,2013-04-02,0.0,41.0,25.0,28.0,2.29
2,2013-04-03,0.0,46.0,21.0,30.0,2.36
3,2013-04-04,0.0,41.0,25.0,33.0,2.46
4,2013-04-05,0.0,60.0,33.0,38.0,2.5


In [51]:
data = pd.merge(data, ohare, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(2099, 15)


Unnamed: 0,DATE,garden_prcp,garden_maxtmp,garden_mintmp,garden_tobs,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2013-04-01,0.0,58.0,27.0,31.0,2.26,11.63,0.0,0.0,0.0,36.0,41,29,310,21.9
1,2013-04-02,0.0,41.0,25.0,28.0,2.29,8.95,0.0,0.0,0.0,34.0,45,25,320,21.0
2,2013-04-03,0.0,46.0,21.0,30.0,2.36,6.93,0.0,0.0,0.0,35.0,44,25,60,14.1
3,2013-04-04,0.0,41.0,25.0,33.0,2.46,8.05,0.0,0.0,0.0,39.0,58,25,240,16.1
4,2013-04-05,0.0,60.0,33.0,38.0,2.5,11.86,0.0,0.0,0.0,44.0,45,36,20,21.0


In [52]:
data.to_csv('data/merged/garden_ohare_lake/apr012013_dec312018.csv', index=False)

In [53]:
merged = pd.read_csv('data/merged/garden_ohare_lake/apr012013_dec312018.csv')
print(merged.shape)
merged.head()

(2099, 15)


Unnamed: 0,DATE,garden_prcp,garden_maxtmp,garden_mintmp,garden_tobs,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2013-04-01,0.0,58.0,27.0,31.0,2.26,11.63,0.0,0.0,0.0,36.0,41,29,310,21.9
1,2013-04-02,0.0,41.0,25.0,28.0,2.29,8.95,0.0,0.0,0.0,34.0,45,25,320,21.0
2,2013-04-03,0.0,46.0,21.0,30.0,2.36,6.93,0.0,0.0,0.0,35.0,44,25,60,14.1
3,2013-04-04,0.0,41.0,25.0,33.0,2.46,8.05,0.0,0.0,0.0,39.0,58,25,240,16.1
4,2013-04-05,0.0,60.0,33.0,38.0,2.5,11.86,0.0,0.0,0.0,44.0,45,36,20,21.0


In [54]:
merged.isnull().sum()

DATE             0
garden_prcp      3
garden_maxtmp    2
garden_mintmp    2
garden_tobs      3
lake_temp        0
ohare_wspd       0
ohare_prcp       0
ohare_snfall     0
ohare_sndpth     0
ohare_atmp       0
ohare_maxtmp     0
ohare_mintmp     0
ohare_w2dir      0
ohare_w2spd      0
dtype: int64