# In this notebook we bring together data for analysis.

### Resource used to determine these day counts:
[timeanddate.com](https://www.timeanddate.com/date/durationresult.html?m1=04&d1=1&y1=2013&m2=12&d2=31&y2=2018&ti=on)

Note: there are 

 - **3377** days in **01/01/2010 - 03/31/2019**, when you include both the first and last days in the set.
 - **2101** days in **04/01/2013 - 12/31/2018**, when you include both the first and last days in the set.
 - **1550** days in **01/01/2015 - 03/31/2019**, when you include both the first and last days in the set.
 - **1461** days in **01/01/2015 - 12/31/2018**, when you include both the first and last days in the set.

### Datasets included in this workbook:
 - buoy data from
  - Fsti2, Foster Beach
  - Jaki2, 63rd St
  - Chii2, Submerged off the coast
  
 - Ohare weather data
  
 - Lake Michigan Daily average temperatures

### We begin by combining all the datasets for the years 2015-2018.

In [1]:
import pandas as pd
import time
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

# Dates: Jan 01, 2015 - Dec 31, 2018, daily averages

#### 1461 days included

### Datasets included:
 - Fsti2, Foster Beach buoy data
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

In [2]:
# buoy FSTI2
buoy = pd.read_csv('data/buoy_FSTI2/jan012015_dec312018_averages.csv')
print(buoy.shape)
buoy.head()

(1461, 4)


Unnamed: 0,DATE,WDIR,WSPD,ATMP
0,2015-01-01,238.333333,9.890476,-5.928571
1,2015-01-02,216.086957,6.230435,-2.204348
2,2015-01-03,181.916667,5.654167,0.395833
3,2015-01-04,313.75,8.779167,-1.545833
4,2015-01-05,262.166667,9.5375,-16.145833


In [3]:
buoy.rename(mapper={'WDIR': 'buoy_wdir',
                    'WSPD': 'buoy_wspd',
                    'ATMP': 'buoy_atmp'}, 
            axis=1, 
            inplace=True)

buoy.head()

Unnamed: 0,DATE,buoy_wdir,buoy_wspd,buoy_atmp
0,2015-01-01,238.333333,9.890476,-5.928571
1,2015-01-02,216.086957,6.230435,-2.204348
2,2015-01-03,181.916667,5.654167,0.395833
3,2015-01-04,313.75,8.779167,-1.545833
4,2015-01-05,262.166667,9.5375,-16.145833


In [4]:
# ohare weather data (Jan 01, 2015 to Dec 31, 2018)

ohare = pd.read_csv('data/ohare/jan012015_dec312018_averages.csv')
print(ohare.shape)
ohare.head()

(1461, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [5]:
# ohare.drop(columns=['WSF2', 'WDF2'], inplace=True)
# ohare.head()

In [6]:
ohare.rename(mapper={'AWND': 'ohare_wspd',
                     'PRCP': 'ohare_prcp',
                     'SNOW': 'ohare_snfall',
                     'SNWD': 'ohare_sndpth',
                     'TAVG': 'ohare_atmp',
                     'TMAX': 'ohare_maxtmp',
                     'TMIN': 'ohare_mintmp',
                     'WDF2': 'ohare_w2dir',
                     'WSF2': 'ohare_w2spd'},
             axis=1,
             inplace=True)

ohare.head()

Unnamed: 0,DATE,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [7]:
# lake michigan daily average temps
lake_mic = pd.read_csv('data/lake_michigan/jan012015_dec312018_averages.csv')
print(lake_mic.shape)
lake_mic.head()

(1461, 2)


Unnamed: 0,DATE,lake_temp
0,2015-01-01,3.53
1,2015-01-02,3.5
2,2015-01-03,3.49
3,2015-01-04,3.36
4,2015-01-05,3.16


In [8]:
data = pd.concat([lake_mic, buoy.drop(columns='DATE'), ohare.drop(columns='DATE')], axis=1)
print(data.shape)
data.head()

(1461, 14)


Unnamed: 0,DATE,lake_temp,buoy_wdir,buoy_wspd,buoy_atmp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,3.53,238.333333,9.890476,-5.928571,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,3.5,216.086957,6.230435,-2.204348,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,3.49,181.916667,5.654167,0.395833,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,3.36,313.75,8.779167,-1.545833,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,3.16,262.166667,9.5375,-16.145833,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [9]:
# saving the data
data.to_csv('data/merged/jan012015_dec312018_averages.csv', index=False)

In [10]:
merged = pd.read_csv('data/merged/jan012015_dec312018_averages.csv')
print(merged.shape)
merged.head()

(1461, 14)


Unnamed: 0,DATE,lake_temp,buoy_wdir,buoy_wspd,buoy_atmp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,3.53,238.333333,9.890476,-5.928571,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,3.5,216.086957,6.230435,-2.204348,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,3.49,181.916667,5.654167,0.395833,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,3.36,313.75,8.779167,-1.545833,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,3.16,262.166667,9.5375,-16.145833,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


# Dates: January 1, 2015 - Dec 31, 2018, all data for buoy FSTI2.

#### 1461 days

### Datasets included:
 - Fsti2, Foster Beach buoy data
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

In [11]:
import pandas as pd

In [12]:
# buoy FSTI2

buoy = pd.read_csv('data/buoy_FSTI2/jan012015_dec312018.csv')
print(buoy.shape)
buoy.head()

(34057, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2015,1,1,0,0,235,7.0,-7.4
1,2015,1,1,1,0,253,9.5,-7.4
2,2015,1,1,2,0,210,8.5,-7.4
3,2015,1,1,3,0,249,10.2,-7.3
4,2015,1,1,4,0,246,8.1,-7.0


In [13]:
buoy['DATE'] = pd.to_datetime(buoy['#YY']*10000 + buoy['MM']*100 + buoy['DD'], format = '%Y%m%d')
print(buoy.shape)
buoy.head()

(34057, 9)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP,DATE
0,2015,1,1,0,0,235,7.0,-7.4,2015-01-01
1,2015,1,1,1,0,253,9.5,-7.4,2015-01-01
2,2015,1,1,2,0,210,8.5,-7.4,2015-01-01
3,2015,1,1,3,0,249,10.2,-7.3,2015-01-01
4,2015,1,1,4,0,246,8.1,-7.0,2015-01-01


In [14]:
buoy.columns

Index(['#YY', 'MM', 'DD', 'hh', 'mm', 'WDIR', 'WSPD', 'ATMP', 'DATE'], dtype='object')

In [15]:
buoy = buoy[['DATE', 'hh', 'WDIR', 'WSPD', 'ATMP']]
print(buoy.shape)
buoy.head()

(34057, 5)


Unnamed: 0,DATE,hh,WDIR,WSPD,ATMP
0,2015-01-01,0,235,7.0,-7.4
1,2015-01-01,1,253,9.5,-7.4
2,2015-01-01,2,210,8.5,-7.4
3,2015-01-01,3,249,10.2,-7.3
4,2015-01-01,4,246,8.1,-7.0


In [16]:
buoy.rename(mapper={'WDIR': 'buoy_wdir',
                    'WSPD': 'buoy_wspd',
                    'ATMP': 'buoy_atmp'}, 
            axis=1, 
            inplace=True)

buoy.head()

Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp
0,2015-01-01,0,235,7.0,-7.4
1,2015-01-01,1,253,9.5,-7.4
2,2015-01-01,2,210,8.5,-7.4
3,2015-01-01,3,249,10.2,-7.3
4,2015-01-01,4,246,8.1,-7.0


In [17]:
# ohare weather data (Jan 01, 2015 to Dec 31, 2018)

ohare = pd.read_csv('data/ohare/jan012015_dec312018_averages.csv')
print(ohare.shape)
ohare.head()

(1461, 10)


Unnamed: 0,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WDF2,WSF2
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [18]:
ohare.rename(mapper={'AWND': 'ohare_wspd',
                     'PRCP': 'ohare_prcp',
                     'SNOW': 'ohare_snfall',
                     'SNWD': 'ohare_sndpth',
                     'TAVG': 'ohare_atmp',
                     'TMAX': 'ohare_maxtmp',
                     'TMIN': 'ohare_mintmp',
                     'WDF2': 'ohare_w2dir',
                     'WSF2': 'ohare_w2spd'},
             axis=1,
             inplace=True)

ohare.head()

Unnamed: 0,DATE,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-02,4.7,0.0,0.0,0.0,26.0,34,15,150,12.1
2,2015-01-03,5.59,0.57,0.4,0.0,31.0,34,29,140,13.0
3,2015-01-04,15.88,0.13,2.2,1.2,29.0,33,2,320,25.9
4,2015-01-05,10.29,0.15,1.8,2.0,2.0,6,-3,310,21.9


In [19]:
ohare.dtypes

DATE             object
ohare_wspd      float64
ohare_prcp      float64
ohare_snfall    float64
ohare_sndpth    float64
ohare_atmp      float64
ohare_maxtmp      int64
ohare_mintmp      int64
ohare_w2dir       int64
ohare_w2spd     float64
dtype: object

In [20]:
ohare['DATE'] = pd.to_datetime(ohare['DATE'])
ohare.dtypes

DATE            datetime64[ns]
ohare_wspd             float64
ohare_prcp             float64
ohare_snfall           float64
ohare_sndpth           float64
ohare_atmp             float64
ohare_maxtmp             int64
ohare_mintmp             int64
ohare_w2dir              int64
ohare_w2spd            float64
dtype: object

In [21]:
# lake michigan daily average temps

lake_mic = pd.read_csv('data/lake_michigan/jan012015_dec312018_averages.csv')
print(lake_mic.shape)
lake_mic.head()

(1461, 2)


Unnamed: 0,DATE,lake_temp
0,2015-01-01,3.53
1,2015-01-02,3.5
2,2015-01-03,3.49
3,2015-01-04,3.36
4,2015-01-05,3.16


In [22]:
buoy.dtypes

DATE         datetime64[ns]
hh                    int64
buoy_wdir             int64
buoy_wspd           float64
buoy_atmp           float64
dtype: object

In [23]:
lake_mic.dtypes

DATE          object
lake_temp    float64
dtype: object

In [24]:
lake_mic['DATE'] = pd.to_datetime(lake_mic['DATE'])
lake_mic.dtypes

DATE         datetime64[ns]
lake_temp           float64
dtype: object

In [25]:
# df = pd.merge(prices, filings, left_index=True, right_index=True, how='left')

data = pd.merge(buoy, lake_mic, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(34057, 6)


Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp
0,2015-01-01,0,235,7.0,-7.4,3.53
0,2015-01-01,1,253,9.5,-7.4,3.53
0,2015-01-01,2,210,8.5,-7.4,3.53
0,2015-01-01,3,249,10.2,-7.3,3.53
0,2015-01-01,4,246,8.1,-7.0,3.53


In [26]:
data = pd.merge(data, ohare, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(34057, 15)


Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,0,235,7.0,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,1,253,9.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,2,210,8.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,3,249,10.2,-7.3,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
0,2015-01-01,4,246,8.1,-7.0,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9


In [27]:
# saving the data

data.to_csv('data/merged/jan012015_dec312018.csv', index=False)

In [28]:
full_data = pd.read_csv('data/merged/jan012015_dec312018.csv')
print(full_data.shape)
full_data.head()

(34057, 15)


Unnamed: 0,DATE,hh,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,2015-01-01,0,235,7.0,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,2015-01-01,1,253,9.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
2,2015-01-01,2,210,8.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
3,2015-01-01,3,249,10.2,-7.3,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
4,2015-01-01,4,246,8.1,-7.0,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9


In [29]:
full_data.drop(columns=['DATE', 'hh'], inplace=True)
print(full_data.shape)
full_data.head()

(34057, 13)


Unnamed: 0,buoy_wdir,buoy_wspd,buoy_atmp,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_atmp,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,235,7.0,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
1,253,9.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
2,210,8.5,-7.4,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
3,249,10.2,-7.3,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9
4,246,8.1,-7.0,3.53,14.54,0.0,0.0,0.0,20.0,32,14,240,23.9


In [30]:
# save the uncorrelated data
full_data.to_csv('data/merged/jan012015_dec312018_uncorrelated.csv', index=False)

### Datasets included:
 - Chii2, _____ buoy data
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

# Dates: January 01, 1995 - December 31, 2018

#### 8766 days

### Datasets included:
 - Botanical Garden
  
 - O'Hare weather data
  
 - Lake Michigan Daily average temperatures

### Note: The Lake Michigan Dataset only includes daily averages through the end of 2018.

In [8]:
import pandas as pd

In [9]:
garden = pd.read_csv('data/garden/jan011995_may012019.csv')
print(garden.shape)
garden.head()

(8882, 9)


Unnamed: 0,STATION,DATE,PRCP,SNOW,SNWD,TMAX,TMIN,TOBS,WDMV
0,USC00111497,1995-01-01,0.11,1.5,3.0,36.0,21.0,24.0,
1,USC00111497,1995-01-02,0.0,0.0,1.0,24.0,7.0,12.0,
2,USC00111497,1995-01-03,0.0,0.0,1.0,25.0,10.0,11.0,
3,USC00111497,1995-01-04,0.0,0.0,1.0,18.0,-1.0,1.0,
4,USC00111497,1995-01-05,0.0,0.0,1.0,6.0,-1.0,3.0,


In [10]:
garden = garden.drop(columns=['STATION', 'WDMV'])
print(garden.shape)
garden.head()

(8882, 7)


Unnamed: 0,DATE,PRCP,SNOW,SNWD,TMAX,TMIN,TOBS
0,1995-01-01,0.11,1.5,3.0,36.0,21.0,24.0
1,1995-01-02,0.0,0.0,1.0,24.0,7.0,12.0
2,1995-01-03,0.0,0.0,1.0,25.0,10.0,11.0
3,1995-01-04,0.0,0.0,1.0,18.0,-1.0,1.0
4,1995-01-05,0.0,0.0,1.0,6.0,-1.0,3.0


In [11]:
garden.dtypes

DATE     object
PRCP    float64
SNOW    float64
SNWD    float64
TMAX    float64
TMIN    float64
TOBS    float64
dtype: object

In [12]:
garden.isnull().sum()

DATE      0
PRCP     28
SNOW    315
SNWD    362
TMAX     10
TMIN     11
TOBS     17
dtype: int64

In [13]:
garden.rename(mapper={'PRCP': 'garden_prcp',
                      'SNOW': 'garden_snfall',
                      'SNWD': 'garden_sndpth',
                     'TMAX': 'garden_maxtmp',
                     'TMIN': 'garden_mintmp',
                     'TOBS': 'garden_tobs'},
             axis=1,
             inplace=True)

garden.head()

Unnamed: 0,DATE,garden_prcp,garden_snfall,garden_sndpth,garden_maxtmp,garden_mintmp,garden_tobs
0,1995-01-01,0.11,1.5,3.0,36.0,21.0,24.0
1,1995-01-02,0.0,0.0,1.0,24.0,7.0,12.0
2,1995-01-03,0.0,0.0,1.0,25.0,10.0,11.0
3,1995-01-04,0.0,0.0,1.0,18.0,-1.0,1.0
4,1995-01-05,0.0,0.0,1.0,6.0,-1.0,3.0


In [14]:
ohare = pd.read_csv('data/ohare/jan011995_may012019.csv')
print(ohare.shape)
ohare.head()

(8884, 10)


Unnamed: 0,STATION,DATE,AWND,PRCP,SNOW,SNWD,TMAX,TMIN,WDF2,WSF2
0,USW00094846,1995-01-01,16.11,0.02,0.2,1.0,27,11,,
1,USW00094846,1995-01-02,14.09,0.0,0.0,1.0,21,8,,
2,USW00094846,1995-01-03,12.53,0.0,0.0,1.0,18,3,,
3,USW00094846,1995-01-04,12.3,0.0,0.0,1.0,7,-1,,
4,USW00094846,1995-01-05,16.33,0.0,0.0,0.0,19,-1,,


In [15]:
ohare.rename(mapper={'AWND': 'ohare_wspd',
                     'PRCP': 'ohare_prcp',
                     'SNOW': 'ohare_snfall',
                     'SNWD': 'ohare_sndpth',
                     'TAVG': 'ohare_atmp',
                     'TMAX': 'ohare_maxtmp',
                     'TMIN': 'ohare_mintmp',
                     'WDF2': 'ohare_w2dir',
                     'WSF2': 'ohare_w2spd'},
             axis=1,
             inplace=True)

ohare.head()

Unnamed: 0,STATION,DATE,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,USW00094846,1995-01-01,16.11,0.02,0.2,1.0,27,11,,
1,USW00094846,1995-01-02,14.09,0.0,0.0,1.0,21,8,,
2,USW00094846,1995-01-03,12.53,0.0,0.0,1.0,18,3,,
3,USW00094846,1995-01-04,12.3,0.0,0.0,1.0,7,-1,,
4,USW00094846,1995-01-05,16.33,0.0,0.0,0.0,19,-1,,


In [16]:
ohare.isnull().sum()

STATION           0
DATE              0
ohare_wspd        9
ohare_prcp        1
ohare_snfall    186
ohare_sndpth    376
ohare_maxtmp      0
ohare_mintmp      0
ohare_w2dir     404
ohare_w2spd     404
dtype: int64

In [17]:
lake_mic = pd.read_csv('data/lake_michigan/lake_michigan_water_temp_1995_2019_dateTime.csv')
print(lake_mic.shape)
lake_mic.head()

(8766, 2)


Unnamed: 0,DATE,lake_temp
0,1995-01-01,5.02
1,1995-01-02,4.95
2,1995-01-03,0.2
3,1995-01-04,0.2
4,1995-01-05,0.2


In [18]:
lake_mic.isnull().sum()

DATE         0
lake_temp    0
dtype: int64

In [19]:
garden.DATE = pd.to_datetime(garden.DATE)
garden.dtypes

DATE             datetime64[ns]
garden_prcp             float64
garden_snfall           float64
garden_sndpth           float64
garden_maxtmp           float64
garden_mintmp           float64
garden_tobs             float64
dtype: object

In [20]:
ohare.DATE = pd.to_datetime(ohare.DATE)
ohare.dtypes

STATION                 object
DATE            datetime64[ns]
ohare_wspd             float64
ohare_prcp             float64
ohare_snfall           float64
ohare_sndpth           float64
ohare_maxtmp             int64
ohare_mintmp             int64
ohare_w2dir            float64
ohare_w2spd            float64
dtype: object

In [21]:
ohare.drop(columns='STATION', inplace=True)

In [22]:
lake_mic.DATE = pd.to_datetime(lake_mic.DATE)
lake_mic.dtypes

DATE         datetime64[ns]
lake_temp           float64
dtype: object

In [23]:
data = pd.merge(garden, lake_mic, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(8763, 8)


Unnamed: 0,DATE,garden_prcp,garden_snfall,garden_sndpth,garden_maxtmp,garden_mintmp,garden_tobs,lake_temp
0,1995-01-01,0.11,1.5,3.0,36.0,21.0,24.0,5.02
1,1995-01-02,0.0,0.0,1.0,24.0,7.0,12.0,4.95
2,1995-01-03,0.0,0.0,1.0,25.0,10.0,11.0,0.2
3,1995-01-04,0.0,0.0,1.0,18.0,-1.0,1.0,0.2
4,1995-01-05,0.0,0.0,1.0,6.0,-1.0,3.0,0.2


In [24]:
data = pd.merge(data, ohare, on='DATE', right_index=False, left_index=True)
print(data.shape)
data.head()

(8763, 16)


Unnamed: 0,DATE,garden_prcp,garden_snfall,garden_sndpth,garden_maxtmp,garden_mintmp,garden_tobs,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,1995-01-01,0.11,1.5,3.0,36.0,21.0,24.0,5.02,16.11,0.02,0.2,1.0,27,11,,
1,1995-01-02,0.0,0.0,1.0,24.0,7.0,12.0,4.95,14.09,0.0,0.0,1.0,21,8,,
2,1995-01-03,0.0,0.0,1.0,25.0,10.0,11.0,0.2,12.53,0.0,0.0,1.0,18,3,,
3,1995-01-04,0.0,0.0,1.0,18.0,-1.0,1.0,0.2,12.3,0.0,0.0,1.0,7,-1,,
4,1995-01-05,0.0,0.0,1.0,6.0,-1.0,3.0,0.2,16.33,0.0,0.0,0.0,19,-1,,


In [25]:
data.tail()

Unnamed: 0,DATE,garden_prcp,garden_snfall,garden_sndpth,garden_maxtmp,garden_mintmp,garden_tobs,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
8761,2018-12-27,0.0,0.0,0.0,49.0,26.0,40.0,4.17,15.66,0.66,0.0,0.0,53,39,170.0,21.9
8762,2018-12-28,0.47,0.0,0.0,54.0,37.0,38.0,4.04,15.21,0.08,0.2,0.0,53,30,230.0,23.9
8763,2018-12-29,0.11,0.1,0.0,38.0,26.0,27.0,4.03,7.61,0.05,0.9,0.0,30,22,240.0,21.0
8764,2018-12-30,0.03,0.3,0.0,31.0,19.0,21.0,4.07,11.18,0.0,0.0,0.0,36,19,210.0,19.9
8765,2018-12-31,0.06,0.0,1.0,37.0,21.0,36.0,4.07,8.72,0.86,0.0,0.0,39,31,360.0,25.1


In [28]:
data.to_csv('data/merged/garden_ohare_lake/jan011995_dec312018.csv', index=False)

In [29]:
merged = pd.read_csv('data/merged/garden_ohare_lake/jan011995_dec312018.csv')
print(merged.shape)
merged.head()

(8763, 16)


Unnamed: 0,DATE,garden_prcp,garden_snfall,garden_sndpth,garden_maxtmp,garden_mintmp,garden_tobs,lake_temp,ohare_wspd,ohare_prcp,ohare_snfall,ohare_sndpth,ohare_maxtmp,ohare_mintmp,ohare_w2dir,ohare_w2spd
0,1995-01-01,0.11,1.5,3.0,36.0,21.0,24.0,5.02,16.11,0.02,0.2,1.0,27,11,,
1,1995-01-02,0.0,0.0,1.0,24.0,7.0,12.0,4.95,14.09,0.0,0.0,1.0,21,8,,
2,1995-01-03,0.0,0.0,1.0,25.0,10.0,11.0,0.2,12.53,0.0,0.0,1.0,18,3,,
3,1995-01-04,0.0,0.0,1.0,18.0,-1.0,1.0,0.2,12.3,0.0,0.0,1.0,7,-1,,
4,1995-01-05,0.0,0.0,1.0,6.0,-1.0,3.0,0.2,16.33,0.0,0.0,0.0,19,-1,,


In [30]:
merged.isnull().sum()

DATE               0
garden_prcp       28
garden_snfall    313
garden_sndpth    358
garden_maxtmp     10
garden_mintmp     11
garden_tobs       17
lake_temp          0
ohare_wspd         7
ohare_prcp         1
ohare_snfall     186
ohare_sndpth     376
ohare_maxtmp       0
ohare_mintmp       0
ohare_w2dir      402
ohare_w2spd      402
dtype: int64

In [33]:
merged.dropna(inplace=True)