# Exploring the data for Buoy FSTI2 for the years 2015 - 2018.

#### Station FSTI2 - Foster Ave., Chicago, IL

Owned and maintained by [Chicago Park District](https://www.ndbc.noaa.gov/ndbcexit.php?url=https://wqdatalive.com/public/16&blurb=Chicago+Park+District)  
41.976 N 87.648 W (41°58'35" N 87°38'51" W)

### Table of contents

Exploring the data:  
  
[2014](#2014),
[2015](#2015),
[2016](#2016), 
[2017](#2017), 
[2018](#2018), 
[2019](#2019),  
[2015-2018](#2015-2018)

### Data

 - (ATMP) air temperature 
 - (WDIR) Wind Direction 
 - (WSPD) Wind Speed 
 - (GST) "Peak 5 or 8 second gust speed (m/s) measured during the eight-minute or two-minute period. The 5 or 8 second period can be determined by payload, See the Sensor Reporting, Sampling, and Accuracy section." 
 
[Data Dictionary](https://www.ndbc.noaa.gov/measdes.shtml)
 
#### Missing data
 - 

In [1]:
import pandas as pd
import time

In [2]:
# Column names of the yearly station files: the five timestamp parts first,
# followed by the three measurements.
features = [
    '#YY', 'MM', 'DD', 'hh', 'mm',  # year, month, day, hour, minute
    'WDIR', 'WSPD', 'ATMP',         # wind direction, wind speed, air temperature
]

<a id='2014'></a>
### Exploring the data for 2014.

#### Missing Data
 - 

In [29]:
# Load the 2014 yearly file for station FSTI2, report its (rows, columns)
# size, and preview the first five rows.
data_14 = pd.read_csv(filepath_or_buffer='../data/buoy_FSTI2/fsti2h2014.csv')
print(data_14.shape)
data_14.head(n=5)

(2049, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2014,7,12,11,0,198,4.9,20.6
1,2014,7,12,12,0,224,11.2,20.8
2,2014,7,12,13,0,216,9.8,22.5
3,2014,7,12,14,0,198,5.5,21.3
4,2014,7,12,15,0,236,7.0,21.7


In [30]:
# Show the dtype pandas inferred for each column of the 2014 frame.
data_14.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
ATMP    float64
dtype: object

In [31]:
# Daily averages for 2014: group on the calendar date, average every remaining
# column, then drop the hour/minute columns from the result.
# NOTE(review): WDIR is averaged arithmetically; if it is a compass bearing in
# degrees, readings either side of north (e.g. 350 and 10) will not average
# sensibly -- confirm before relying on the daily WDIR values.
(
    data_14
    .groupby(['#YY', 'MM', 'DD'])
    .mean()
    .drop(columns=['hh', 'mm'])
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014,7,12,209.230769,7.0,23.623077
2014,7,13,226.583333,4.704167,23.229167
2014,7,14,218.521739,4.078261,21.247826
2014,7,15,308.0,6.104348,17.547826
2014,7,16,191.25,4.8625,16.408333


In [33]:
# For each calendar month, print how many distinct day-of-month values appear
# in the 2014 data; a count below the month's length means whole days are absent.
for month in range(1, 13):
    print(month, data_14.loc[data_14['MM'] == month, 'DD'].nunique())

1 0
2 0
3 0
4 0
5 0
6 0
7 20
8 31
9 27
10 12
11 1
12 0


<font color = blue>Most of 2014 is missing from this data set: only July through November contain any readings (91 distinct days in total).</font>

<a id='2015'></a>
### Exploring the data for 2015.

#### Missing Data
 - 

In [3]:
# Load the 2015 yearly file for station FSTI2, report its (rows, columns)
# size, and preview the first five rows.
data_15 = pd.read_csv(filepath_or_buffer='../data/buoy_FSTI2/fsti2h2015.csv')
print(data_15.shape)
data_15.head(n=5)

(8226, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2015,1,1,0,0,235,7.0,-7.4
1,2015,1,1,1,0,253,9.5,-7.4
2,2015,1,1,2,0,210,8.5,-7.4
3,2015,1,1,3,0,249,10.2,-7.3
4,2015,1,1,4,0,246,8.1,-7.0


In [4]:
# Show the dtype pandas inferred for each column of the 2015 frame.
data_15.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
ATMP    float64
dtype: object

In [5]:
# Daily averages for 2015: group on the calendar date, average every remaining
# column, then drop the hour/minute columns from the result.
# NOTE(review): WDIR is averaged arithmetically; if it is a compass bearing in
# degrees, readings either side of north (e.g. 350 and 10) will not average
# sensibly -- confirm before relying on the daily WDIR values.
(
    data_15
    .groupby(['#YY', 'MM', 'DD'])
    .mean()
    .drop(columns=['hh', 'mm'])
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015,1,1,238.333333,9.890476,-5.928571
2015,1,2,216.086957,6.230435,-2.204348
2015,1,3,181.916667,5.654167,0.395833
2015,1,4,313.75,8.779167,-1.545833
2015,1,5,262.166667,9.5375,-16.145833


In [6]:
# For each calendar month, print how many distinct day-of-month values appear
# in the 2015 data; a count below the month's length means whole days are absent.
for month in range(1, 13):
    print(month, data_15.loc[data_15['MM'] == month, 'DD'].nunique())

1 31
2 28
3 31
4 30
5 31
6 30
7 31
8 31
9 30
10 31
11 30
12 31


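<font color = blue>Every day is accounted for in this data set! Assuming one reading per hour, though, its 8,226 rows fall short of the 8,760 a complete year would contain, so some hours within days are missing.</font>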

In [7]:
# Write the 2015 frame to its own CSV; index=False keeps the row index out of
# the file.
data_15.to_csv(
    path_or_buf='../data/buoy_FSTI2/fsti2_buoy_data_2015.csv',
    index=False,
)

In [8]:
# Re-read the saved 2015 CSV to check what was written: print its
# (rows, columns) size and preview the first five rows.
data_2015 = pd.read_csv(
    filepath_or_buffer='../data/buoy_FSTI2/fsti2_buoy_data_2015.csv'
)
print(data_2015.shape)
data_2015.head(n=5)

(8226, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2015,1,1,0,0,235,7.0,-7.4
1,2015,1,1,1,0,253,9.5,-7.4
2,2015,1,1,2,0,210,8.5,-7.4
3,2015,1,1,3,0,249,10.2,-7.3
4,2015,1,1,4,0,246,8.1,-7.0


<a id='2016'></a>
### Exploring the data for 2016.

#### Missing Data
 - 

In [9]:
# Load the 2016 yearly file for station FSTI2, report its (rows, columns)
# size, and preview the first five rows.
data_16 = pd.read_csv(filepath_or_buffer='../data/buoy_FSTI2/fsti2h2016.csv')
print(data_16.shape)
data_16.head(n=5)

(8480, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2016,1,1,0,0,246,5.8,-3.8
1,2016,1,1,1,0,214,7.7,-4.1
2,2016,1,1,2,0,204,4.4,-4.3
3,2016,1,1,3,0,219,8.0,-4.1
4,2016,1,1,4,0,268,6.8,-3.7


In [10]:
# Show the dtype pandas inferred for each column of the 2016 frame.
data_16.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
ATMP    float64
dtype: object

In [11]:
# Daily averages for 2016: group on the calendar date, average every remaining
# column, then drop the hour/minute columns from the result.
# NOTE(review): WDIR is averaged arithmetically; if it is a compass bearing in
# degrees, readings either side of north (e.g. 350 and 10) will not average
# sensibly -- confirm before relying on the daily WDIR values.
(
    data_16
    .groupby(['#YY', 'MM', 'DD'])
    .mean()
    .drop(columns=['hh', 'mm'])
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016,1,1,249.791667,9.1875,-4.091667
2016,1,2,244.416667,7.6125,-1.6375
2016,1,3,296.625,9.175,-2.029167
2016,1,4,213.043478,10.113043,-2.252174
2016,1,5,165.25,8.220833,-2.45


In [12]:
# For each calendar month, print how many distinct day-of-month values appear
# in the 2016 data; a count below the month's length means whole days are absent.
for month in range(1, 13):
    print(month, data_16.loc[data_16['MM'] == month, 'DD'].nunique())

1 31
2 29
3 31
4 30
5 31
6 30
7 31
8 31
9 30
10 31
11 30
12 31


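<font color = blue>Every day is accounted for in this data set! Assuming one reading per hour, though, its 8,480 rows fall short of the 8,784 a complete leap year would contain, so some hours within days are missing.</font>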

In [13]:
# Write the 2016 frame to its own CSV; index=False keeps the row index out of
# the file.
data_16.to_csv(
    path_or_buf='../data/buoy_FSTI2/fsti2_buoy_data_2016.csv',
    index=False,
)

In [14]:
# Re-read the saved 2016 CSV to check what was written: print its
# (rows, columns) size and preview the first five rows.
data_2016 = pd.read_csv(
    filepath_or_buffer='../data/buoy_FSTI2/fsti2_buoy_data_2016.csv'
)
print(data_2016.shape)
data_2016.head(n=5)

(8480, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2016,1,1,0,0,246,5.8,-3.8
1,2016,1,1,1,0,214,7.7,-4.1
2,2016,1,1,2,0,204,4.4,-4.3
3,2016,1,1,3,0,219,8.0,-4.1
4,2016,1,1,4,0,268,6.8,-3.7


<a id='2017'></a>
### Exploring the data for 2017.

#### Missing Data
 - 

In [16]:
# Load the 2017 yearly file for station FSTI2, report its (rows, columns)
# size, and preview the first five rows.
data_17 = pd.read_csv(filepath_or_buffer='../data/buoy_FSTI2/fsti2h2017.csv')
print(data_17.shape)
data_17.head(n=5)

(8676, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2017,1,1,0,0,262,10.1,0.1
1,2017,1,1,1,0,270,7.1,-0.3
2,2017,1,1,2,0,5,1.3,-0.7
3,2017,1,1,3,0,210,3.2,-1.1
4,2017,1,1,4,0,264,3.5,-1.6


In [17]:
# Show the dtype pandas inferred for each column of the 2017 frame.
data_17.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
ATMP    float64
dtype: object

In [18]:
# Daily averages for 2017: group on the calendar date, average every remaining
# column, then drop the hour/minute columns from the result.
# NOTE(review): WDIR is averaged arithmetically; if it is a compass bearing in
# degrees, readings either side of north (e.g. 350 and 10) will not average
# sensibly -- confirm before relying on the daily WDIR values.
(
    data_17
    .groupby(['#YY', 'MM', 'DD'])
    .mean()
    .drop(columns=['hh', 'mm'])
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017,1,1,204.0,5.258333,-0.916667
2017,1,2,109.291667,3.2,1.820833
2017,1,3,271.333333,5.329167,3.333333
2017,1,4,275.541667,11.4125,-7.670833
2017,1,5,276.666667,9.395833,-12.5375


In [19]:
# For each calendar month, print how many distinct day-of-month values appear
# in the 2017 data; a count below the month's length means whole days are absent.
for month in range(1, 13):
    print(month, data_17.loc[data_17['MM'] == month, 'DD'].nunique())

1 31
2 28
3 31
4 30
5 31
6 30
7 31
8 31
9 30
10 31
11 30
12 31


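<font color = blue>Every day is accounted for in this data set! Assuming one reading per hour, though, its 8,676 rows fall short of the 8,760 a complete year would contain, so some hours within days are missing.</font>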

In [20]:
# Write the 2017 frame to its own CSV; index=False keeps the row index out of
# the file.
data_17.to_csv(
    path_or_buf='../data/buoy_FSTI2/fsti2_buoy_data_2017.csv',
    index=False,
)

In [21]:
# Re-read the saved 2017 CSV to check what was written: print its
# (rows, columns) size and preview the first five rows.
data_2017 = pd.read_csv(
    filepath_or_buffer='../data/buoy_FSTI2/fsti2_buoy_data_2017.csv'
)
print(data_2017.shape)
data_2017.head(n=5)

(8676, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2017,1,1,0,0,262,10.1,0.1
1,2017,1,1,1,0,270,7.1,-0.3
2,2017,1,1,2,0,5,1.3,-0.7
3,2017,1,1,3,0,210,3.2,-1.1
4,2017,1,1,4,0,264,3.5,-1.6


<a id='2018'></a>
### Exploring the data for 2018.

#### Missing Data
 - 

In [22]:
# Load the 2018 yearly file for station FSTI2, report its (rows, columns)
# size, and preview the first five rows.
data_18 = pd.read_csv(filepath_or_buffer='../data/buoy_FSTI2/fsti2h2018.csv')
print(data_18.shape)
data_18.head(n=5)

(8675, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2018,1,1,0,0,318,13.5,-14.2
1,2018,1,1,1,0,314,10.6,-15.0
2,2018,1,1,2,0,312,11.6,-15.7
3,2018,1,1,3,0,304,10.2,-16.5
4,2018,1,1,4,0,296,5.8,-17.4


In [23]:
# Show the dtype pandas inferred for each column of the 2018 frame.
data_18.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
ATMP    float64
dtype: object

In [24]:
# Daily averages for 2018: group on the calendar date, average every remaining
# column, then drop the hour/minute columns from the result.
# NOTE(review): WDIR is averaged arithmetically; if it is a compass bearing in
# degrees, readings either side of north (e.g. 350 and 10) will not average
# sensibly -- confirm before relying on the daily WDIR values.
(
    data_18
    .groupby(['#YY', 'MM', 'DD'])
    .mean()
    .drop(columns=['hh', 'mm'])
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,1,1,279.25,9.520833,-18.558333
2018,1,2,245.291667,9.970833,-18.0625
2018,1,3,261.916667,9.008333,-11.091667
2018,1,4,305.083333,10.083333,-13.625
2018,1,5,298.916667,8.816667,-14.775


In [34]:
# For each calendar month, print how many distinct day-of-month values appear
# in the 2018 data; a count below the month's length means whole days are absent.
for month in range(1, 13):
    print(month, data_18.loc[data_18['MM'] == month, 'DD'].nunique())

1 31
2 28
3 31
4 30
5 31
6 30
7 31
8 31
9 30
10 31
11 30
12 31


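<font color = blue>Every day is accounted for in this data set! Assuming one reading per hour, though, its 8,675 rows fall short of the 8,760 a complete year would contain, so some hours within days are missing.</font>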

In [26]:
# Write the 2018 frame to its own CSV; index=False keeps the row index out of
# the file.
data_18.to_csv(
    path_or_buf='../data/buoy_FSTI2/fsti2_buoy_data_2018.csv',
    index=False,
)

In [27]:
# Re-read the saved 2018 CSV to check what was written: print its
# (rows, columns) size and preview the first five rows.
data_2018 = pd.read_csv(
    filepath_or_buffer='../data/buoy_FSTI2/fsti2_buoy_data_2018.csv'
)
print(data_2018.shape)
data_2018.head(n=5)

(8675, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2018,1,1,0,0,318,13.5,-14.2
1,2018,1,1,1,0,314,10.6,-15.0
2,2018,1,1,2,0,312,11.6,-15.7
3,2018,1,1,3,0,304,10.2,-16.5
4,2018,1,1,4,0,296,5.8,-17.4


<a id='2015-2018'></a>
### Combining all of the data for 2015-2018.

#### Missing data
 - 

In [35]:
# Reload the four per-year CSVs (2015 through 2018), one frame per year, in
# year order.
data_2015, data_2016, data_2017, data_2018 = map(
    pd.read_csv,
    (
        '../data/buoy_FSTI2/fsti2_buoy_data_2015.csv',
        '../data/buoy_FSTI2/fsti2_buoy_data_2016.csv',
        '../data/buoy_FSTI2/fsti2_buoy_data_2017.csv',
        '../data/buoy_FSTI2/fsti2_buoy_data_2018.csv',
    ),
)

In [36]:
# Stack the four yearly frames row-wise into one frame. Each yearly frame
# carries its own 0-based row index, so ignore_index=True gives the combined
# frame a single unique 0..n-1 index instead of repeating the same labels once
# per year (which would make label lookups such as data.loc[0] return four rows).
data = pd.concat(
    [data_2015, data_2016, data_2017, data_2018],
    axis=0,
    ignore_index=True,
)
print(data.shape)
data.head()

(34057, 8)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,ATMP
0,2015,1,1,0,0,235,7.0,-7.4
1,2015,1,1,1,0,253,9.5,-7.4
2,2015,1,1,2,0,210,8.5,-7.4
3,2015,1,1,3,0,249,10.2,-7.3
4,2015,1,1,4,0,246,8.1,-7.0
