# Exploring the data for Buoy CHII2 for the year 2005 - 2018.

### Exploring the data for 2005.

#### Starting Oct 1, 2006, there are daily averages for 
 - (ATMP) air temperature 
 - (WDIR) Wind Direction 
 - (WSPD) Wind Speed 
 - (GST) "Peak 5 or 8 second gust speed (m/s) measured during the eight-minute or two-minute period. The 5 or 8 second period can be determined by payload, See the Sensor Reporting, Sampling, and Accuracy section." 
 
#### Missing data
 - Some in 2005
 - Some in 2006 (before 10)
 - Nov. 10-12, 2007
 
[Data Dictionary](https://www.ndbc.noaa.gov/measdes.shtml)

In [1]:
import pandas as pd

In [2]:
data_05 = pd.read_csv('../data/buoy_CHII2/chii2h2005.csv')
print(data_05.shape)
data_05.head()

(6804, 18)


Unnamed: 0,YYYY,MM,DD,hh,mm,WD,WSP,D GST,WVHT,DPD,APD,MWD,BAR,ATMP,WTMP,DEWP,VIS,TIDE
0,2005,2,14,20,0,300,9.3,11.8,99,99,99,999,9999,3.8,999,999,99,99
1,2005,2,14,21,0,290,10.8,12.9,99,99,99,999,9999,4.6,999,999,99,99
2,2005,2,14,22,0,280,11.8,14.4,99,99,99,999,9999,5.1,999,999,99,99
3,2005,2,14,23,0,290,11.8,14.4,99,99,99,999,9999,4.7,999,999,99,99
4,2005,2,15,0,0,280,9.3,11.3,99,99,99,999,9999,4.7,999,999,99,99


In [3]:
data_05.rename(mapper={
    'YYYY': '#YY',
    'WD': 'WDIR',
    'WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)

In [4]:
data_05.isnull().sum()

#YY     0
MM      0
DD      0
hh      0
mm      0
WDIR    0
WSPD    0
GST     0
WVHT    0
DPD     0
APD     0
MWD     0
BAR     0
ATMP    0
WTMP    0
DEWP    0
VIS     0
TIDE    0
dtype: int64

In [5]:
# there are 28 fields in the 'ATMP' feature that are not reported.
sum(data_05['ATMP'] == 999)

28

In [6]:
data_05[data_05['ATMP'] == 999]

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,BAR,ATMP,WTMP,DEWP,VIS,TIDE
483,2005,3,8,11,0,360,8.2,10.3,99,99,99,999,9999,999.0,999,999,99,99
484,2005,3,8,12,0,360,9.8,11.3,99,99,99,999,9999,999.0,999,999,99,99
485,2005,3,8,13,0,340,9.3,10.8,99,99,99,999,9999,999.0,999,999,99,99
486,2005,3,8,14,0,350,8.2,9.3,99,99,99,999,9999,999.0,999,999,99,99
487,2005,3,8,15,0,350,8.2,9.8,99,99,99,999,9999,999.0,999,999,99,99
488,2005,3,8,16,0,360,8.2,9.8,99,99,99,999,9999,999.0,999,999,99,99
489,2005,3,8,17,0,10,9.8,10.8,99,99,99,999,9999,999.0,999,999,99,99
490,2005,3,8,18,0,20,7.7,9.8,99,99,99,999,9999,999.0,999,999,99,99
491,2005,3,8,19,0,30,7.7,9.3,99,99,99,999,9999,999.0,999,999,99,99
492,2005,3,8,20,0,30,7.2,8.8,99,99,99,999,9999,999.0,999,999,99,99


In [7]:
data_05.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
GST     float64
WVHT      int64
DPD       int64
APD       int64
MWD       int64
BAR       int64
ATMP    float64
WTMP      int64
DEWP      int64
VIS       int64
TIDE      int64
dtype: object

In [8]:
data_05.columns

Index(['#YY', 'MM', 'DD', 'hh', 'mm', 'WDIR', 'WSPD', 'GST', 'WVHT', 'DPD',
       'APD', 'MWD', 'BAR', 'ATMP', 'WTMP', 'DEWP', 'VIS', 'TIDE'],
      dtype='object')

In [9]:
features = ['#YY', 'MM', 'DD', 'hh', 'mm', 'WDIR', 'WSPD', 'GST', 'ATMP']

In [10]:
data_2005 = data_05[features]

In [11]:
data_2005.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,ATMP
0,2005,2,14,20,0,300,9.3,11.8,3.8
1,2005,2,14,21,0,290,10.8,12.9,4.6
2,2005,2,14,22,0,280,11.8,14.4,5.1
3,2005,2,14,23,0,290,11.8,14.4,4.7
4,2005,2,15,0,0,280,9.3,11.3,4.7


In [12]:
data_2005.groupby(['#YY', 'MM', 'DD']).mean().drop('hh', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mm,WDIR,WSPD,GST,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005,2,14,0.0,290.000000,10.925000,13.375000,4.550000
2005,2,15,0.0,168.750000,4.912500,5.808333,3.520833
2005,2,16,0.0,289.583333,7.245833,8.420833,-0.620833
2005,2,17,0.0,305.000000,9.522727,11.295455,-4.413636
2005,2,18,0.0,262.916667,6.329167,7.683333,-7.570833
2005,2,19,0.0,273.333333,7.320833,8.808333,-1.733333
2005,2,20,0.0,175.000000,8.150000,9.333333,0.658333
2005,2,21,0.0,225.000000,5.083333,5.820833,1.416667
2005,2,22,0.0,230.714286,4.964286,5.464286,-0.042857
2005,2,23,0.0,219.583333,3.679167,4.170833,-1.133333


### Exploring the data for 2006.

In [13]:
data_06 = pd.read_csv('../data/buoy_CHII2/chii2h2006.csv')
print(data_06.shape)
data_06.head()

(2182, 18)


Unnamed: 0,YYYY,MM,DD,hh,mm,WD,WSP,D GST,WVHT,DPD,APD,MWD,BAR,ATMP,WTMP,DEWP,VIS,TIDE
0,2006,3,1,0,0,280,12.4,14.4,99,99,99,999,9999,9.8,999,999,99,99
1,2006,3,30,14,0,190,4.6,5.7,99,99,99,999,9999,5.6,999,999,99,99
2,2006,3,30,16,0,180,6.7,7.2,99,99,99,999,9999,8.1,999,999,99,99
3,2006,3,30,17,0,180,8.8,9.3,99,99,99,999,9999,10.3,999,999,99,99
4,2006,3,30,18,0,190,8.8,10.3,99,99,99,999,9999,11.9,999,999,99,99


In [14]:
data_06.rename(mapper={
    'YYYY': '#YY',
    'WD': 'WDIR',
    'WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)

In [15]:
data_06.tail()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,BAR,ATMP,WTMP,DEWP,VIS,TIDE
2177,2006,12,31,17,0,140,10.3,12.9,99,99,99,999,9999,5.5,999,999,99,99
2178,2006,12,31,18,0,150,9.8,12.4,99,99,99,999,9999,5.7,999,999,99,99
2179,2006,12,31,20,0,150,8.8,10.3,99,99,99,999,9999,7.5,999,999,99,99
2180,2006,12,31,21,0,160,7.2,8.8,99,99,99,999,9999,9.4,999,999,99,99
2181,2006,12,31,22,0,230,10.3,13.4,99,99,99,999,9999,13.6,999,999,99,99


In [16]:
data_2006 = data_06[features]
data_2006.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,ATMP
0,2006,3,1,0,0,280,12.4,14.4,9.8
1,2006,3,30,14,0,190,4.6,5.7,5.6
2,2006,3,30,16,0,180,6.7,7.2,8.1
3,2006,3,30,17,0,180,8.8,9.3,10.3
4,2006,3,30,18,0,190,8.8,10.3,11.9


In [17]:
pd.options.display.max_rows = 500

In [18]:
data_2006.groupby(['#YY', 'MM', 'DD']).mean().drop(['hh', 'mm'], axis=1).tail(100)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,GST,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006,7,12,310.434783,5.556522,6.295652,19.695652
2006,7,13,185.416667,2.991667,3.575,20.6125
2006,7,14,130.869565,4.695652,5.521739,23.043478
2006,7,15,201.111111,4.233333,4.911111,26.15
2006,7,16,220.0,5.568182,6.568182,29.077273
2006,7,17,199.285714,8.714286,10.678571,29.528571
2006,7,18,124.285714,9.942857,11.842857,27.428571
2006,7,19,76.0,6.72,7.633333,23.04
2006,10,1,193.0,4.19,4.905,15.79
2006,10,2,142.222222,9.811111,11.705556,19.527778


### Exploring the data for 2007.
 - 10/10-12/2007 are missing

In [33]:
data_07 = pd.read_csv('../data/buoy_CHII2/chii2h2007.csv')
print(data_07.shape)
data_07.head()

(6697, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDI,R WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,#yr,mo,dy,hr,mn,deg,T m/s,m/s,m,sec,sec,deg,T hPa,degC,degC,degC,nmi,ft
1,2007,1,1,0,0,210,11.8,17,99,99,99,999,9999,11.8,999,999,99,99
2,2007,1,1,1,0,210,9.3,11.3,99,99,99,999,9999,10.6,999,999,99,99
3,2007,1,1,2,0,220,11.3,14.4,99,99,99,999,9999,10.2,999,999,99,99
4,2007,1,1,4,0,200,13.4,15.4,99,99,99,999,9999,8,999,999,99,99


In [34]:
sum(data_07['ATMP'] == 999)

0

In [35]:
sum(data_07['WDI'] == 999)

0

In [36]:
sum(data_07['R WSP'] == 999)

0

In [37]:
data_07.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDI,R WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,#yr,mo,dy,hr,mn,deg,T m/s,m/s,m,sec,sec,deg,T hPa,degC,degC,degC,nmi,ft
1,2007,1,1,0,0,210,11.8,17,99,99,99,999,9999,11.8,999,999,99,99
2,2007,1,1,1,0,210,9.3,11.3,99,99,99,999,9999,10.6,999,999,99,99
3,2007,1,1,2,0,220,11.3,14.4,99,99,99,999,9999,10.2,999,999,99,99
4,2007,1,1,4,0,200,13.4,15.4,99,99,99,999,9999,8,999,999,99,99


In [38]:
data_07.rename(mapper={
    'WDI': 'WDIR',
    'R WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)

In [39]:
data_07.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,#yr,mo,dy,hr,mn,deg,T m/s,m/s,m,sec,sec,deg,T hPa,degC,degC,degC,nmi,ft
1,2007,1,1,0,0,210,11.8,17,99,99,99,999,9999,11.8,999,999,99,99
2,2007,1,1,1,0,210,9.3,11.3,99,99,99,999,9999,10.6,999,999,99,99
3,2007,1,1,2,0,220,11.3,14.4,99,99,99,999,9999,10.2,999,999,99,99
4,2007,1,1,4,0,200,13.4,15.4,99,99,99,999,9999,8,999,999,99,99


In [40]:
data_2007 = data_07[features]

In [41]:
data_2007.drop(index=0, inplace=True)
print(data_2007.shape)
data_2007.head()

(6696, 9)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,ATMP
1,2007,1,1,0,0,210,11.8,17.0,11.8
2,2007,1,1,1,0,210,9.3,11.3,10.6
3,2007,1,1,2,0,220,11.3,14.4,10.2
4,2007,1,1,4,0,200,13.4,15.4,8.0
5,2007,1,1,5,0,220,10.8,16.0,7.5


In [42]:
data_2007.shape

(6696, 9)

In [43]:
data_2007 = data_2007.astype('float', inplace=True)

In [44]:
data_2007.shape

(6696, 9)

In [45]:
data_2007.dtypes

#YY     float64
MM      float64
DD      float64
hh      float64
mm      float64
WDIR    float64
WSPD    float64
GST     float64
ATMP    float64
dtype: object

In [46]:
data_2007.shape

(6696, 9)

In [52]:
data_07 = data_2007.groupby(['#YY', 'MM', 'DD']).mean().drop(['hh', 'mm'], axis=1)

In [53]:
data_07.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,GST,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007.0,1.0,1.0,258.333333,9.95,12.933333,5.316667
2007.0,1.0,2.0,254.0,5.715,6.585,2.43
2007.0,1.0,3.0,198.823529,10.717647,12.935294,3.541176
2007.0,1.0,4.0,194.117647,12.870588,15.458824,5.852941
2007.0,1.0,5.0,222.5,7.385,8.775,8.41


In [54]:
data_07.shape

(362, 4)

In [55]:
data_07

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,WDIR,WSPD,GST,ATMP
#YY,MM,DD,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007.0,1.0,1.0,258.333333,9.95,12.933333,5.316667
2007.0,1.0,2.0,254.0,5.715,6.585,2.43
2007.0,1.0,3.0,198.823529,10.717647,12.935294,3.541176
2007.0,1.0,4.0,194.117647,12.870588,15.458824,5.852941
2007.0,1.0,5.0,222.5,7.385,8.775,8.41
2007.0,1.0,6.0,293.157895,5.794737,6.968421,4.178947
2007.0,1.0,7.0,188.5,5.895,7.0,3.215
2007.0,1.0,8.0,243.846154,10.284615,12.876923,1.623077
2007.0,1.0,9.0,284.736842,9.863158,12.5,-0.121053
2007.0,1.0,10.0,253.333333,8.472222,10.483333,-3.822222


In [63]:
data_2007['DD'].value_counts(ascending=True)

31.0    116
29.0    196
10.0    198
12.0    202
11.0    203
4.0     207
9.0     207
30.0    211
8.0     213
13.0    213
15.0    213
28.0    214
6.0     217
26.0    218
27.0    220
2.0     221
16.0    222
5.0     223
17.0    224
25.0    224
22.0    225
14.0    225
24.0    226
20.0    230
7.0     231
18.0    231
19.0    232
3.0     232
1.0     232
23.0    233
21.0    237
Name: DD, dtype: int64

In [77]:
# this shows that the there are three days missing in November.
for i in range(1, 13):
    print(i, len(data_2007[data_2007['MM'] == i]['DD'].value_counts()))

1 31
2 28
3 31
4 30
5 31
6 30
7 31
8 31
9 30
10 31
11 27
12 31


In [79]:
nov_2007 = data_2007[data_2007['MM'] == 11.0]

In [85]:
# showing that days 10-12 in November are not represented.  We might try and populate this information with
# other data.
for i in range(1, 31):
    print(i, len(nov_2007[nov_2007['DD'] == i]))

1 24
2 20
3 22
4 20
5 20
6 20
7 20
8 17
9 5
10 0
11 0
12 0
13 7
14 21
15 14
16 22
17 21
18 21
19 23
20 22
21 22
22 20
23 22
24 21
25 22
26 22
27 17
28 22
29 21
30 22


### Exploring the data for 2008.

In [86]:
data_08 = pd.read_csv('../data/buoy_CHII2/chii2h2008.csv')
print(data_08.shape)
data_08.head()

(5138, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDI,R WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,#yr,mo,dy,hr,mn,deg,T m/s,m/s,m,sec,sec,deg,hPa,degC,degC,degC,nmi,ft
1,2008,1,1,0,0,140,4.1,5.1,99,99,99,999,9999,-0.3,999,999,99,99
2,2008,1,1,1,0,100,3.1,3.6,99,99,99,999,9999,-0.4,999,999,99,99
3,2008,1,1,2,0,50,4.1,4.6,99,99,99,999,9999,-0.4,999,999,99,99
4,2008,1,1,3,0,340,4.6,5.1,99,99,99,999,9999,-0.4,999,999,99,99


In [91]:
data_08.rename(mapper={
    'WDI': 'WDIR',
    'R WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)
data_08.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,#yr,mo,dy,hr,mn,deg,T m/s,m/s,m,sec,sec,deg,hPa,degC,degC,degC,nmi,ft
1,2008,1,1,0,0,140,4.1,5.1,99,99,99,999,9999,-0.3,999,999,99,99
2,2008,1,1,1,0,100,3.1,3.6,99,99,99,999,9999,-0.4,999,999,99,99
3,2008,1,1,2,0,50,4.1,4.6,99,99,99,999,9999,-0.4,999,999,99,99
4,2008,1,1,3,0,340,4.6,5.1,99,99,99,999,9999,-0.4,999,999,99,99


In [97]:
data_08.dtypes

#YY     object
MM      object
DD      object
hh      object
mm      object
WDIR    object
WSPD    object
GST     object
WVHT    object
DPD     object
APD     object
MWD     object
PRES    object
ATMP    object
WTMP    object
DEWP    object
VIS     object
TIDE    object
dtype: object

In [98]:
data_08 = data_08.drop(index=0)

In [99]:
data_08.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
1,2008,1,1,0,0,140,4.1,5.1,99,99,99,999,9999,-0.3,999,999,99,99
2,2008,1,1,1,0,100,3.1,3.6,99,99,99,999,9999,-0.4,999,999,99,99
3,2008,1,1,2,0,50,4.1,4.6,99,99,99,999,9999,-0.4,999,999,99,99
4,2008,1,1,3,0,340,4.6,5.1,99,99,99,999,9999,-0.4,999,999,99,99
5,2008,1,1,4,0,310,6.7,7.7,99,99,99,999,9999,-0.4,999,999,99,99


### Exploring the data for 2009.

In [87]:
data_09 = pd.read_csv('../data/buoy_CHII2/chii2h2009.csv')
print(data_09.shape)
data_09.head()

(8288, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDI,R WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2009,1,1,0,0,280,2.6,3.6,99,99,99,999,9999,-6.6,999,999,99,99
1,2009,1,1,1,0,260,3.1,4.1,99,99,99,999,9999,-6.7,999,999,99,99
2,2009,1,1,2,0,270,4.1,5.1,99,99,99,999,9999,-7.3,999,999,99,99
3,2009,1,1,3,0,270,4.6,5.7,99,99,99,999,9999,-7.7,999,999,99,99
4,2009,1,1,4,0,260,2.6,4.6,99,99,99,999,9999,-7.4,999,999,99,99


In [88]:
data_09.rename(mapper={
    'WDI': 'WDIR',
    'R WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)
data_09.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2009,1,1,0,0,280,2.6,3.6,99,99,99,999,9999,-6.6,999,999,99,99
1,2009,1,1,1,0,260,3.1,4.1,99,99,99,999,9999,-6.7,999,999,99,99
2,2009,1,1,2,0,270,4.1,5.1,99,99,99,999,9999,-7.3,999,999,99,99
3,2009,1,1,3,0,270,4.6,5.7,99,99,99,999,9999,-7.7,999,999,99,99
4,2009,1,1,4,0,260,2.6,4.6,99,99,99,999,9999,-7.4,999,999,99,99


### Exploring the data for 2010.

In [89]:
data_10 = pd.read_csv('../data/buoy_CHII2/chii2h2010.csv')
print(data_10.shape)
data_10.head()

(8466, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDI,R WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2010,1,1,0,0,320,11.8,13.9,99,99,99,999,9999,-7.5,999,999,99,99
1,2010,1,1,1,0,300,9.8,12.9,99,99,99,999,9999,-8.5,999,999,99,99
2,2010,1,1,2,0,300,10.3,13.4,99,99,99,999,9999,-8.5,999,999,99,99
3,2010,1,1,3,0,300,10.3,13.4,99,99,99,999,9999,-9.0,999,999,99,99
4,2010,1,1,4,0,300,9.3,12.4,99,99,99,999,9999,-9.5,999,999,99,99


In [90]:
data_10.rename(mapper={
    'WDI': 'WDIR',
    'R WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)
data_10.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2010,1,1,0,0,320,11.8,13.9,99,99,99,999,9999,-7.5,999,999,99,99
1,2010,1,1,1,0,300,9.8,12.9,99,99,99,999,9999,-8.5,999,999,99,99
2,2010,1,1,2,0,300,10.3,13.4,99,99,99,999,9999,-8.5,999,999,99,99
3,2010,1,1,3,0,300,10.3,13.4,99,99,99,999,9999,-9.0,999,999,99,99
4,2010,1,1,4,0,300,9.3,12.4,99,99,99,999,9999,-9.5,999,999,99,99


### Exploring the data for 2011.

In [93]:
data_11 = pd.read_csv('../data/buoy_CHII2/chii2h2011.csv')
print(data_11.shape)
data_11.head()

(8499, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDI,R WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2011,1,1,0,0,180,12.4,13.4,99,99,99,999,9999,11.1,999,999,99,99
1,2011,1,1,1,0,180,9.8,11.3,99,99,99,999,9999,9.8,999,999,99,99
2,2011,1,1,2,0,200,10.8,11.3,99,99,99,999,9999,11.8,999,999,99,99
3,2011,1,1,3,0,190,10.3,11.3,99,99,99,999,9999,10.9,999,999,99,99
4,2011,1,1,4,0,200,11.3,12.9,99,99,99,999,9999,12.2,999,999,99,99


In [94]:
data_11.rename(mapper={
    'WDI': 'WDIR',
    'R WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)
data_11.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2011,1,1,0,0,180,12.4,13.4,99,99,99,999,9999,11.1,999,999,99,99
1,2011,1,1,1,0,180,9.8,11.3,99,99,99,999,9999,9.8,999,999,99,99
2,2011,1,1,2,0,200,10.8,11.3,99,99,99,999,9999,11.8,999,999,99,99
3,2011,1,1,3,0,190,10.3,11.3,99,99,99,999,9999,10.9,999,999,99,99
4,2011,1,1,4,0,200,11.3,12.9,99,99,99,999,9999,12.2,999,999,99,99


### Exploring the data for 2012.

In [95]:
data_12 = pd.read_csv('../data/buoy_CHII2/chii2h2012.csv')
print(data_12.shape)
data_12.head()

(8589, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDI,R WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2012,1,1,0,0,170,12.9,16.0,99,99,99,999,9999,2.9,999,999,99,99
1,2012,1,1,1,0,170,13.9,16.5,99,99,99,999,9999,3.0,999,999,99,99
2,2012,1,1,2,0,170,14.4,16.5,99,99,99,999,9999,3.2,999,999,99,99
3,2012,1,1,3,0,180,12.4,14.4,99,99,99,999,9999,3.7,999,999,99,99
4,2012,1,1,4,0,170,9.3,9.8,99,99,99,999,9999,3.9,999,999,99,99


In [96]:
data_12.rename(mapper={
    'WDI': 'WDIR',
    'R WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)
data_12.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2012,1,1,0,0,170,12.9,16.0,99,99,99,999,9999,2.9,999,999,99,99
1,2012,1,1,1,0,170,13.9,16.5,99,99,99,999,9999,3.0,999,999,99,99
2,2012,1,1,2,0,170,14.4,16.5,99,99,99,999,9999,3.2,999,999,99,99
3,2012,1,1,3,0,180,12.4,14.4,99,99,99,999,9999,3.7,999,999,99,99
4,2012,1,1,4,0,170,9.3,9.8,99,99,99,999,9999,3.9,999,999,99,99


### Exploring the data for 2013.

In [100]:
data_13 = pd.read_csv('../data/buoy_CHII2/chii2h2013.csv')
print(data_13.shape)
data_13.head()

(30419, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSP,D GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2013,1,1,0,0,330,4.1,5.1,99,99,99,999,9999,-2.6,999,999,99,99
1,2013,1,1,1,0,330,5.1,6.2,99,99,99,999,9999,-3.0,999,999,99,99
2,2013,1,1,2,0,340,8.8,10.3,99,99,99,999,9999,-3.7,999,999,99,99
3,2013,1,1,3,0,350 1,0.3,12.4,99,99,99,999,9999,-5.1,999,999,99,99
4,2013,1,1,4,0,330,9.3,10.8,99,99,99,999,9999,-5.6,999,999,99,99


In [101]:
data_13.rename(mapper={
    'WDI': 'WDIR',
    'R WSP': 'WSPD',
    'D GST': 'GST'
}, axis=1, inplace=True)
data_13.head()

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSP,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2013,1,1,0,0,330,4.1,5.1,99,99,99,999,9999,-2.6,999,999,99,99
1,2013,1,1,1,0,330,5.1,6.2,99,99,99,999,9999,-3.0,999,999,99,99
2,2013,1,1,2,0,340,8.8,10.3,99,99,99,999,9999,-3.7,999,999,99,99
3,2013,1,1,3,0,350 1,0.3,12.4,99,99,99,999,9999,-5.1,999,999,99,99
4,2013,1,1,4,0,330,9.3,10.8,99,99,99,999,9999,-5.6,999,999,99,99


In [102]:
data_13.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR     object
WSP     float64
GST     float64
WVHT      int64
DPD       int64
APD       int64
MWD       int64
PRES      int64
ATMP    float64
WTMP      int64
DEWP      int64
VIS       int64
TIDE      int64
dtype: object

### Exploring the data for 2014.

In [104]:
data_14 = pd.read_csv('../data/buoy_CHII2/chii2h2014.csv')
print(data_14.shape)
data_14.head()

(33141, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2014,1,1,0,0,280,2.6,3.6,99,99,99,999,9999,-10.4,999,999,99,99
1,2014,1,1,0,30,310,2.1,3.6,99,99,99,999,9999,-10.4,999,999,99,99
2,2014,1,1,0,45,290,3.1,4.1,99,99,99,999,9999,-10.4,999,999,99,99
3,2014,1,1,1,0,280,3.1,3.6,99,99,99,999,9999,-10.3,999,999,99,99
4,2014,1,1,1,15,290,3.1,4.1,99,99,99,999,9999,-10.3,999,999,99,99


In [105]:
data_14.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR     object
WSPD     object
GST     float64
WVHT      int64
DPD       int64
APD       int64
MWD       int64
PRES      int64
ATMP    float64
WTMP      int64
DEWP      int64
VIS       int64
TIDE      int64
dtype: object

### Exploring the data for 2015.

In [106]:
data_15 = pd.read_csv('../data/buoy_CHII2/chii2h2015.csv')
print(data_15.shape)
data_15.head()

(34570, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2015,1,1,0,0,220,10.8,13.9,99,99,99,999,9999,-7.2,999,999.0,99,99
1,2015,1,1,0,15,220,11.8,15.4,99,99,99,999,9999,-7.2,999,999.0,99,99
2,2015,1,1,0,30,220,11.8,17.0,99,99,99,999,9999,-7.1,999,999.0,99,99
3,2015,1,1,0,45,220,12.4,17.0,99,99,99,999,9999,-6.9,999,999.0,99,99
4,2015,1,1,1,0,220,12.9,19.0,99,99,99,999,9999,-7.0,999,999.0,99,99


In [107]:
data_15.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
GST     float64
WVHT      int64
DPD       int64
APD       int64
MWD       int64
PRES      int64
ATMP    float64
WTMP      int64
DEWP    float64
VIS       int64
TIDE      int64
dtype: object

### Exploring the data for 2016.

In [108]:
data_16 = pd.read_csv('../data/buoy_CHII2/chii2h2016.csv')
print(data_16.shape)
data_16.head()

(51583, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2016,1,1,0,0,250,9.8,11.3,99,99,99,999,9999,-2.4,999,-6.9,99,99
1,2016,1,1,0,10,240,8.8,10.8,99,99,99,999,9999,-2.4,999,-6.7,99,99
2,2016,1,1,0,20,270,8.8,10.3,99,99,99,999,9999,-2.2,999,-6.3,99,99
3,2016,1,1,0,30,260,7.2,8.2,99,99,99,999,9999,-2.2,999,-6.1,99,99
4,2016,1,1,0,40,270,8.2,9.3,99,99,99,999,9999,-2.4,999,-6.3,99,99


In [109]:
data_16.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
GST     float64
WVHT      int64
DPD       int64
APD       int64
MWD       int64
PRES      int64
ATMP    float64
WTMP      int64
DEWP    float64
VIS       int64
TIDE      int64
dtype: object

### Exploring the data for 2017.

In [110]:
data_17 = pd.read_csv('../data/buoy_CHII2/chii2h2017.csv')
print(data_17.shape)
data_17.head()

(50474, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2017,1,1,0,0,280,8.8,10.3,99,99,99,999,9999,1.3,999,-6.2,99,99
1,2017,1,1,0,10,280,7.2,8.2,99,99,99,999,9999,1.2,999,-6.4,99,99
2,2017,1,1,0,20,280,7.2,7.7,99,99,99,999,9999,1.1,999,-6.2,99,99
3,2017,1,1,0,30,280,7.7,8.2,99,99,99,999,9999,1.0,999,-6.2,99,99
4,2017,1,1,0,40,280,7.7,8.8,99,99,99,999,9999,0.9,999,-6.3,99,99


In [111]:
data_17.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR     object
WSPD    float64
GST     float64
WVHT      int64
DPD       int64
APD       int64
MWD       int64
PRES      int64
ATMP    float64
WTMP      int64
DEWP    float64
VIS       int64
TIDE      int64
dtype: object

### Exploring the data for 2018.

In [112]:
data_18 = pd.read_csv('../data/buoy_CHII2/chii2h2018.csv')
print(data_18.shape)
data_18.head()

(50269, 18)


Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
0,2018,1,1,0,0,300,9.3,10.8,99,99,99,999,9999,-11.2,999,-18.6,99,99
1,2018,1,1,0,10,310,8.8,9.8,99,99,99,999,9999,-11.5,999,-18.6,99,99
2,2018,1,1,0,20,300,9.8,10.3,99,99,99,999,9999,-11.6,999,-18.5,99,99
3,2018,1,1,0,30,310,9.3,10.8,99,99,99,999,9999,-11.9,999,-18.8,99,99
4,2018,1,1,0,40,300,9.8,11.3,99,99,99,999,9999,-12.1,999,-18.4,99,99


In [113]:
data_18.dtypes

#YY       int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
GST     float64
WVHT      int64
DPD       int64
APD       int64
MWD       int64
PRES      int64
ATMP    float64
WTMP      int64
DEWP    float64
VIS       int64
TIDE      int64
dtype: object

### Exploring the data for 2019.

In [None]:
# data_19 = pd.read_csv('../data/buoy_CHII2/chii2h2019.csv')

### Combining all of the dataframes into one giant dataframe.