Data from https://tidesandcurrents.noaa.gov/waterlevels.html?id=9414290&units=standard&bdate=20190701&edate=20190801&timezone=GMT&datum=MLLW&interval=6&action=data
import math
import datetime
import pytz
import glob
import functools
import operator
import numpy
import pandas
import matplotlib.pyplot
import matplotlib.pylab
import seaborn
import sklearn.linear_model
import sklearn.metrics
import vtreat.cross_plan
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# strptime pattern for the "<Date> <Time (GMT)>" strings assembled from the
# tide CSV columns, e.g. "2017/01/01 00:06".
date_fmt = "%Y/%m/%d %H:%M"
# The source data reports times in GMT, so parsed datetimes are tagged as UTC.
tz = pytz.utc
def parse_date(dtstr):
    """Parse a timestamp string into a timezone-aware datetime.

    :param dtstr: text laid out according to the module-level ``date_fmt``
        pattern (year/month/day hour:minute).
    :return: ``datetime.datetime`` whose ``tzinfo`` is the module-level ``tz``.
    :raises ValueError: if ``dtstr`` does not match ``date_fmt``.
    """
    # strptime yields a naive datetime; attach the zone without shifting the clock.
    return datetime.datetime.strptime(dtstr, date_fmt).replace(tzinfo=tz)
# Reference instant: the 'dts' column is measured in seconds elapsed since this moment.
base_date_time = datetime.datetime(year=2001, month=1, day=1, tzinfo=tz)
# Report the zone name actually attached to the reference instant.
print("TZ NAME: {tz}".format(tz=base_date_time.tzname()))
2001-01-01 00:00:00+00:00
# Cell contents that should be read as missing values.
na_values = ['', '-']
# Sort the paths so the frames are stacked in lexicographic file-name order.
files = sorted(glob.glob("tide_data/*.csv", recursive=False))
# Stack all files row-wise into one frame with a fresh 0..n-1 index.
tides = pandas.concat(
    [pandas.read_csv(csv_path, na_values=na_values) for csv_path in files],
    axis=0,
    ignore_index=True,
)
# Notebook preview of the first rows (no visible effect when run as a script).
tides.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Date |
Time (GMT) |
Predicted (ft) |
Preliminary (ft) |
Verified (ft) |
0 |
2017/01/01 |
00:00 |
1.849 |
NaN |
2.12 |
1 |
2017/01/01 |
00:06 |
1.695 |
NaN |
1.97 |
2 |
2017/01/01 |
00:12 |
1.543 |
NaN |
1.88 |
3 |
2017/01/01 |
00:18 |
1.393 |
NaN |
1.78 |
4 |
2017/01/01 |
00:24 |
1.247 |
NaN |
1.66 |
# Sanity check: parsing the reference instant's own text form should give a
# zero offset from base_date_time and carry the same zone name.
d0 = parse_date('2001/01/01 00:00')
(d0 - base_date_time).total_seconds()
print("TZ NAME: {tz}".format(tz=d0.tzname()))

# 'dt': timezone-aware timestamp of each reading, built from the date and time
# text columns. Iterating the columns in lockstep avoids a per-row label lookup
# and does not depend on the frame having a 0..n-1 index.
tides['dt'] = [
    parse_date(date_str + ' ' + time_str)
    for date_str, time_str in zip(tides['Date'], tides['Time (GMT)'])
]
# 'dts': the same instant expressed as seconds elapsed since base_date_time.
tides['dts'] = [(t - base_date_time).total_seconds() for t in tides['dt']]

# 'tide feet': take the verified reading, falling back to the preliminary
# reading on rows where no verified value is present.
tides['tide feet'] = tides['Verified (ft)'].copy()
null_posns = pandas.isnull(tides['tide feet'])
tides.loc[null_posns, 'tide feet'] = tides.loc[null_posns, 'Preliminary (ft)']
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Date |
Time (GMT) |
Predicted (ft) |
Preliminary (ft) |
Verified (ft) |
dt |
dts |
tide feet |
0 |
2017/01/01 |
00:00 |
1.849 |
NaN |
2.12 |
2017-01-01 00:00:00+00:00 |
504921600.0 |
2.12 |
1 |
2017/01/01 |
00:06 |
1.695 |
NaN |
1.97 |
2017-01-01 00:06:00+00:00 |
504921960.0 |
1.97 |
2 |
2017/01/01 |
00:12 |
1.543 |
NaN |
1.88 |
2017-01-01 00:12:00+00:00 |
504922320.0 |
1.88 |
3 |
2017/01/01 |
00:18 |
1.393 |
NaN |
1.78 |
2017-01-01 00:18:00+00:00 |
504922680.0 |
1.78 |
4 |
2017/01/01 |
00:24 |
1.247 |
NaN |
1.66 |
2017-01-01 00:24:00+00:00 |
504923040.0 |
1.66 |
# Notebook display of the average water level (no visible effect in a script).
numpy.mean(tides['tide feet'])
# Spacing in seconds between each pair of consecutive rows. numpy.diff computes
# dts[i+1] - dts[i] in one pass instead of two label lookups per row; the
# result is kept as a list of numpy floats.
deltas = list(numpy.diff(tides['dts'].to_numpy()))
# Persist the prepared frame; pandas infers gzip compression from the '.gz' suffix.
tides.to_pickle('tides.pickle.gz')