 # Wave Map Exploratory Data Analysis

In [2]:
import numpy as np
import pandas as pd
import pygrib
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
pd.options.plotting.backend = 'plotly'


 ## Wave Data

In [4]:
wave_data = pd.read_csv('data/wave_data.csv', header=[0,1], index_col=0, parse_dates=True)
wave_data.columns.names = ['Station', 'Signal']
station_data = pd.read_csv('data/wave_stations.csv', index_col=0)


In [6]:
wave_data


Station,helsinki-suomenlinna,helsinki-suomenlinna,peraemeri,peraemeri,pohjois-itaemeri,pohjois-itaemeri,selkaemeri,selkaemeri,suomenlahti,suomenlahti
Signal,Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m)
datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2019-01-01 00:00:00,,,,,233.0,3.4,216.0,2.8,250.0,2.7
2019-01-01 00:30:00,,,,,233.0,3.5,215.0,2.7,247.0,2.6
2019-01-01 01:00:00,,,,,226.0,3.3,213.0,2.6,250.0,2.9
2019-01-01 01:30:00,,,,,219.0,3.2,219.0,2.4,248.0,2.7
2019-01-01 02:00:00,,,,,226.0,3.4,223.0,2.4,246.0,2.6
...,...,...,...,...,...,...,...,...,...,...
2020-09-14 21:30:00,190.0,0.3,286.0,1.5,209.0,0.9,257.0,0.5,228.0,0.9
2020-09-14 22:00:00,203.0,0.3,283.0,1.5,204.0,0.9,261.0,0.4,225.0,0.8
2020-09-14 22:30:00,202.0,0.3,279.0,1.6,215.0,0.8,274.0,0.5,231.0,0.7
2020-09-14 23:00:00,205.0,0.3,274.0,1.5,211.0,0.8,285.0,0.5,241.0,0.6


In [8]:
wave_data.describe()


Station,helsinki-suomenlinna,helsinki-suomenlinna,peraemeri,peraemeri,pohjois-itaemeri,pohjois-itaemeri,selkaemeri,selkaemeri,suomenlahti,suomenlahti
Signal,Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m),Direction of waves (deg),Wave height (m)
count,18992.0,18993.0,13138.0,13137.0,29744.0,29746.0,27645.0,27644.0,27113.0,27113.0
mean,181.018797,0.312152,178.299285,0.696103,204.333782,1.293774,218.266956,1.015515,199.565965,0.845185
std,58.243906,0.225935,99.882044,0.515079,83.590847,0.917833,100.293184,0.775056,77.61667,0.603304
min,1.0,0.0,1.0,0.0,1.0,0.1,1.0,0.0,1.0,0.1
25%,159.5,0.1,73.0,0.3,178.0,0.6,174.0,0.5,116.0,0.4
50%,186.0,0.2,212.0,0.6,210.0,1.1,218.0,0.8,241.0,0.7
75%,202.0,0.4,232.0,0.9,244.0,1.8,306.0,1.4,248.0,1.2
max,359.0,1.4,360.0,4.0,360.0,7.6,360.0,8.1,360.0,4.1


In [10]:
print('Percentages of missing data:')
wave_data.isnull().mean()


Percentages of missing data:


Station               Signal                  
helsinki-suomenlinna  Direction of waves (deg)    0.364731
                      Wave height (m)             0.364698
peraemeri             Direction of waves (deg)    0.560543
                      Wave height (m)             0.560577
pohjois-itaemeri      Direction of waves (deg)    0.005084
                      Wave height (m)             0.005017
selkaemeri            Direction of waves (deg)    0.075294
                      Wave height (m)             0.075328
suomenlahti           Direction of waves (deg)    0.093089
                      Wave height (m)             0.093089
dtype: float64

In [12]:
station_data


Unnamed: 0,Observation station,Station ID,Latitude (decimals),Longitude (decimals),Time from,Time to,Data creation time
0,Helsinki Suomenlinna aaltopoiju,103976,60.12333,24.97283,2019-04-16T00:00:00.000Z,2020-09-14T23:59:59.000Z,2020-09-15T05:28:42.896Z
0,Perämeri aaltopoiju,137228,64.6841,23.238,2019-05-28T00:00:00.000Z,2020-09-14T23:59:59.000Z,2020-09-15T05:25:41.457Z
0,Pohjois-Itämeri aaltopoiju,134220,59.25,20.99683,2019-01-01T00:00:00.000Z,2020-09-14T23:59:59.000Z,2020-09-15T05:20:21.122Z
0,Selkämeri aaltopoiju,134246,61.8001,20.23267,2019-01-01T00:00:00.000Z,2020-09-14T23:59:59.000Z,2020-09-15T05:26:56.554Z
0,Suomenlahti aaltopoiju,134221,59.965,25.235,2019-01-01T00:00:00.000Z,2020-09-14T23:59:59.000Z,2020-09-15T05:23:59.322Z


In [14]:
# Divide wave height and direction
wave_height = wave_data.iloc[:,1::2]
wave_height.columns = wave_height.columns.droplevel(1)
wave_dir = wave_data.iloc[:,::2]
wave_dir.columns = wave_dir.columns.droplevel(1)


In [16]:
# Plot wave height
fig = wave_height.plot()
fig.update_layout(title='Wave height')
fig.show()


In [18]:
# Plot one month of wave height
fig = wave_height.loc['2020-01',:].plot()
fig.update_layout(title='Wave height')
fig.show()


In [20]:
# Plot wave height and direction
# One arrow per day
subsampling = 48
x,y = np.meshgrid(np.arange(0, wave_height.shape[0]/subsampling), np.arange(0, wave_height.shape[1] * 100, 100))
d = wave_dir.iloc[::subsampling,:].values.T
h = wave_height.iloc[::subsampling,:].values.T
u = np.sin(np.radians(d)) * h
v = np.cos(np.radians(d)) * h

x_axis = list(enumerate(wave_height.iloc[::subsampling,:].index))[::100]

fig = ff.create_quiver(x, y, u, v, scale=10)
fig.update_layout(title='Wave height and direction',
    yaxis=dict(tickmode='array', tickvals=[0,100,200,300,400], ticktext=list(wave_height.columns)),
    xaxis=dict(tickmode='array', tickvals=list(map(lambda x: x[0], x_axis)), ticktext=list(map(lambda x: str(x[1]), x_axis))))
fig.show()


In [22]:
# Plot missing values
x = np.array(wave_height.index).astype('datetime64[m]')
y = wave_height.columns
z = np.invert(wave_height.isnull().values).astype(int).T
colorsc = [[0, 'rgb(194,59,34)'],
            [0.5, 'rgb(194,59,34)'], 
            [0.5, 'rgb(0,179,30)'],
            [1, 'rgb(0,179,30)']]
fig = go.Figure(data=go.Heatmap(x=x, y=y, z=z, colorscale=colorsc,
    colorbar=dict(tickmode='array', tickvals=[0,1], ticktext=['null', 'value'])))
fig.update_layout(title='Missing values over time', xaxis=dict(side='top'))
fig.show()


 ## Weather Data

In [24]:
grbs = pygrib.open('data/weather_data.grib')


In [26]:
# Plot wind data on map
u_msg = grbs.select(shortName='10u')[0]
v_msg = grbs.select(shortName='10v')[0]
x, y = u_msg.latlons()
u = u_msg.values
v = v_msg.values

quiver_obj = plotly.figure_factory._quiver._Quiver(x, y, u, v,
    scale=.03, arrow_scale=0.3, angle=np.pi/9)
barb_x, barb_y = quiver_obj.get_barbs()
arrow_x, arrow_y = quiver_obj.get_quiver_arrows()

fig = go.Figure(data=go.Scattermapbox(lat=barb_x+arrow_x, lon=barb_y+arrow_y, mode='lines'))
fig.update_layout(title='Wind direction', mapbox_style='open-street-map',
    mapbox=dict(zoom=4, center=dict(lat=x.mean(), lon=y.mean())),
    margin=dict(l=0, r=0, t=50, b=0))
fig.show()


In [None]:
grbs.close()