In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import pickle

In [None]:
import os, sys
# Extend the module search path with a directory under the user's home so the
# `lonelyboy` imports below can be resolved (presumably a local checkout of the
# project lives there -- adjust the path if yours is elsewhere).
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Insert-Generic-Name-Here/'))
# sys.path

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5  # NOTE(review): presumably imported so the Qt backend selected below is available
import matplotlib.pyplot as plt
from matplotlib import style;  style.use('ggplot')
# `InteractiveShell.magic()` is deprecated in IPython; `run_line_magic` is the
# supported way to invoke a line magic programmatically.
get_ipython().run_line_magic('matplotlib', 'qt')
# get_ipython().run_line_magic('matplotlib', 'inline')

In [None]:
# Shared figure dimensions; every plot in this notebook passes
# (PLT_FIG_WIDTH, PLT_FIG_HEIGHT) as `figsize`.
_WIDTH_TO_HEIGHT = 1.618  # NOTE(review): looks like the golden ratio -- confirm intent
PLT_FIG_WIDTH = 4.487
PLT_FIG_HEIGHT = PLT_FIG_WIDTH / _WIDTH_TO_HEIGHT

* ### Vessel-Dynamic Statistics I

In [None]:
# Load the five statistics tables from ./data/csv/stats, one CSV per table,
# in the same order as the names on the left-hand side.
(
    mean_records_per_day,
    mmsis_no_of_records,
    no_of_records_per_day,
    records_per_dayname,
    records_per_week,
) = [
    pd.read_csv(f'./data/csv/stats/{table_name}.csv')
    for table_name in (
        'mean_records_per_day',
        'mmsis_no_of_records',
        'no_of_records_per_day',
        'records_per_dayname',
        'records_per_week',
    )
]

In [None]:
# Bucket every vessel (MMSI) by its total number of AIS records, using bin
# edges that roughly double from one bucket to the next.
record_count_edges = [0, 2, 4, 8, 17, 35, 71, 142, 285, 570, 1141, 2282, 4565, 9130, 18260, 36520, 1168640]
out = pd.cut(mmsis_no_of_records['no_of_records'], record_count_edges)

# sort=False keeps the buckets in bin order rather than ordering by frequency.
vessels_per_bucket = out.value_counts(sort=False)
ax = vessels_per_bucket.plot.bar(figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), fontsize=7, width=0.95, cmap='tab20', rot=0)
# Show only the upper edge of each interval to keep the tick labels short.
ax.set_xticklabels([str(bucket.right) for bucket in out.cat.categories])
ax.set_title('Number of AIS signals per vessel at the entire period of study', fontsize=7)
ax.set_xlabel('Number of AIS signals', fontsize=7)
ax.set_ylabel('Number of MMSIs', fontsize=7)
# Optional export:
# plt.savefig('Number of AIS signals at the entire period of study')

In [None]:
# Bucket the `records_per_day` values into roughly doubling bins and count how
# many rows fall into each bucket.
daily_count_edges = [0, 2, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 11000]
out = pd.cut(mean_records_per_day['records_per_day'], daily_count_edges)

# sort=False keeps the buckets in bin order rather than ordering by frequency.
rows_per_bucket = out.value_counts(sort=False)
ax = rows_per_bucket.plot.bar(figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), fontsize=12, width=0.95, cmap='tab20', title='Number of AIS signals per vessel at a daily basis', rot=0)
# Show only the upper edge of each interval to keep the tick labels short.
ax.set_xticklabels([str(bucket.right) for bucket in out.cat.categories])
ax.set_xlabel('Number of AIS records per day', fontsize=12)
ax.set_ylabel('Number of MMSIs', fontsize=12)
# Optional export:
# plt.savefig('Mean daily records per mmsi')

In [None]:
# One bar per row of `records_per_week`; the legend is suppressed and the axis
# labels carry the meaning instead.
weekly_ax = records_per_week.plot.bar(
    cmap='tab20',
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    fontsize=12,
    width=0.95,
    title='Number of Records per Week',
    rot=0,
    legend=False,
)
weekly_ax.set_xlabel('Week Number (Start:01-10-2015, Finish:31-03-2016)', fontsize=12)
weekly_ax.set_ylabel('#Records', fontsize=12)

In [None]:
# Monday-first ordering so the bars follow the calendar week instead of the
# alphabetical order a plain string column would produce.
cats = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# `Series.dt.weekday_name` was removed in pandas 1.0; `dt.day_name()` is its
# replacement and returns the same English day names by default.
no_of_records_per_day['day_name'] = pd.to_datetime(no_of_records_per_day.date).dt.day_name()
no_of_records_per_day['day_name'] = pd.Categorical(no_of_records_per_day['day_name'], categories=cats, ordered=True)
no_of_records_per_day = (
    no_of_records_per_day
    .sort_values('day_name')
    .reset_index(drop=True)
)

# Sum the numeric columns per weekday. The previous `.apply(sum)` depended on
# pandas silently remapping the builtin `sum` and fails on the string `date`
# and categorical `day_name` columns in current pandas. `numeric_only=True`
# keeps just the numeric columns, and `observed=False` keeps all seven weekdays
# on the axis even if one has no rows.
records_per_weekday = no_of_records_per_day.groupby('day_name', observed=False).sum(numeric_only=True)
ax = records_per_weekday.plot.bar(cmap='tab20', figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), fontsize=12, width=0.95, legend=False, title='Number of Records per Weekday', rot=0)
ax.set_xlabel('')
ax.set_ylabel('#Records', fontsize=12)

* ### Vessel-Dynamic Statistics II (Velocity, Acceleration, Bearing)

In [None]:
%%time
ves_feat = []
for i in range(6):
    df = pd.read_csv(f'data/csv/nari_dynamic_vanilla_features/nari_dynamic_vanilla_features_{i}_no_smoothing.csv')
    ves_feat.append(df[['mmsi', 'velocity', 'acceleration', 'bearing']])
    
ves_feat = pd.concat(ves_feat, ignore_index=True)

In [None]:
# Distribution of vessel velocity: 10-unit bins up to 60, one catch-all bin up
# to just above the observed maximum, and one extra bin past the maximum that
# is necessarily empty (so the area plot's last point is zero).
# NOTE(review): the edges are only strictly increasing when the rounded maximum
# velocity is at least 60; pd.cut raises otherwise.
# NOTE(review): intervals are right-closed, so velocities of exactly 0 fall
# outside every bin -- confirm that is intended.
max_velocity = np.round(ves_feat.velocity.max())
velocity_edges = [0, 10, 20, 30, 40, 50, 60, max_velocity + 1, max_velocity + 2]

out = pd.cut(ves_feat.velocity, velocity_edges)
ax = out.value_counts(sort=False).plot.area(figsize=(PLT_FIG_WIDTH,PLT_FIG_HEIGHT), fontsize=14, cmap='tab20', title='Vessel Velocity Distribution', rot=0)

# An area plot draws bin i at x == i but lets matplotlib choose the ticks, so
# pin exactly one tick per bin before relabelling. Otherwise the labels may be
# attached to the wrong positions, or matplotlib raises because the number of
# labels differs from the number of ticks.
ax.set_xticks(list(range(len(out.cat.categories))))
ax.set_xticklabels([c.left for c in out.cat.categories], fontsize=14, rotation=0)
ax.set_xlabel('Velocity (knots; nautical miles per hour)', fontsize=14)
ax.set_ylabel('#Occurrences', fontsize=14)

In [None]:
# NOTE(review): `window` is not used in this cell; kept in case later cells read it.
window=150
# Despite the name, these are the bin *edges* (17 intervals), finer around zero.
no_of_bins=[-94082, -3000, -1000, -10, -2, -0.5, -0.25, -0.1, -0.04, 0, 0.1, 0.25, 0.5, 2, 10, 1000, 3000, 94082]

out = pd.cut(ves_feat.acceleration, no_of_bins)
ax = out.value_counts(sort=False).plot.area(figsize=(PLT_FIG_WIDTH,PLT_FIG_HEIGHT), fontsize=14, cmap='tab20', title='Vessel Acceleration Distribution', rot=10)

# The area plot draws bin i at x == i, and bin i has left edge no_of_bins[i].
# Pin each of the eight labels to the bin whose left edge it names, rather than
# relying on matplotlib auto-selecting exactly eight ticks at the right places.
# A mismatch would mislabel the axis or raise because the label and tick
# counts differ.
labelled_edges = [-3000, -10, -0.5, -0.1, 0, 0.5, 10, 3000]
ax.set_xticks([no_of_bins.index(edge) for edge in labelled_edges])
ax.set_xticklabels(labelled_edges, fontsize=14, rotation=10)
ax.set_xlabel('Acceleration', fontsize=14)
ax.set_ylabel('#Occurrences', fontsize=14)

In [None]:
# Distribution of vessel bearing in twelve 30-unit sectors covering 0-360,
# drawn as a bar chart with the interval text as tick labels.
# NOTE(review): intervals are right-closed, so a bearing of exactly 0 falls
# outside every sector -- confirm that is intended.
sector_edges = list(range(0, 361, 30))
out = pd.cut(ves_feat.bearing, sector_edges)

occurrences_per_sector = out.value_counts(sort=False)
ax = occurrences_per_sector.plot.bar(figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), fontsize=11, width=0.95, cmap='tab20', title='Vessel Course Distribution', rot=0)
# Alternative labelling (left edge only):
# ax.set_xticklabels([c.left for c in out.cat.categories])
ax.set_xlabel('Course', fontsize=11)
ax.set_ylabel('#Occurrences', fontsize=11)