In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import pickle

In [2]:
import os, sys
# Extend the module search path with a folder under the user's home directory
# (presumably where the `lonelyboy` package imported further down lives — confirm).
_local_python_root = os.path.join(os.path.expanduser('~'), 'Documents/Coding/Python/')
sys.path.append(_local_python_root)
# sys.path

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style

# Global matplotlib look: ggplot theme, LaTeX-rendered text, serif font family.
style.use('ggplot')
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# Equivalent of the `%matplotlib qt` line magic.
# `InteractiveShell.magic()` is a deprecated API; `run_line_magic(name, args)`
# is the supported way to invoke a line magic programmatically.
get_ipython().run_line_magic('matplotlib', 'qt')

In [3]:
# Figure geometry shared by the plotting cells (passed to `figsize`).
# Other widths that were tried: 3.487, 3.974, 4.487.
_WIDTH_TO_HEIGHT = 1.618  # approximately the golden ratio
PLT_FIG_WIDTH = 3.787
PLT_FIG_HEIGHT = PLT_FIG_WIDTH / _WIDTH_TO_HEIGHT

* ### Vessel-Dynamic Statistics I

In [4]:
# Statistics tables read from ./data/csv/stats; each variable is named after its CSV file.
(
    mean_records_per_day,
    mmsis_no_of_records,
    no_of_records_per_day,
    records_per_dayname,
    records_per_week,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/csv/stats/mean_records_per_day.csv',
        './data/csv/stats/mmsis_no_of_records.csv',
        './data/csv/stats/no_of_records_per_day.csv',
        './data/csv/stats/records_per_dayname.csv',
        './data/csv/stats/records_per_week.csv',
    )
)

In [11]:
# Bar chart: how many vessels fall into each range of total record counts.
# Bin edges double from 2 up to 32768 (right-closed intervals), followed by one wide closing bin.
# NOTE(review): 1168640 is presumably the largest per-vessel record count in the data — confirm.
record_count_edges = [0] + [2 ** k for k in range(1, 16)] + [1168640]
out = pd.cut(mmsis_no_of_records['no_of_records'], record_count_edges)
vessels_per_bin = out.value_counts(sort=False)  # sort=False keeps the bins in ascending order

ax = vessels_per_bin.plot.bar(
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    fontsize=8,
    width=0.75,
    cmap='tab20',
    rot=40,
)

ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.figure.suptitle(r'\textbf{\#AIS signals per vessel at the entire period of study}', fontsize=8, y=1)
ax.set_xlabel(r'\textbf{\#AIS signals}', fontsize=8)
ax.set_ylabel(r'\textbf{\#vessels}', fontsize=8)

# The PDF is written to the parent directory of the working directory.
ax.figure.savefig(
    os.path.join('..', 'AIS_Signals_per_vessel_entire_study_period.pdf'),
    dpi=300,
    bbox_inches='tight',
)

In [12]:
# Bar chart: how many vessels fall into each range of `records_per_day`.
# Bin edges double from 2 up to 2048 (right-closed intervals), followed by one wide closing bin.
# NOTE(review): 11000 is presumably an upper bound on the daily value — confirm.
daily_count_edges = [0] + [2 ** k for k in range(1, 12)] + [11000]
out = pd.cut(mean_records_per_day['records_per_day'], daily_count_edges)
vessels_per_bin = out.value_counts(sort=False)  # sort=False keeps the bins in ascending order

ax = vessels_per_bin.plot.bar(
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    fontsize=8,
    width=0.75,
    cmap='tab20',
    rot=30,
)

ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.figure.suptitle(r'\textbf{\#AIS signals per vessel at daily basis}', fontsize=8, y=1)
ax.set_xlabel(r'\textbf{\#AIS signals}', fontsize=8)
ax.set_ylabel(r'\textbf{\#vessels}', fontsize=8)

# The PDF is written to the parent directory of the working directory.
ax.figure.savefig(os.path.join('..', 'AIS_Signals_per_Day.pdf'), dpi=300, bbox_inches='tight')

In [14]:
# Bar chart of the total number of records for each day of the week.
cats = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# `Series.dt.weekday_name` was removed in pandas 1.0; `dt.day_name()` is its replacement
# and returns English day names by default, matching `cats`.
# The ordered categorical makes sorting/grouping follow Monday..Sunday instead of the alphabet.
no_of_records_per_day['day_name'] = pd.Categorical(
    pd.to_datetime(no_of_records_per_day.date).dt.day_name(),
    categories=cats,
    ordered=True,
)
no_of_records_per_day = no_of_records_per_day.sort_values('day_name').reset_index(drop=True)

# Explicit numeric sum per weekday instead of `.apply(sum)`, which relied on pandas silently
# swapping the Python builtin for a DataFrame reduction. `observed=False` keeps every weekday
# on the axis even if one of them has no rows.
records_per_weekday = no_of_records_per_day.groupby('day_name', observed=False).sum(numeric_only=True)
ax = records_per_weekday.plot.bar(
    cmap='tab20',
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    fontsize=8,
    width=0.75,
    legend=False,
    rot=30,
)

ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.figure.suptitle(r'\textbf{\#records per weekday}', fontsize=8, y=1)
ax.set_xlabel(r'\textbf{}', fontsize=8)  # empty label replaces the automatic 'day_name' axis label
ax.set_ylabel(r'\textbf{\#records}', fontsize=8)

ax.figure.savefig(os.path.join('..', 'AIS_Signals_per_Weekday.pdf'), dpi=300, bbox_inches='tight')

In [15]:
# Bar chart of the `records_per_week` table, one bar per row.
bar_options = dict(
    cmap='tab20',
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    fontsize=8,
    width=0.75,
    rot=60,
    legend=False,
)
ax = records_per_week.plot.bar(**bar_options)

ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.figure.suptitle(r'\textbf{\#records per week}', fontsize=8, y=1)
ax.set_xlabel(r'\textbf{week number (start:01/10/2015, end:31/03/2016)}', fontsize=8)
ax.set_ylabel(r'\textbf{\#records}', fontsize=8)

# The PDF is written to the parent directory of the working directory.
ax.figure.savefig(os.path.join('..', 'AIS_Signals_per_Week.pdf'), dpi=300, bbox_inches='tight')

* ### Vessel-Dynamic Statistics II (Velocity, Acceleration, Bearing)

In [4]:
%%time
ves_feat = []
for i in range(6):
    df = pd.read_csv(f'data/csv/nari_dynamic_vanilla_features/nari_dynamic_vanilla_features_{i}_no_smoothing.csv')
    ves_feat.append(df[['mmsi', 'velocity', 'acceleration', 'bearing']])
    
ves_feat = pd.concat(ves_feat, ignore_index=True)

Wall time: 33.7 s


In [5]:
# Area chart of the number of records per speed bin.
# Bins: six 10-wide bins up to 60, one catch-all bin reaching just past the observed maximum,
# and a final bin beyond the maximum that is always empty (it brings the area back down to zero).
# NOTE(review): the bins are right-closed, so records with velocity exactly 0 fall outside
# (0, 10] and are not counted — confirm this is intended.
speed_cap = np.round(ves_feat.velocity.max())
out = pd.cut(ves_feat.velocity, [0, 10, 20, 30, 40, 50, 60, speed_cap + 1, speed_cap + 2])
records_per_bin = out.value_counts(sort=False)  # sort=False keeps the bins in ascending order

ax = records_per_bin.plot.area(figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), fontsize=8, cmap='tab20', rot=0)

# pandas places the bins at x = 0..n-1. Pin one tick on each of them before relabelling;
# otherwise the labels are attached to whatever ticks the auto-locator happened to pick.
ax.set_xticks(range(len(records_per_bin)))
ax.set_xticklabels([int(c.left) for c in out.cat.categories])  # label each bin by its lower edge

ax.figure.suptitle(r'\textbf{Vessel speed distribution}', fontsize=8, y=1)
ax.set_xlabel(r'\textbf{speed (knots)}', fontsize=8)
ax.set_ylabel(r'\textbf{\#records}', fontsize=8)

ax.figure.savefig(os.path.join('..', 'Vessel_Velocity_Distribution_V2.pdf'), dpi=300, bbox_inches='tight')

In [15]:
# Distinct MMSIs having at least one record with velocity >= 999900
# (presumably sentinel/outlier readings — confirm).
ves_feat.loc[ves_feat['velocity'] >= 999900, 'mmsi'].unique()

array([304010909, 226338000], dtype=int64)

In [5]:
# Area chart of the number of records per acceleration bin.
window = 150  # NOTE(review): not read anywhere in this cell — confirm whether another cell needs it
# Despite the name, `no_of_bins` holds the bin EDGES passed to pd.cut (17 right-closed bins).
no_of_bins = [
    -94082, -3000, -1000, -10, -2, -0.5, -0.25, -0.1, -0.04,
    0,
    0.1, 0.25, 0.5, 2, 10, 1000, 3000, 94082,
]

out = pd.cut(ves_feat.acceleration, no_of_bins)
records_per_bin = out.value_counts(sort=False)  # sort=False keeps the bins in ascending order
ax = records_per_bin.plot.area(figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), fontsize=8, cmap='tab20', rot=0)
# NOTE(review): tick positions are not fixed before relabelling, so these eight labels attach to
# whichever ticks matplotlib auto-selects — verify they line up with the intended bin edges.
ax.set_xticklabels([-3000, -10, -0.5, -0.1, 0, 0.5, 10, 3000])

ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.figure.suptitle(r'\textbf{Vessel acceleration distribution}', fontsize=8)
ax.set_xlabel(r'\textbf{acceleration (knots/s)}', fontsize=8)
ax.set_ylabel(r'\textbf{\#records}', fontsize=8)

# The PDF is written to the parent directory of the working directory.
ax.figure.savefig(os.path.join('..', 'Vessel_Acceleration_Distribution.pdf'), dpi=300, bbox_inches='tight')

In [144]:
# Bar chart of the number of records per 30-degree bearing sector.
# Sectors are left-closed ([0, 30), [30, 60), ... [330, 360)), so a bearing of exactly 0 is counted.
sector_edges = list(range(0, 361, 30))
bearing_sector = pd.cut(ves_feat.bearing, sector_edges, right=False)
out = bearing_sector.value_counts(sort=False)  # sort=False keeps the sectors in ascending order

ax = out.plot.bar(
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    fontsize=8,
    width=0.75,
    cmap='tab20',
    rot=30,
)

ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
ax.figure.suptitle(r'\textbf{Vessel Course Distribution}', fontsize=8)
ax.set_xlabel(r'\textbf{Course over Ground (degrees)}', fontsize=8)
# Saving is currently disabled for this figure:
# plt.savefig(os.path.join('..', 'Vessel_Course_Distribution_V2.pdf'), dpi=300, bbox_inches='tight')
ax.set_ylabel(r'\textbf{\#Records}', fontsize=8)

Text(26.091412709068422, 0.5, '\\textbf{\\#Records}')

In [14]:
# Frequency of each distinct bearing value, ignoring NaNs and the exact values 0.0 and 180.0.
bearing = ves_feat.bearing
bearing[~bearing.isin([0.0, 180.0])].dropna().value_counts()

270.004525    3799
89.995475     3791
89.999954     2781
270.000046    2566
105.073339    2500
89.999910     2481
285.077142    2459
284.735481    2355
270.000090    2354
104.731586    2345
104.480300    2329
284.484265    2305
270.000054    2293
283.847840    2284
162.876270    2233
103.843684    2216
342.878995    2175
89.999945     2093
89.999965     2079
270.000095    2074
270.000055    2068
104.109678    2028
89.999905     2008
284.113752    1996
285.413149    1990
89.999945     1958
270.000035    1951
105.409433    1924
105.828023    1905
285.831635    1897
              ... 
144.092852       1
268.703793       1
315.022613       1
11.514841        1
193.372359       1
64.366896        1
219.911322       1
196.075641       1
180.092312       1
19.139800        1
332.870310       1
119.033941       1
58.952737        1
2.464644         1
186.037944       1
100.804325       1
170.223376       1
358.533167       1
184.196726       1
199.329305       1
346.814717       1
357.211743  

In [68]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# Polar bar chart of the number of records per bearing sector.

# NOTE: these assignments replace the notebook-wide figure size for every cell run afterwards.
PLT_FIG_WIDTH = 4
PLT_FIG_HEIGHT = PLT_FIG_WIDTH / 1.618

matplotlib.rcParams.update({'font.size': 8})

bins_number = 24  # the [0, 360) interval will be subdivided into this number of equal bins
degree_intervals = 15  # angular width of one bin in degrees; must equal 360 / bins_number
ticks = [r'${0}^o$'.format(degree_intervals*i) for i in range (bins_number)]

bins = np.linspace(0.0, 2 * np.pi, bins_number + 1)
# Bearings of exactly 0.0 and 180.0 are left out, as are NaNs
# (presumably artefact values — confirm against the feature extraction).
angles = np.radians(ves_feat.bearing.loc[~ves_feat.bearing.isin([0.0, 180.0])].dropna().values)
# Only the counts are needed here. np.histogram computes them without drawing anything,
# whereas plt.hist would also paint a stray histogram onto whatever figure is current.
n, bins = np.histogram(angles, bins)

fig = plt.figure(figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT))
ax = plt.subplot(1, 1, 1, projection='polar')

# Bars are slightly narrower than a full sector so neighbouring bars stay visually separate.
width = 2 * np.pi / bins_number - 0.02
bars = ax.bar(bins[:bins_number], n, width=width, bottom=0.0, align='edge')

for bar in bars:
    bar.set_facecolor(plt.cm.tab20(0))
    bar.set_alpha(1.0)

plt.suptitle('Vessel course polar chart', fontsize=8, y=1.02)

# One labelled angular grid line per bin.
lines, labels = plt.thetagrids(range(0, 360, degree_intervals), ticks, fontsize=8)

plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
# The scientific-notation offset text is hidden; the legend entry below carries the scale instead.
ax.yaxis.offsetText.set_visible(False)

ax.tick_params(pad=0)
ax.set_rlabel_position(140)
# Compass layout: 0 degrees at the top, angles increasing clockwise.
ax.set_theta_zero_location("N")
ax.set_theta_direction(-1)

plt.legend([r'\#records (x$10^6$)'], frameon=False, fancybox=False, shadow=False, loc='lower center', bbox_to_anchor=(0.5, -0.25))

# Save before showing: on some backends plt.show() releases the figure, and a later
# plt.savefig would then write an empty canvas. Saving through `fig` also avoids relying
# on which figure happens to be current.
fig.savefig(os.path.join('..', 'Vessel_Course_Distribution_V8.pdf'), dpi=300, bbox_inches='tight')
plt.show()

## Some Simulations - Selecting the Best Image Dimensions for a LaTeX Document

In [15]:
# Render all figure text through LaTeX and use the serif font family.
plt.rcParams.update({'text.usetex': True, 'font.family': 'serif'})

In [19]:
# Render the same bar chart at a series of widths (height = width / 1.618) and save each
# variant as tmp_fig_<i>.pdf so the sizes can be compared inside the LaTeX document.
# The loop uses its own `fig_width` / `fig_height` names so that the notebook-wide
# PLT_FIG_WIDTH / PLT_FIG_HEIGHT constants are not overwritten by the last iteration.
for i, fig_width in enumerate(np.arange(3, 7.487, 0.487)):
    fig_height = fig_width / 1.618

    print (f'Figure {i}: {fig_width}, {fig_height}')

    out = pd.cut(mmsis_no_of_records['no_of_records'], [0, 2, 4, 8, 17, 35, 71, 142, 285, 570, 1141, 2282, 4565, 9130, 18260, 36520, 1168640])

    # pandas creates the figure itself when given `figsize`, so no separate plt.figure()
    # call is made (it would only leave an unused empty figure behind).
    ax = out.value_counts(sort=False).plot.bar(figsize=(fig_width, fig_height), fontsize=8, width=0.7, cmap='tab20', rot=35)
    # Label each bar with the upper edge of its bin.
    ax.set_xticklabels([str(c.right) for c in out.cat.categories])
    ax.set_title(r'\textbf{Number of AIS signals per vessel at the entire period of study}', fontsize=8)
    ax.set_xlabel(r'\textbf{Number of AIS signals}', fontsize=8)
    ax.set_ylabel(r'\textbf{Number of MMSIs}', fontsize=8)
    # NOTE: the target directory ./experimental_figures/figs must already exist.
    ax.figure.savefig(os.path.join('.', 'experimental_figures', 'figs', f'tmp_fig_{i}.pdf'), dpi=300, bbox_inches='tight')

    # Release this iteration's figure so figures do not pile up across the loop.
    plt.close(ax.figure)

Figure 0: 3.0, 1.8541409147095178
Figure 1: 3.487, 2.15512978986403
Figure 2: 3.974, 2.4561186650185416
Figure 3: 4.461, 2.7571075401730534
Figure 4: 4.948, 3.058096415327565
Figure 5: 5.4350000000000005, 3.359085290482077
Figure 6: 5.922000000000001, 3.6600741656365887
Figure 7: 6.409000000000001, 3.9610630407911005
Figure 8: 6.896000000000001, 4.262051915945612
Figure 9: 7.383000000000001, 4.563040791100124
