# Voyage Classification

Now that the final segmentation has been selected, the k-means classification is applied to the whole dataset to classify the behaviour of a ship at every point on its voyage.

In [1]:
%matplotlib inline

# base libraries
import numpy as np
import pandas as pd
import datetime
from random import randint
from sklearn.externals import joblib
import os
import json
import matplotlib.pyplot as plt
import seaborn as sns
import folium as fm
from folium.plugins import HeatMap
import branca

In [2]:
# config.json is expected in the parent of the current working directory
config_path = os.path.abspath('..')

# build the file path with os.path.join so the separator is handled by the
# platform rather than hand-concatenated
with open(os.path.join(config_path, 'config.json'), 'r') as config_file:
    config = json.load(config_file)

# every setting used in this notebook is read from the DEFAULT section
default_config = config['DEFAULT']
processing_path = default_config['processing_path']
shipping_rot_filename = default_config['shipping_rot_filename']
kmeans_model = default_config['kmeans_model']
segment_variables_filename = default_config['all_segment_variables_filename']
segment_filename = default_config['segment_filename']

In [3]:
# read the segment variables and convert the 'dt' column to datetimes
segment_variables_path = processing_path + segment_variables_filename
segment_variables = pd.read_csv(segment_variables_path, header=0, delimiter=',')
segment_variables['dt'] = pd.to_datetime(segment_variables['dt'])

## 2 min window

For each data point, a 2-minute window ending at that point is extracted to calculate the ROT and SOG band distributions.

In [4]:
# length of the look-back window, in seconds
window_duration = 120
# unique MMSIs present in the data (held as a set, despite the name)
mmsi_list = set(segment_variables['MMSI'])
# index by (MMSI, dt) and sort it: the per-ship lookups and the time-range
# slices taken in the windowing step need a monotonic index to be well defined
df_new = segment_variables.set_index(['MMSI','dt']).sort_index()

In [5]:
# For every point of every MMSI, sum all variables over the window that ends at
# that point and starts 'window_duration' seconds earlier. Label slicing with
# .loc includes both end points.
window_length = datetime.timedelta(seconds = window_duration)  # same for every point

sog_rot_dict = {}
for curr_mmsi in mmsi_list:
    # all points for this MMSI, indexed by timestamp
    ship_points = df_new.loc[curr_mmsi,:]
    # only the timestamps are needed here, so walk the index instead of
    # building a row Series for every point with iterrows()
    for window_end in ship_points.index:
        window_start = window_end - window_length
        window_totals = ship_points.loc[window_start:window_end].sum()

        # keyed by (MMSI, timestamp of the point that closes the window)
        sog_rot_dict[(curr_mmsi,window_end)] = window_totals.to_dict()

In [6]:
# turn the window totals into a frame with one row per (MMSI, dt) key
seg_prep_2min = pd.DataFrame.from_dict(sog_rot_dict, orient="index")

In [7]:
# express each value as its share of the total
def get_pct(x):
    """Return every element of ``x`` divided by the sum of ``x``.

    The result is a fraction of the total (the shares add up to 1), not a
    value scaled to 100. There is no guard against a zero total.
    """
    total = x.sum()
    return x / total

In [8]:
# convert the SOG bin totals of each row to a distribution across the bins
sog_counts = seg_prep_2min[['SOG_0','SOG_1','SOG_2','SOG_3','SOG_4','SOG_5', 'SOG_6']]
# vectorised form of applying get_pct to every row: each row is divided by its
# own total, without a Python-level call per row
sog_all = sog_counts.div(sog_counts.sum(axis = 1), axis = 0)

In [9]:
# convert the ROT bin totals of each row to a distribution across the bins
rot_counts = seg_prep_2min[['rot_0','rot_1','rot_2','rot_3','rot_4']]
# vectorised form of applying get_pct to every row: each row is divided by its
# own total, without a Python-level call per row
rot_all = rot_counts.div(rot_counts.sum(axis = 1), axis = 0)

In [10]:
# put the SOG and ROT distributions side by side, matching rows on the shared index
seg_prep_all = sog_all.merge(
    rot_all,
    how = 'inner',
    left_index = True,
    right_index = True,
)

In [11]:
# move the two index levels into ordinary columns and give them their names
seg_prep_all = (
    seg_prep_all
    .reset_index()
    .rename(columns = {'level_0':'MMSI','level_1':'dt'})
)

## Apply segmentation
Generate a classification for every transmitted AIS point for all ships in the sample.

In [12]:
# load the k-means model from the processing directory
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source
kmeans_model_path = processing_path + kmeans_model
kmeans = joblib.load(kmeans_model_path)

In [13]:
# model input columns: the SOG bins followed by the ROT bins
sog_vars = ['SOG_0','SOG_1','SOG_2','SOG_3','SOG_4','SOG_5', 'SOG_6']
rot_vars = ['rot_0','rot_1','rot_2','rot_3','rot_4']
var_list = sog_vars + rot_vars

In [14]:
# Assign every row to a cluster with the loaded model.
# The order of the clusters may vary each time they are generated; if it does,
# the mapping of clusters to segments applied afterwards needs to be updated.
cluster_labels = kmeans.predict(seg_prep_all[var_list])
scored = pd.DataFrame(cluster_labels, columns = ['segment'])

In [15]:
# collapse cluster labels 0-7 into six segments: labels 5 and 6 are folded into
# segment 0 and label 7 becomes segment 5
segment_merge_map = {0:0, 1:1, 2:2, 3:3, 4:4, 5:0, 6:0, 7:5}
scored6 = scored.copy()
scored6['segment'] = scored6['segment'].map(segment_merge_map).astype(int)

In [16]:
# attach the segment classification to the data it was predicted from,
# matching rows on the index
scored6 = seg_prep_all.merge(
    scored6,
    how = 'inner',
    left_index = True,
    right_index = True,
)

In [17]:
# bring position, trip id and status back in from the shipping data

# column types used when reading the shipping csv
dtype_dic = {
    'MMSI': int,
    'dt': 'str',
    'lat': 'float',
    'long': 'float',
    'SOG': 'float',
    'rot': 'float',
    'Type': 'str',
    'gross_tonnage': 'float',
    'vessel_name': 'str',
    'ETA': 'str',
    'POC_LOCODE': 'str',
    'last_port_LOCODE': 'str',
    'next_port_LOCODE': 'str',
    'status': 'str',
    'voyage_id': 'float',
    'tripid': int,
    'in_hazmat': 'str',
    'out_hazmat': 'str',
}
parse_dates = ['dt', 'ETA']

shipping_data = pd.read_csv(
    processing_path + shipping_rot_filename,
    header = 0,
    delimiter = ',',
    dtype = dtype_dic,
    parse_dates = parse_dates,
)

# MMSI and dt are the join keys; the remaining columns are the ones carried over
shipping_columns = ['MMSI','dt','lat','long','tripid','status']
scored6_wll = scored6.merge(shipping_data[shipping_columns], how = 'inner', on = ['MMSI','dt'])

## View voyage classification

The six segments hold an implicit temporal order, suggesting that a ship entering port with the intention to dock will pass through each of the segments in a specific order. The ship will start by transitioning into the port and then pass into the general transition segment. It will then begin the docking procedure, which consists of movement through each of the three docking modes: the initial, mid and terminal phases.

To further validate the segmentation, the maps below show the journeys of individual ships and their transitions through the six segments.

In [18]:
# marker colour for each segment; the position in the list is the segment number
segment_colours = ['lightgreen', 'white', 'pink', 'orange', 'darkgreen', 'red']
colour_dict = dict(enumerate(segment_colours))

# give every point the colour of its segment
scored6_wll['colours'] = scored6_wll['segment'].map(colour_dict)

In [19]:
# settings shared by every map: centre as [latitude, longitude], zoom level,
# and the size and positioning of each map panel
map_location, map_zoom = [51.9506, 1.295], 13
map_width, map_height = '32%', '100%'
map_position = 'absolute'

In [20]:
# 'In Port' points for one trip of each of three ships, given as (MMSI, tripid)
tran_trip_keys = [(319922000, 4), (205439000, 1), (209488000, 1)]
tran_ship1, tran_ship2, tran_ship3 = [
    scored6_wll[(scored6_wll['MMSI'] == mmsi)
                & (scored6_wll['tripid'] == tripid)
                & (scored6_wll['status'] == 'In Port')]
    for mmsi, tripid in tran_trip_keys
]

### Ships passing through port

The maps below give the classified tracks of three ships that transition through Felixstowe. In the first two charts, both ships transition through the port, with one turning east and the other heading north west. In both cases the behaviour of the ship is classified as transitional general phase throughout the journey. In the third chart the ship again travels north west through the port; however, in this case there is a short period where the ship accelerates and moves into the transitional border phase before slowing down and returning to the transitional general phase.

In [21]:
# Draw each ship's classified track as coloured circle markers, one map per
# ship, with the three maps placed side by side. Assigning map1..map3 replaces
# any maps left over from an earlier run of this cell.
map_lefts = ['0%', '33%', '66%']
map1, map2, map3 = [
    fm.Map(left = map_left, top = '0%', position = map_position, location = map_location,
           zoom_start = map_zoom, width = map_width, height = map_height)
    for map_left in map_lefts
]

for ship_map, ship_track in zip([map1, map2, map3], [tran_ship1, tran_ship2, tran_ship3]):
    # walk the three columns together rather than rebuilding a row with
    # .iloc[i] for every field of every point
    for lat, lon, colour in zip(ship_track['lat'], ship_track['long'], ship_track['colours']):
        fm.CircleMarker(location = [lat, lon], radius = 3, color = colour).add_to(ship_map)

In [22]:
# collect the three maps in a single figure
f1 = branca.element.Figure()
for ship_map in (map1, map2, map3):
    f1.add_child(ship_map)
# last expression of the cell, so the figure is displayed
f1

### Ships that are docking

More interesting results are observed with ships that dock within the port. The ship in the leftmost map follows the expected progression through the behavioural segments, namely transition, then deceleration through the docking phases before finally docking. The ship in the second map is initially classified into the docking initial phase; this suggests that the ship enters port at a much lower speed and is slowing down. The ship then decelerates further, increases its rate of turn and moves through the docking mid and terminal phases. At this point, the ship accelerates and moves back into the docking mid phase segment before decelerating through the docking terminal phase and onto the docked segment. It is suggested that this behaviour is indicative of the ship manoeuvring into the prevailing current to aid the docking procedure. In the final chart the ship moves through the segments, slowing down and increasing its manoeuvrability, before entering the docking terminal phase segment. It then accelerates and enters the initial docking segment before decelerating, passing through the remaining docking segments and onto the docked segment.

In [23]:
# 'In Port' points for one trip of each of three ships, given as (MMSI, tripid)
dock_trip_keys = [(209719000, 40), (211226860, 2), (538003937, 5)]
dock_ship1, dock_ship2, dock_ship3 = [
    scored6_wll[(scored6_wll['MMSI'] == mmsi)
                & (scored6_wll['tripid'] == tripid)
                & (scored6_wll['status'] == 'In Port')]
    for mmsi, tripid in dock_trip_keys
]

In [24]:
# Draw each ship's classified track as coloured circle markers, one map per
# ship, with the three maps placed side by side. Assigning map4..map6 replaces
# any maps left over from an earlier run of this cell.
map_lefts = ['0%', '33%', '66%']
map4, map5, map6 = [
    fm.Map(left = map_left, top = '0%', position = map_position, location = map_location,
           zoom_start = map_zoom, width = map_width, height = map_height)
    for map_left in map_lefts
]

for ship_map, ship_track in zip([map4, map5, map6], [dock_ship1, dock_ship2, dock_ship3]):
    # walk the three columns together rather than rebuilding a row with
    # .iloc[i] for every field of every point
    for lat, lon, colour in zip(ship_track['lat'], ship_track['long'], ship_track['colours']):
        fm.CircleMarker(location = [lat, lon], radius = 3, color = colour).add_to(ship_map)

In [25]:
# collect the three maps in a single figure
f2 = branca.element.Figure()
for ship_map in (map4, map5, map6):
    f2.add_child(ship_map)
# last expression of the cell, so the figure is displayed
f2

## Exporting Data

In [26]:
# write the classified points to csv, including the header row and the row index
segment_output_path = processing_path + segment_filename
scored6_wll.to_csv(segment_output_path, sep = ',', header = True, index = True)

In [27]:
# summary statistics for the numeric columns of the exported frame
scored6_wll.describe()

Unnamed: 0,MMSI,SOG_0,SOG_1,SOG_2,SOG_3,SOG_4,SOG_5,SOG_6,rot_0,rot_1,rot_2,rot_3,rot_4,segment,lat,long,tripid
count,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0,3299386.0
mean,369319000.0,0.188858,0.1028404,0.02606657,0.01976332,0.03576172,0.2286306,0.3980794,0.3704453,0.3202367,0.1132516,0.09769457,0.09837183,2.419477,51.93969,1.396735,10.16787
std,157724600.0,0.3757229,0.2775379,0.1405653,0.121383,0.1712486,0.4102955,0.484182,0.3162618,0.2708669,0.1716859,0.1728584,0.1907219,1.715516,0.01850262,0.0992529,13.10186
min,205439000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51.87583,1.277292,1.0
25%,244851000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08333333,0.07692308,0.0,0.0,0.0,0.0,51.92964,1.30555,2.0
50%,305211000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3076923,0.2857143,0.0,0.0,0.0,3.0,51.93948,1.365682,5.0
75%,538003900.0,0.0,0.0,0.0,0.0,0.0,0.07692308,1.0,0.6153846,0.5384615,0.1724138,0.1666667,0.08333333,4.0,51.95607,1.492142,12.0
max,636092800.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,51.97403,1.565783,85.0
