In [19]:
from __future__ import print_function
from __future__ import division
from collections import OrderedDict
import datetime as dt
import hdbscan
import logging
import matplotlib.pyplot as plt
import matplotlib.animation as mpl_animation
import numpy as np
import pandas as pd
from skimage import color
from IPython.display import HTML
from fleet_clustering import bq
from fleet_clustering import filters
from fleet_clustering import distances
from fleet_clustering import animation

In [2]:
# Load squid-jigger AIS for the 2017-01-01 .. 2017-12-31 range.  The result is
# iterated with `.items()` by the pruning cell, so it is a mapping; the name
# suggests it is keyed by date.
# NOTE(review): the exact meaning of `fishing_only` and `min_km_from_shore`
# is defined in fleet_clustering.bq -- confirm there before changing them.
ais_by_date = bq.load_ais_by_date(
    'squid_jigger',
    dt.date(2017, 1, 1),
    dt.date(2017, 12, 31),
    fishing_only=False,
    min_km_from_shore=10,
)

2017-01-01




In [3]:
# Run every per-key AIS entry through filters.remove_chinese_coast, keeping
# the original keys.
pruned_by_date = {
    key: filters.remove_chinese_coast(ais)
    for key, ais in ais_by_date.items()
}

In [4]:
# `valid_ssvid` is kept in sorted order; it is later zipped against
# `clusterer.labels_`, so this ordering must not change after `C` is built.
valid_ssvid = sorted(
    filters.find_valid_ssvid(pruned_by_date)
)
# Composite lon/lat array for the valid vessels, used as input to the
# distance computation.
C = distances.create_composite_lonlat_array(
    pruned_by_date,
    valid_ssvid,
)

In [46]:
# Reload `distances` so that edits to the module are picked up without
# restarting the kernel.  The `imp` module is deprecated and was removed in
# Python 3.12, so use importlib.reload where it exists and fall back to the
# builtin `reload` on Python 2 (this notebook still uses __future__ imports).
try:
    from importlib import reload
except ImportError:  # Python 2: `reload` is a builtin
    pass
reload(distances)

# Pairwise vessel distances computed from the composite lon/lat array.
# NOTE(review): the meaning of `days` and `min_clip` is defined in
# fleet_clustering.distances -- confirm there.
dists = distances.compute_distances_4(C, days=90, min_clip=1)

# metric='precomputed' means `dists` is passed to HDBSCAN as a ready-made
# distance matrix rather than as feature vectors.
clusterer = hdbscan.HDBSCAN(metric='precomputed', min_cluster_size=10)
clusterer.fit(dists)

HDBSCAN(algorithm='best', allow_single_cluster=False, alpha=1.0,
    approx_min_span_tree=True, cluster_selection_method='eom',
    core_dist_n_jobs=4, gen_min_span_tree=False, leaf_size=40,
    match_reference_implementation=False, memory=Memory(cachedir=None),
    metric='precomputed', min_cluster_size=10, min_samples=None, p=None,
    prediction_data=False)

## Load Carrier Data

In [34]:
# Carrier tracks, pruned with the same coastal filter as the fishing vessels.
# NOTE(review): (2017, 2018) is passed as the year range; whether the end year
# is inclusive is decided inside bq.load_carriers_by_year -- confirm.
carriers_by_date = bq.load_carriers_by_year(2017, 2018)
pruned_carriers_by_date = {k : filters.remove_chinese_coast(v) for (k, v) in carriers_by_date.items()}

# MMSIs flagged as carrier vessels with confidence = 3 in the vessel database.
# The selected column is given an explicit alias so the result can be read by
# a stable name instead of BigQuery's auto-generated `f0_`.
query = """
               SELECT CAST(mmsi AS STRING) AS ssvid FROM
               `world-fishing-827.vessel_database.all_vessels_20190102`
               WHERE  iscarriervessel AND confidence = 3
        """
valid_carrier_ssvid_df = pd.read_gbq(query, dialect='standard', project_id='world-fishing-827')
valid_carrier_ssvid = valid_carrier_ssvid_df['ssvid']
# Set form for O(1) membership tests when scanning encounters.
valid_carrier_ssvid_set = set(valid_carrier_ssvid)

## Load AIS data for plotting

In [35]:
# AIS used only for drawing the animation: same vessel class and date range
# as the clustering input, but requested with min_km_from_shore=-1 and
# include_carriers=True.
# NOTE(review): presumably -1 disables the shore-distance cut -- confirm in
# fleet_clustering.bq.
all_ais_by_date = bq.load_ais_by_date(
    'squid_jigger',
    dt.date(2017, 1, 1),
    dt.date(2017, 12, 31),
    fishing_only=False,
    min_km_from_shore=-1,
    include_carriers=True,
)

2017-01-01


KeyboardInterrupt: 

In [36]:
# Apply filters.remove_chinese_coast to every entry of the plotting data,
# keeping the original keys.
all_pruned_by_date = {
    key: filters.remove_chinese_coast(ais)
    for key, ais in all_ais_by_date.items()
}

## Load Encounters Data
This is used to filter the carrier vessels down to only those that have encounters with vessels in the clustered squid-jigger fleets.

In [47]:
encounters = bq.load_carriers(2017, 2017)

# Vessels that HDBSCAN assigned to a cluster (labels >= 0; negative labels
# are unclustered points).  `valid_ssvid` and `clusterer.labels_` are aligned
# by position.
all_fleet_ssvid_set = {s for (s, f) in zip(valid_ssvid, clusterer.labels_) if f >= 0}
valid_ssvid_set = set(valid_ssvid)

# Collect every confirmed carrier that appears on either side of an encounter
# whose other side is a clustered fleet vessel.
# NOTE(review): the "longline_reefer" names are kept for compatibility, but
# the fleet vessels in this notebook come from the 'squid_jigger' load.
all_longline_reefer_ssvid_set = set()
for enc in encounters.itertuples():
    for fleet_side, carrier_side in ((enc.ssvid_1, enc.ssvid_2),
                                     (enc.ssvid_2, enc.ssvid_1)):
        if fleet_side in all_fleet_ssvid_set and carrier_side in valid_carrier_ssvid_set:
            all_longline_reefer_ssvid_set.add(carrier_side)
all_longline_reefer_ssvid = sorted(all_longline_reefer_ssvid_set)

# Carriers not already present among the clustered vessels.  Filtering a
# sorted list keeps it sorted, so no second sort is needed.
carrier_ids = [x for x in all_longline_reefer_ssvid if x not in valid_ssvid_set]
joint_ssvid = valid_ssvid + carrier_ids

# All carriers share one synthetic label, one past the largest cluster label.
carrier_label = max(clusterer.labels_) + 1
labels = list(clusterer.labels_) + [carrier_label] * len(carrier_ids)

## Set up Fleets
Set up the fleets for viewing.

In [48]:
# Cluster labels that are hidden from the display: their member count is
# forced to 0 so they drop out of `fleet_ids` below.
skip = [1, 6, 7, 9, 10, 23, 25]

# Hoisted out of the loops: convert `labels` once and look up the top label once.
labels_arr = np.asarray(labels)
# NOTE(review): treated as the carrier label.  This assumes at least one
# carrier was appended to `labels`; otherwise max(labels) is an ordinary
# cluster label and that cluster would be drawn as "Carrier Vessel".
carrier_label = max(labels)

# Member count per label (index == label), with skipped labels zeroed.
counts = [0 if i in skip else (labels_arr == i).sum()
          for i in range(carrier_label + 1)]

# Non-empty labels ordered from largest to smallest group.
fleet_ids = [x for x in np.argsort(counts)[::-1] if counts[x] > 0]
fleet_ids_without_carriers = [x for x in fleet_ids if x != carrier_label]

print(len(fleet_ids), "fleets")
fleets = OrderedDict()
# One hue per group of four fleets; within a group the four combinations of
# saturation (i % 2) and edge colour / marker size ((i // 2) % 2) tell the
# fleets apart.
n_hues = (len(fleet_ids) + 3) // 4
# endpoint=False: HSV hue is cyclic, so hue 1.0 renders as the same colour as
# hue 0.0.  Including the endpoint would give the first and last hue groups
# identical colours whenever n_hues >= 2.
hues = np.linspace(0, 1, n_hues, endpoint=False)
for i, fid in enumerate(fleet_ids_without_carriers):
    sat = [0.5, 1][i % 2]
    val = 1
    hue = hues[(i // 4) % n_hues]
    [[clr]] = color.hsv2rgb([[(hue, sat, val)]])
    fg = [(0, 0, 0), clr][(i // 2) % 2]
    sz = [5, 4][(i // 2) % 2]
    # The tuple is passed to animation.make_anim through `fleets`; the last
    # element is the 1-based rank of the fleet, used as its text label.
    fleets[fid] = ('o', tuple(fg), tuple(clr), sz, 1, str(i + 1))
# Carriers are added last with their own marker, black colours and label.
fleets[carrier_label] = ('1', 'k', 'k', 7, 2, 'Carrier Vessel')

2 fleets


## Create Animations

In [50]:
# Build the fleet animation and embed it in the notebook as an HTML5 video.
# Display options are collected in one place so they are easy to compare and
# tweak.
preview_anim_options = dict(
    interval=3,
    fleets=fleets,
    show_ungrouped=True,
    alpha=1,
    legend_cols=8,
    ungrouped_legend="Ungrouped",
)
anim = animation.make_anim(joint_ssvid, labels, all_pruned_by_date,
                           **preview_anim_options)
HTML(anim.to_html5_video())

In [32]:
# Build the animation with interval=1 and write it to 'fleet_squid.mp4' using
# matplotlib's ffmpeg writer.
export_anim_options = dict(
    interval=1,
    fleets=fleets,
    show_ungrouped=True,
    alpha=1,
    legend_cols=8,
    ungrouped_legend="Ungrouped",
)
anim = animation.make_anim(joint_ssvid, labels, all_pruned_by_date,
                           **export_anim_options)

# Look up the ffmpeg writer class in matplotlib's writer registry and
# configure the encoder settings for the saved file.
Writer = mpl_animation.writers['ffmpeg']
writer = Writer(fps=8, metadata={'artist': 'Me'}, bitrate=1800)
anim.save('fleet_squid.mp4', writer=writer)

In [45]:
# Scratch comparison of two averages on a bimodal sample (five 1s, five 100s):
# the root-mean-square is dominated by the large values, while the geometric
# mean sits midway between them on a log scale.
x = np.array([1] * 5 + [100] * 5)
np.sqrt(np.mean(np.square(x))), np.exp(np.log(x).mean())

(70.71421356417676, 10.000000000000002)