# Generate Lists for Neural Net

In [2]:
import csv
import bq
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sb

# Previous Labels
### Fishing
 - Purse seine
 - Longliner
 - Pots and traps
 - Trawler

### Non-fishing
 - Passenger
 - Tug/Pilot/Supply
 - Cargo/Tanker
 - Seismic vessel

# New Labels [and sublabels in brackets]
### Fishing
 - Drifting longlines
 - Fixed gear [Pots and traps, Set gillnets, Set longlines]
 - Pole and line          
 - Purse seines        
 - Reefer              
 - Squid fishing               
 - Trawlers           
 - Trollers  
 - Squid
 

### Non-fishing
 - Passenger [Sailing, Motor Passenger]
 - Tug/Pilot/Supply [Tug, Pilot, Supply, Other]
 - Cargo/Tanker [Cargo, Tanker]
 - Seismic vessel [Seismic]

In [3]:
# Maps each detailed sublabel (and each general label that can appear on its
# own) to the general label it belongs to.
detail_to_general = {'Sailing':'Passenger',
                    'Motor passenger':'Passenger',
                    'Passenger':'Passenger',
                    'Tug':'Tug/Pilot/Supply',
                    'Pilot':'Tug/Pilot/Supply',
                    'Supply':'Tug/Pilot/Supply',
                    'Tug/Pilot/Supply':'Tug/Pilot/Supply',
                    'Cargo':'Cargo/Tanker',
                    'Tanker':'Cargo/Tanker',
                    'Cargo/Tanker':'Cargo/Tanker',
                    'Seismic vessel':'Seismic vessel',
                    'Drifting longlines':'Drifting longlines',
                    'Set longlines':'Fixed gear',
                    'Pole and line':'Pole and line',
                    'Pots and traps':'Fixed gear',
                    'Purse seines':'Purse seines',
                    'Reefer':'Reefer',
                    'Set gillnets':'Fixed gear',
                    'Trawlers':'Trawlers',
                    'Trollers':'Trollers',
                    'Fishing vessel':'Fishing vessel',
                    "Squid":"Squid"
                    }

# Maps each general label to the top-level "Fishing" / "Non-fishing" class.
general_to_fishing = {'Passenger':"Non-fishing",
                     'Tug/Pilot/Supply':"Non-fishing",
                     'Cargo/Tanker': "Non-fishing",
                     "Seismic vessel": "Non-fishing",
                     "Drifting longlines":"Fishing",
                     "Pole and line":"Fishing",
                     "Purse seines":"Fishing",
                     "Reefer":"Non-fishing", # this is debatable
                     "Fixed gear":"Fishing",
                     "Squid":"Fishing",
                     "Trawlers":"Fishing",
                     "Trollers":"Fishing",
                     "Fishing vessel":"Fishing",
                     }

# General labels treated as having no subcategories.
# Fixed: a missing comma used to fuse two entries into the single string
# "Drifting lonlinesPole and line"; "longlines" was also misspelled and
# "Seismic vessel" was listed twice.
cats_without_subcats = ["Seismic vessel",
                        "Trawlers", "Purse seines", "Reefer",
                        "Squid", "Drifting longlines",
                        "Pole and line", "Trollers"]

# Lists Previously Used by the Neural Net for Training and Testing
As of October 10, 2016, the following lists are used in the Neural Net Classifier. Below are David's notes on what to change and use instead:

## ITU_Dec_2015_full_list.csv
 - 11774 vessels
 - It is strange that it only has the following categories:
```
     'FBT': _PASSENGER,
    'PA': _PASSENGER,
    'TUG': _TUG_PILOT_SUPPLY,
    'LOU': _PASSENGER,
    'GOU': _PASSENGER,
    'SLO': _PASSENGER,
    'VLR': _PASSENGER,
    'YAT': _PASSENGER,
    'RAV': _TUG_PILOT_SUPPLY,
    'LAN': _POTS_AND_TRAPS,
    # TODO: [bitsofbits] More 
```

This PDF lists all of the possible classification values: http://www.itu.int/net/ITU-R/terrestrial/mars/help/table-2.pdf

This is very weird because we could be including trawlers here.


## CLAVRegistryMatchingv5.csv
 - 4803 different vessels
 - I'm skeptical of the shiptype generated in this list because I know that when I concat the geartypes, it isn't always clear what the vessel type is
 - Bjorn now has newer matches

## KnownVesselCargoTanker.csv
 - 2285 vessels
 - no idea where they came from

## KristinaManualClassification.csv
 - 2184 vessels


## PyBossaNonFishing.csv
 - 153 tug boats

## AlexWManualNonFishing.csv
 - 218 vessels that are Tugs, Passengers, Tankers, Cargo

## EUFishingVesselRegister.csv
 - 6489 vessels that are matched to the EU list, with their EU list geartype. See http://ec.europa.eu/fisheries/fleet/index.cfm?method=Codification.Cod_gear for the geartype

## PeruvianSquidFleet.csv
 - Compiled by Bjorn, 104 vessels

## WorldwideSeismicVesselDatabase4Dec15.csv
 - 169 vessels, with length included



 # Lists not Used
 - rivervessels_20160502.csv
 - verify5and24_20160318.csv
 - verify5and24_20160502.csv
 - FishingVesselsV2_HighConfidenceStudents_20160502.csv




# Lists We Are Going to Use


## KnownVesselCargoTanker.csv
 - 2285 vessels
 - no idea where they came from

## KristinaManualClassification.csv
 - 2184 vessels


## PyBossaNonFishing.csv
 - 153 tug boats

## AlexWManualNonFishing.csv
 - 218 vessels that are Tugs, Passengers, Tankers, Cargo

## EUFishingVesselRegister.csv
 - 6489 vessels that are matched to the EU list, with their EU list geartype. See http://ec.europa.eu/fisheries/fleet/index.cfm?method=Codification.Cod_gear for the geartype

## PeruvianSquidFleet.csv
 - Compiled by Bjorn, 104 vessels

## WorldwideSeismicVesselDatabase4Dec15.csv
 - 169 vessels, with length included



 # Lists not Used
 - rivervessels_20160502.csv
 - verify5and24_20160318.csv
 - verify5and24_20160502.csv
 - FishingVesselsV2_HighConfidenceStudents_20160502.csv


In [4]:
df_cargotanker = pd.read_csv('../data/classification-list-sources/KnownVesselCargoTanker.csv')

In [5]:
# Number of vessels per source label in the cargo/tanker list.
df_cargotanker.groupby(['label']).count()

# Non-fishing labels [sublabels in brackets], kept here for reference:
# Passenger [sailing, motor]
# Tug/Pilot/Supply [Tug, Pilot, Supply, Other]
# Cargo/Tanker [Cargo, Tanker]
# Seismic vessel [Seismic]

Unnamed: 0_level_0,mmsi
label,Unnamed: 1_level_1
Cargo,1128
Tanker,1156


In [6]:
# Both source labels collapse into the single general class 'Cargo/Tanker'.
cargotanker_map = dict.fromkeys(('Cargo', 'Tanker'), 'Cargo/Tanker')

# Keep the original fine-grained label as the sublabel, then derive the
# general label from it.
df_cargotanker['cargotanker_sublabel'] = df_cargotanker['label']
df_cargotanker['label'] = df_cargotanker['cargotanker_sublabel'].map(cargotanker_map)


In [7]:
# Index the list by mmsi and give the general-label column its list-specific name.
df_cargotanker = (
    df_cargotanker
    .set_index('mmsi')
    .rename(columns={'label': 'cargotanker_label'})
)
df_cargotanker.head()

Unnamed: 0_level_0,cargotanker_label,cargotanker_sublabel
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1
111111110,Cargo/Tanker,Cargo
204708000,Cargo/Tanker,Cargo
205256290,Cargo/Tanker,Cargo
205258890,Cargo/Tanker,Cargo
205263390,Cargo/Tanker,Cargo


In [8]:
# Every vessel in this list is a cargo ship or tanker, i.e. non-fishing.
# Assigning a scalar broadcasts it to every row, so no helper list/Series is needed.
df_cargotanker['cargotanker_fishing'] = 'Non-fishing'

In [9]:
df_cargotanker.index.name = 'mmsi'
df_cargotanker.to_csv('lists/cargotanker.csv')

In [19]:
df_kristina = pd.read_csv('../data/classification-list-sources/KristinaManualClassification.csv')
df_kristina.head()

# df_kristina.ix[np.nan(df_kristina['detail'])]#['detail'] = "" 
# df_kristina['detail'][0]# is np.nan

Unnamed: 0,mmsi,label,detail
0,10421670,Purse seine,
1,123450020,Purse seine,
2,123450800,Purse seine,
3,203226200,Passenger,sail
4,203745200,Passenger,yacht


In [20]:
# Rows with no recorded detail fall back to their coarse label.
df_kristina.loc[df_kristina['detail'].isnull(), 'detail'] = df_kristina['label']

In [21]:
df_kristina.head()

Unnamed: 0,mmsi,label,detail
0,10421670,Purse seine,Purse seine
1,123450020,Purse seine,Purse seine
2,123450800,Purse seine,Purse seine
3,203226200,Passenger,sail
4,203745200,Passenger,yacht


In [22]:
df_kristina.groupby(['label','detail'])['mmsi'].count()

label         detail         
Cargo         bulk carrier       564
              cargo              190
              container          153
              timber carrier       2
              vehicle carrier    118
Dredger       dredger              2
Longliner     Longliner           89
Passenger     ferry                1
              passenger           14
              sail               103
              yacht               53
Purse seine   Purse seine        105
Tanker        tanker             303
Trawler       Trawler            268
Tug           tug                 61
Unclassified  chemicals            1
              construction         1
              crane                1
              crew boat            6
              fish carrier         1
              oil tanker           1
              patrol               1
              pilot                2
              platform             3
              reefer              81
              research             7
        

In [23]:
# Maps the free-text `detail` values of Kristina's list to the sublabels that
# `detail_to_general` understands. Any detail that is not a key here is
# dropped from the list by the filter that follows.
# Fixed: the key was misspelled 'bluk carrier', so every 'bulk carrier' row
# failed the filter and was silently discarded.
# NOTE(review): 'vehicle carrier' is not mapped and is therefore dropped --
# confirm whether it should be treated as 'Cargo'.
df_subcat_map = {'bulk carrier':'Cargo',
                 'cargo':'Cargo',
                 'container':'Cargo',
                 'timber carrier':'Cargo',
                 'ferry':'Motor passenger',
                 'sail':'Sailing',
                 'yacht':'Passenger',
                 'passenger':'Passenger',
                 'tanker':'Tanker',
                 'tug':'Tug',
                 'reefer': 'Reefer',
                 'pilot':'Pilot',
                 'fish carrier':'Reefer',
                 'Purse seine':'Purse seines',
                 'Trawler':'Trawlers',
#                'Longliner':'Longliners' # ignore these longlines, as we don't know if they are drifting or set
                 }

# Keep only the vessels whose detail has a known sublabel. The explicit
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not write to a slice (SettingWithCopyWarning).
df_kristina = df_kristina[df_kristina['detail'].isin(list(df_subcat_map))].copy()
# detail -> sublabel, then sublabel -> general label.
df_kristina['detail'] = df_kristina['detail'].map(df_subcat_map)
df_kristina['label'] = df_kristina['detail'].map(detail_to_general)
df_kristina = df_kristina.set_index('mmsi')


In [25]:
df_kristina.groupby(['label','detail']).count()

label,detail
Cargo/Tanker,Cargo
Cargo/Tanker,Tanker
Passenger,Motor passenger
Passenger,Passenger
Passenger,Sailing
Purse seines,Purse seines
Reefer,Reefer
Trawlers,Trawlers
Tug/Pilot/Supply,Pilot
Tug/Pilot/Supply,Tug


In [30]:
# Prefix the label columns with the list name.
df_kristina = df_kristina.rename(
    columns=dict(label='kristina_label', detail='kristina_sublabel')
)
df_kristina.head()

Unnamed: 0_level_0,kristina_label,kristina_sublabel
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1
10421670,Purse seines,Purse seines
123450020,Purse seines,Purse seines
123450800,Purse seines,Purse seines
203226200,Passenger,Sailing
203745200,Passenger,Passenger


In [31]:
# Derive Fishing / Non-fishing from the general label with a vectorized map.
# This replaces a per-row `.ix` lookup: `.ix` has been removed from pandas, and
# a label-based lookup returns a Series (unhashable dict key) when an mmsi
# appears more than once in the index.
df_kristina['kristina_fishing'] = df_kristina['kristina_label'].map(general_to_fishing)
# .map yields NaN for unknown labels; fail loudly, as the dict lookup did.
assert df_kristina['kristina_fishing'].notnull().all(), \
    "kristina_label contains a value missing from general_to_fishing"

In [32]:
df_kristina.to_csv('lists/kristina.csv')

In [33]:
df_kristina.head()

Unnamed: 0_level_0,kristina_label,kristina_sublabel,kristina_fishing
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10421670,Purse seines,Purse seines,Fishing
123450020,Purse seines,Purse seines,Fishing
123450800,Purse seines,Purse seines,Fishing
203226200,Passenger,Sailing,Non-fishing
203745200,Passenger,Passenger,Non-fishing


In [34]:
df_tugs = pd.read_csv('../data/classification-list-sources/PyBossaNonFishing.csv',skiprows=[0])
df_tugs.head()

Unnamed: 0,mmsi,label
0,205203390,Tug
1,205252690,Tug
2,205264290,Tug
3,205273990,Tug
4,205360090,Tug


In [35]:
df_tugs.groupby(['label'])['mmsi'].count()

label
Tug    153
Name: mmsi, dtype: int64

In [36]:
# Source label -> general label; the source label itself becomes the sublabel.
df_tugs['tugs_label'] = df_tugs['label'].map(detail_to_general)
df_tugs = df_tugs.rename(columns = {'label':'tugs_sublabel'})
# Vectorized replacement for the per-row `.ix` lookup (`.ix` has been removed
# from pandas).
df_tugs['tugs_fishing'] = df_tugs['tugs_label'].map(general_to_fishing)
# .map yields NaN for unknown labels; fail loudly, as the dict lookup did.
assert df_tugs['tugs_fishing'].notnull().all(), \
    "tugs_label contains a value missing from general_to_fishing"
df_tugs = df_tugs.set_index('mmsi')
df_tugs.to_csv('lists/tugs.csv')
df_tugs.head()

Unnamed: 0_level_0,tugs_sublabel,tugs_label,tugs_fishing
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
205203390,Tug,Tug/Pilot/Supply,Non-fishing
205252690,Tug,Tug/Pilot/Supply,Non-fishing
205264290,Tug,Tug/Pilot/Supply,Non-fishing
205273990,Tug,Tug/Pilot/Supply,Non-fishing
205360090,Tug,Tug/Pilot/Supply,Non-fishing


In [37]:
df_Alex = pd.read_csv('../data/classification-list-sources/AlexWManualNonFishing.csv')

In [38]:
df_Alex.groupby(['label']).count()

Unnamed: 0_level_0,mmsi
label,Unnamed: 1_level_1
Cargo,10
Dredger,1
Longliner,3
Passenger,142
Tanker,20
Trawler,1
Tug,40


In [39]:
# Translate the labels of Alex's list into the sublabels keyed in
# `detail_to_general`.
# NOTE(review): the comment on 'Longliner' used to say "ignore these, as we
# don't know if they are drifting or set", but the entry does map them to
# 'Drifting longlines' and they are kept in the output -- confirm which
# behaviour is intended.
alex_subcat_map = {
    'Cargo': 'Cargo',
    'Dredger': 'Tug/Pilot/Supply',
    'Longliner': 'Drifting longlines',
    'Passenger': 'Passenger',
    'Tanker': 'Tanker',
    'Trawler': 'Trawlers',
    'Tug': 'Tug',
}

# Source label -> sublabel (stored back in 'label'), then sublabel -> general label.
df_Alex['label'] = df_Alex['label'].map(alex_subcat_map)
df_Alex['alex_label'] = df_Alex['label'].map(detail_to_general)

In [40]:
df_Alex = df_Alex.rename(columns = {'label':'alex_sublabel'})
# Vectorized replacement for the per-row `.ix` lookup (`.ix` has been removed
# from pandas).
df_Alex['alex_fishing'] = df_Alex['alex_label'].map(general_to_fishing)
# .map yields NaN for unknown labels; fail loudly, as the dict lookup did.
assert df_Alex['alex_fishing'].notnull().all(), \
    "alex_label contains a value missing from general_to_fishing"

df_Alex.head()

Unnamed: 0,mmsi,alex_sublabel,alex_label,alex_fishing
0,203310200,Passenger,Passenger,Non-fishing
1,205269190,Passenger,Passenger,Non-fishing
2,205596910,Passenger,Passenger,Non-fishing
3,211123610,Passenger,Passenger,Non-fishing
4,211149170,Passenger,Passenger,Non-fishing


In [41]:
# Write the list keyed by mmsi, like every other list in lists/. df_Alex still
# has its default integer index here, so writing it directly would emit an
# unnamed row-number column as the first CSV column. The in-memory frame is
# left untouched.
df_Alex.set_index('mmsi').to_csv('lists/alex.csv')

In [42]:

df_Alex.groupby(['alex_label']).count()

Unnamed: 0_level_0,mmsi,alex_sublabel,alex_fishing
alex_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cargo/Tanker,30,30,30
Drifting longlines,3,3,3
Passenger,142,142,142
Trawlers,1,1,1
Tug/Pilot/Supply,41,41,41


In [43]:
# Load the seismic-vessel list and keep one (mmsi, label) row per vessel,
# indexed by mmsi.
df_seismic = pd.read_csv('../data/classification-list-sources/WorldwideSeismicVesselDatabase4Dec15.csv')
df_seismic = df_seismic.rename(columns = {'MMSI #':'mmsi','Label':'seismic_label','Vessel length (m)':'seismic_length'})
df_seismic = df_seismic[['mmsi','seismic_label']]
df_seismic = df_seismic.drop_duplicates()
df_seismic = df_seismic.set_index('mmsi')
# (A stray copy-pasted `df_Alex.rename(...)` was removed from this cell: it had
# no effect and did not belong to the seismic list.)

# Vectorized replacement for the per-row `.ix` lookup (`.ix` has been removed
# from pandas).
df_seismic['seismic_fishing'] = df_seismic['seismic_label'].map(general_to_fishing)
# .map yields NaN for unknown labels; fail loudly, as the dict lookup did.
assert df_seismic['seismic_fishing'].notnull().all(), \
    "seismic_label contains a value missing from general_to_fishing"
df_seismic.head()


Unnamed: 0_level_0,seismic_label,seismic_fishing
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1
209108000,Seismic vessel,Non-fishing
209587000,Seismic vessel,Non-fishing
210228000,Seismic vessel,Non-fishing
210582000,Seismic vessel,Non-fishing
212338000,Seismic vessel,Non-fishing


In [44]:
# Earlier version of the loading step, kept for reference (commented out):
# df_seismic = pd.read_csv('../data/classification-list-sources/WorldwideSeismicVesselDatabase4Dec15.csv')
# # column 'Vessel length (m)' has the length in meters
# # df_seismic.groupby(['Label'])['MMSI #'].count()
# df_seismic = df_seismic.rename(columns = {'MMSI #':'mmsi','Label':'seismic_label','Vessel length (m)':'seismic_length'})
# df_seismic = df_seismic[['seismic_label','seismic_length']]

# The sublabel is simply a copy of the label for this list.
df_seismic['seismic_sublabel'] = df_seismic['seismic_label']

# df_seismic['seismic_label']=['seismic' for s in df_seismic['mmsi']]

In [45]:
df_seismic.head()

Unnamed: 0_level_0,seismic_label,seismic_fishing,seismic_sublabel
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
209108000,Seismic vessel,Non-fishing,Seismic vessel
209587000,Seismic vessel,Non-fishing,Seismic vessel
210228000,Seismic vessel,Non-fishing,Seismic vessel
210582000,Seismic vessel,Non-fishing,Seismic vessel
212338000,Seismic vessel,Non-fishing,Seismic vessel


In [46]:
df_seismic.to_csv("lists/seismic.csv")


In [47]:
df_river = pd.read_csv('../data/classification-list-sources/rivervessels_20160502.csv')
df_river.groupby(['label']).count()

Unnamed: 0_level_0,mmsi
label,Unnamed: 1_level_1
Supply,1
bad_data,9
cargo,104
multiple_vessles,1
not_fishing,11
not_known,16
passenger,7
tanker,3


In [48]:
df_river = df_river.set_index('mmsi')
df_river = df_river.rename(columns = {'label':'river_sublabel'})
# Source labels that can be translated into a sublabel; every other label
# (e.g. bad_data, not_known) is dropped by the filter below.
river_subcat_map = {'cargo':'Cargo',
                    'passenger':'Passenger',
                    'Supply':'Supply',
                    'Passenger':'Passenger',
                    'tanker':'Tanker',
                    }

# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignments below do not write to a slice
# (SettingWithCopyWarning).
df_river = df_river[df_river['river_sublabel'].isin(list(river_subcat_map))].copy()
df_river['river_sublabel'] = df_river['river_sublabel'].map(river_subcat_map)
df_river['river_label'] = df_river['river_sublabel'].map(detail_to_general)




In [49]:
# Vectorized replacement for the per-row `.ix` lookup: `.ix` has been removed
# from pandas, and a label-based lookup returns a Series (unhashable dict key)
# when an mmsi appears more than once in the index.
df_river['river_fishing'] = df_river['river_label'].map(general_to_fishing)
# .map yields NaN for unknown labels; fail loudly, as the dict lookup did.
assert df_river['river_fishing'].notnull().all(), \
    "river_label contains a value missing from general_to_fishing"

In [50]:
df_river.head()

Unnamed: 0_level_0,river_sublabel,river_label,river_fishing
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
203999396,Passenger,Passenger,Non-fishing
203999399,Passenger,Passenger,Non-fishing
211169130,Passenger,Passenger,Non-fishing
211489990,Cargo,Cargo/Tanker,Non-fishing
211512450,Passenger,Passenger,Non-fishing


In [51]:
df_river.to_csv('lists/river.csv')

In [52]:
df = pd.read_csv('../data/classification-list-sources/verify5and24_20160318.csv')

In [53]:
df.groupby(['label']).count()

Unnamed: 0_level_0,task id,mmsi
label,Unnamed: 1_level_1,Unnamed: 2_level_1
Diving_ops,80,80
Dredging_or_underwater_ops,112,112
Law_enforcement,131,131
Medical_transport,15,15
Military_ops,166,166
Passenger,147,147
Pleasure_craft,107,107
Search_and_Rescue,164,164
Tanker,153,153
Towing,4,4


In [54]:
df_five24 = pd.read_csv('../data/classification-list-sources/verify5and24_20160502.csv')
df_five24.head()

Unnamed: 0,task id,mmsi,label
0,2031,244690101,cargo
1,2572,211512210,cargo
2,2587,211668930,cargo
3,2608,244660859,cargo
4,2609,258222000,cargo


In [55]:
df_five24.groupby(['label']).count()

Unnamed: 0_level_0,task id,mmsi
label,Unnamed: 1_level_1,Unnamed: 2_level_1
Diving_ops,80,80
Dredging_or_underwater_ops,112,112
Law_enforcement,133,133
Medical_transport,15,15
Military_ops,168,168
OffShore Support vessel,8,8
Passenger,193,193
Passenger and Cargo,4,4
Passenger/ferry,1,1
Patrol vessel,3,3


# Ignore verify5and24_20160318.csv and use verify5and24_20160502

In [56]:
df_five24 = pd.read_csv('../data/classification-list-sources/verify5and24_20160502.csv')


In [57]:
df_five24 = df_five24.set_index('mmsi')
df_five24 = df_five24.rename(columns = {'label':'five24_sublabel'})

# Source labels that can be translated into a sublabel; rows with any other
# label are dropped by the filter below.
# NOTE(review): the source also contains labels such as 'Passenger/ferry'
# (capitalised) that are not keys here and are therefore dropped -- confirm
# that this is intended.
five24_subcat_map = {'Passenger':'Passenger',
                     'Sailing':'Sailing',
                     'Trawler':'Trawlers',
                     'Tug':'Tug',
                     'cargo':'Cargo',
                     'Port_tender':'Pilot',
                     'sailing':'Sailing',
                     'passenger/ferry':'Passenger',
                     'tanker':'Tanker'
                     }

# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignments below do not write to a slice
# (SettingWithCopyWarning).
df_five24 = df_five24[df_five24['five24_sublabel'].isin(list(five24_subcat_map))].copy()
df_five24['five24_sublabel'] = df_five24['five24_sublabel'].map(five24_subcat_map)
df_five24['five24_label'] = df_five24['five24_sublabel'].map(detail_to_general)

# Vectorized replacement for the per-row `.ix` lookup (`.ix` has been removed
# from pandas).
df_five24['five24_fishing'] = df_five24['five24_label'].map(general_to_fishing)
# .map yields NaN for unknown labels; fail loudly, as the dict lookup did.
assert df_five24['five24_fishing'].notnull().all(), \
    "five24_label contains a value missing from general_to_fishing"

# Keep only the three output columns, in the same order as before.
df_five24 = df_five24[['five24_label','five24_sublabel','five24_fishing']]

df_five24.to_csv('lists/five24.csv')

df_five24.groupby(['five24_label']).count()

Unnamed: 0_level_0,five24_sublabel,five24_fishing
five24_label,Unnamed: 1_level_1,Unnamed: 2_level_1
Cargo/Tanker,32,32
Passenger,358,358
Trawlers,1,1
Tug/Pilot/Supply,261,261


# Ignore FishingVesselsV2_HighConfidenceStudents_20160502.csv -- doesn't add many vessels

In [58]:
df_five24.head()

Unnamed: 0_level_0,five24_label,five24_sublabel,five24_fishing
mmsi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
244690101,Cargo/Tanker,Cargo,Non-fishing
211512210,Cargo/Tanker,Cargo,Non-fishing
211668930,Cargo/Tanker,Cargo,Non-fishing
244660859,Cargo/Tanker,Cargo,Non-fishing
258222000,Cargo/Tanker,Cargo,Non-fishing


In [59]:
df_ITU = pd.read_csv('../data/classification-list-sources/ITU_Dec_2015_full_list.csv')
df_ITU.head()

Unnamed: 0,MMSI,Call sign,Selective call number,Name of station,Administration/ Geographical area,Lifeboats,Auxiliary installations (EPIRBs),General classification,Individual classification,Registration number,Gross tonnage,Capacity of persons on board,Telegraph transmission frequency bands,Telegraph transmission frequency bands 2,Accounting Authority Identification Code (AAIC)
0,203002100,OEX2217,,ERIKA,AUT,,,PL,MTB,W21365,,8,,V,AU01
1,203058200,OEX2794,,TUAT GUAT,AUT,1.0,BE1,PL,YAT,N27794,1968.0,8,,V,AU01
2,203111400,OEX4631,,YLVI,AUT,,BE1,PL,MTB,N31105,299.0,6,,V,AU01
3,203116200,OEX6722,,SEVENS,AUT,1.0,BE1,PL,YAT,N29009,1872.0,12,,V,
4,203125100,OEX2289,,N-23782,AUT,,,PL,MTB,N23782,19.0,9,,V,


In [60]:
df_ITU.groupby(['Individual classification']).count()['MMSI']

Individual classification
AUX           1
BLK          16
BLS           1
CA           35
CAB           1
CHA           1
CHR           1
CON           9
CTR           1
DRG          54
EXP           5
GRF           1
HYD           6
LNG           2
LPG           1
MTB         116
OBO           5
OIL          32
PA            7
PH          116
PLT           7
PMX           1
PON           8
RAM           2
ROU           6
SAU           6
SMN           1
SRV           3
TPG          28
TPT           1
TUG          66
VLR           5
X X\nA        1
XX\n5         1
XXX       10949
YAT         203
Name: MMSI, dtype: int64

In [61]:
df_ITU = df_ITU.rename(columns = {'MMSI':'mmsi','Individual classification':'itu_nonfishing_sublabel'})

df_ITU = df_ITU.set_index('mmsi')

# that is a lot of categories... you can see the categories here:
# https://www.ofcom.org.uk/__data/assets/pdf_file/0024/16359/of168a.pdf
# I'm going to use the same distinctions that were used by Alex previously,
# with some updates to divide into sailboats

# ITU "Individual classification" code -> sublabel. Rows whose code is not a
# key here are dropped by the filter below.
itu_sublabels = {
    'FBT': 'Passenger',
    'PA': 'Passenger',
    'TUG': 'Tug',
    'LOU': "Sailing",
    'GOU': "Passenger", # these don't exist...
    'SLO': "Passenger",
    'VLR': "Sailing",
    'YAT': "Passenger",
    'RAV': "Tug/Pilot/Supply"}


# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignments below do not write to a slice
# (SettingWithCopyWarning).
df_ITU = df_ITU[df_ITU['itu_nonfishing_sublabel'].isin(list(itu_sublabels))].copy()
df_ITU['itu_nonfishing_sublabel'] = df_ITU['itu_nonfishing_sublabel'].map(itu_sublabels)
df_ITU['itu_nonfishing_label'] = df_ITU['itu_nonfishing_sublabel'].map(detail_to_general)

# Vectorized replacement for the per-row `.ix` lookup: `.ix` has been removed
# from pandas, and a label-based lookup returns a Series (unhashable dict key)
# when an mmsi appears more than once in the index.
df_ITU['itu_nonfishing_fishing'] = df_ITU['itu_nonfishing_label'].map(general_to_fishing)
# .map yields NaN for unknown labels; fail loudly, as the dict lookup did.
assert df_ITU['itu_nonfishing_fishing'].notnull().all(), \
    "itu_nonfishing_label contains a value missing from general_to_fishing"

# Keep only the three output columns, in the same order as before.
df_ITU = df_ITU[['itu_nonfishing_label','itu_nonfishing_sublabel','itu_nonfishing_fishing']]

df_ITU.groupby(['itu_nonfishing_label']).count()

Unnamed: 0_level_0,itu_nonfishing_sublabel,itu_nonfishing_fishing
itu_nonfishing_label,Unnamed: 1_level_1,Unnamed: 2_level_1
Passenger,215,215
Tug/Pilot/Supply,66,66


In [63]:
df_ITU.to_csv('lists/itu_nonfishing.csv')