## Script to create release locations of plastic from fishing activity
We use the Global Fishing Watch v2 Dataset found here: https://globalfishingwatch.org/data-download/datasets/public-fishing-effort

We first download the 'fleet-daily-csv-100-v2-YYYY.zip' files and extract them into the folder 'data/release/GlobalFishingWatch/', so that each archive becomes a 'fleet*' subfolder containing the CSV files.

We then run this script to create a .csv file with location, month, and summed fishing hours data.

In [1]:
import pandas as pd
import glob

# Folder that is searched for the Global Fishing Watch 'fleet*' subfolders.
input_data = '../../data/release/GlobalFishingWatch/'
# Folder for generated files.
# NOTE(review): not referenced by any of the cells shown here - confirm
# whether the aggregated CSV is meant to be written to this location.
output_data = '../../data/release/generated_files/'

In [2]:
# Collect every file inside the 'fleet*' subfolders of the input folder,
# then order the paths alphabetically so the load order is reproducible.
files_fisheries = glob.glob(input_data + 'fleet*/*')
files_fisheries.sort()

In [3]:
# Load every Global Fishing Watch file and keep only the rows with actual
# fishing effort (fishing_hours > 0). The data is kept as plain
# lat/lon/date/fishing_hours records, which is independent of any model grid.
if not files_fisheries:
    # pd.concat raises an opaque "No objects to concatenate" on an empty
    # list; fail early with a message that points at the likely cause.
    raise ValueError(
        'No Global Fishing Watch files found - check that the fleet-daily '
        'archives are extracted into ' + repr(input_data)
    )

# One filtered frame per input file, stacked into a single frame with a
# fresh 0..n-1 index.
data_fisheries = pd.concat(
    [
        pd.read_csv(file_).loc[lambda day: day['fishing_hours'] > 0]
        for file_ in files_fisheries
    ],
    axis=0,
    ignore_index=True,
)

In [None]:
## Sum the fishing hours of all records sharing the same grid cell and date
agg_data_fisheries = (
    data_fisheries
    .groupby(['cell_ll_lat', 'cell_ll_lon', 'date'])['fishing_hours']
    .sum()
    # Turn the group keys back into ordinary columns.
    .reset_index()
)

In [None]:
## Collapse the daily totals into monthly totals per grid cell
# Convert the 'date' column to datetimes so it can be truncated to months.
agg_data_fisheries['date'] = pd.to_datetime(agg_data_fisheries['date'])
# Casting to month resolution discards the day-of-month part of each date.
agg_data_fisheries['month'] = agg_data_fisheries['date'].to_numpy().astype('datetime64[M]')
agg_data_fisheries = (
    agg_data_fisheries
    .groupby(['cell_ll_lat', 'cell_ll_lon', 'month'])['fishing_hours']
    .sum()
    # Turn the group keys back into ordinary columns.
    .reset_index()
)

In [None]:
# Write the monthly aggregate to disk. The row index is written as the first
# (unnamed) column, which is the to_csv default made explicit here.
agg_data_fisheries.to_csv(
    path_or_buf='../../data/release/GlobalFishingWatch/fleet_monthly_aggregated.csv',
    index=True,
)
## Author's note: this chops it down from 70gb for one year to 2gb!