In [178]:
# Import all necessary packages and functions

import pandas as pd
import matplotlib
import seaborn as sns
import numpy as np
import sqlalchemy as sq
import sql_functions as sf
import psycopg2 # for export to DBeaver: needed to get database exception errors when uploading dataframe

#pandas settings for big numbers (to not show numbers in scientific format)
pd.options.display.float_format = '{:.2f}'.format


# Fish catches in EU

## Step 1: Import files and create dataframe with individual variables

In [179]:
# import csv files
fish_catch_EU = pd.read_csv("data/X_Seafood Production Data/Fish_catch_EU_full_data.csv")

## Step 2: Inspect dataframe with head(), info(), shape, columns, tail(), describe()

In [180]:
# inspect dataframes - fish catch
display(fish_catch_EU.head(),
         fish_catch_EU.info(),
         fish_catch_EU.shape,
         fish_catch_EU.columns,
         fish_catch_EU.tail(),
         fish_catch_EU.describe() # doesn't work that because the values are objects - change data types
)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 307402 entries, 0 to 307401
Data columns (total 10 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   DATAFLOW     307402 non-null  object
 1   LAST UPDATE  307402 non-null  object
 2   freq         307402 non-null  object
 3   species      307402 non-null  object
 4   fishreg      307402 non-null  int64 
 5   unit         307402 non-null  object
 6   geo          307402 non-null  object
 7   TIME_PERIOD  307402 non-null  int64 
 8   OBS_VALUE    303991 non-null  object
 9   OBS_FLAG     19143 non-null   object
dtypes: int64(2), object(8)
memory usage: 23.5+ MB


Unnamed: 0,DATAFLOW,LAST UPDATE,freq,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG
0,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,AAA,0,TLW,DK,2020,0,
1,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,AAA,0,TLW,EU27_2020,2019,0,
2,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,AAA,0,TLW,EU27_2020,2020,0,
3,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,AAA,0,TLW,EU28,2019,0,
4,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,AAA,0,TLW,IE,2019,0,


None

(307402, 10)

Index(['DATAFLOW', 'LAST UPDATE', 'freq', 'species', 'fishreg', 'unit', 'geo',
       'TIME_PERIOD', 'OBS_VALUE', 'OBS_FLAG'],
      dtype='object')

Unnamed: 0,DATAFLOW,LAST UPDATE,freq,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG
307397,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,ZGS,27,TLW,EU27_2020,2019,343,
307398,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,ZGS,27,TLW,EU27_2020,2020,408,
307399,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,ZGS,27,TLW,EU28,2016,1069,
307400,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,ZGS,27,TLW,EU28,2018,54,
307401,ESTAT:FISH_CA_MAIN(1.0),17/05/22 11:00:00,A,ZGS,27,TLW,EU28,2019,343,


Unnamed: 0,fishreg,TIME_PERIOD
count,307402.0,307402.0
mean,20.27,2010.19
std,16.83,6.2
min,0.0,2000.0
25%,0.0,2005.0
50%,27.0,2011.0
75%,34.0,2016.0
max,51.0,2020.0


## Step 3: Delete non necessary columns and rows

In [181]:
# delete columns
fish_catch_EU.drop(columns = ['DATAFLOW', 'LAST UPDATE', 'freq', 'OBS_FLAG'] , inplace=True)
fish_catch_EU.head()


Unnamed: 0,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE
0,AAA,0,TLW,DK,2020,0
1,AAA,0,TLW,EU27_2020,2019,0
2,AAA,0,TLW,EU27_2020,2020,0
3,AAA,0,TLW,EU28,2019,0
4,AAA,0,TLW,IE,2019,0


In [182]:
# delete rows with EU28 (refers to EU including UK before Brexit)
fish_catch_EU = fish_catch_EU[fish_catch_EU.geo != "EU28"] # logic: keep only rows without EU28, assign variable again.
fish_catch_EU.head()

Unnamed: 0,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE
0,AAA,0,TLW,DK,2020,0
1,AAA,0,TLW,EU27_2020,2019,0
2,AAA,0,TLW,EU27_2020,2020,0
4,AAA,0,TLW,IE,2019,0
5,AAA,0,TLW,IE,2020,0


## Step 4: Rename columns lower case, snake case, spaces, delimiters  

In [183]:
# Convert columns in lower case, change names + snake case
fish_catch_EU.columns = [x.lower() for x in fish_catch_EU.columns]
fish_catch_EU.rename(columns = {"fishreg" : "fishing_region", 
                                "geo" : "country", 
                                "time_period" : "year", 
                                "obs_value" : "volume"}, 
                                inplace=True, 
                                errors = "ignore") #errors =ignore to not get the warning message

fish_catch_EU.head()

Unnamed: 0,species,fishing_region,unit,country,year,volume
0,AAA,0,TLW,DK,2020,0
1,AAA,0,TLW,EU27_2020,2019,0
2,AAA,0,TLW,EU27_2020,2020,0
4,AAA,0,TLW,IE,2019,0
5,AAA,0,TLW,IE,2020,0


## Step 5: Rename row value names. First letter upper case e.g. EUR = Eur
## Step 6: Replace (country/species) abbreviations with full names by using dictionaries 

In [184]:
# Replace geo codes with country names using a dictionary + map-function 
# (relatively full set of country codes, may be useful for other files from Eurostat)
country_dict = {"BE" : "Belgium",
                "BG" : 	"Bulgaria",
                "CZ" : "Czech Repuplic",
                "DK" :	"Denmark",
                "DE" :	"Germany",
                "EE" :	"Estonia",
                "IE" :	"Ireland",
                "EL" :	"Greece",
                "ES" :	"Spain",
                "FR" :	"France", 
                "HR" :	"Croatia",
                "IT" : "Italy",
                "CY" : "Cyprus",
                "LV" : "Latvia",
                "LT" : "Lithuania",
                "LU" : "Luxembourg",
                "HU" : "Hungary",
                "MT" : "Malta",
                "NL" : "Netherlands",
                "AT" : "Austria",
                "PL" : "Poland",
                "PT" : "Portugal",
                "RO" : "Romania",
                "SI" : "Slovenia",
                "SK" : "Slovakia",
                "FI" : "Finland",
                "SE" : "Sweden",
                "UK" : "United Kingdom",
                "AL" : "Albania",
                "EU27_2020" : "European Union",
                "NO" : "Norway",
                "TR" : "Turkey",
                "IS" : "Iceland"}

In [185]:
# rename values within column country
fish_catch_EU["country"] = fish_catch_EU["country"].map(country_dict)
fish_catch_EU.head()

Unnamed: 0,species,fishing_region,unit,country,year,volume
0,AAA,0,TLW,Denmark,2020,0
1,AAA,0,TLW,European Union,2019,0
2,AAA,0,TLW,European Union,2020,0
4,AAA,0,TLW,Ireland,2019,0
5,AAA,0,TLW,Ireland,2020,0


In [186]:
# check unique values in dataset
fish_catch_EU["fishing_region"].unique() 
# create dictionary for fishing regions
fishing_regions_dict = {21 : "Atlantic, Northwest",
                        27 :	"Atlantic, Northeast",
                        34 :	"Atlantic, Eastern Central",
                        37 :	"Mediterranean and Black Sea",
                        41 :	"Atlantic, Southwest",
                        47 :	"Atlantic, Southeast",
                        51 : "Indian Ocean, Western",
                        0 : "All fishing regions"}
# rename values within column fishing_region
fish_catch_EU["fishing_region"] = fish_catch_EU["fishing_region"].map(fishing_regions_dict)
fish_catch_EU.head()

Unnamed: 0,species,fishing_region,unit,country,year,volume
0,AAA,All fishing regions,TLW,Denmark,2020,0
1,AAA,All fishing regions,TLW,European Union,2019,0
2,AAA,All fishing regions,TLW,European Union,2020,0
4,AAA,All fishing regions,TLW,Ireland,2019,0
5,AAA,All fishing regions,TLW,Ireland,2020,0


In [187]:
# change values in species column
# check number unique values in dataset
fish_catch_EU["species"].nunique()

1559

In [188]:
# Create list of unique values for creating dictionary
species_list = list(fish_catch_EU["species"].unique())
species_list[999:] # had to slice it for displaying full output in editor

['OQM',
 'OQT',
 'ORD',
 'ORY',
 'OSG',
 'OST',
 'OUB',
 'OUL',
 'OUM',
 'OUW',
 'OXN',
 'OXY',
 'OYC',
 'OYF',
 'OYG',
 'OYX',
 'PAA',
 'PAC',
 'PAG',
 'PAL',
 'PAN',
 'PAR',
 'PAT',
 'PAU',
 'PAX',
 'PBC',
 'PCB',
 'PCR',
 'PDZ',
 'PEE',
 'PEI',
 'PEL',
 'PEN',
 'PEQ',
 'PER',
 'PET',
 'PEW',
 'PEZ',
 'PFM',
 'PGA',
 'PHO',
 'PIA',
 'PIC',
 'PIL',
 'PIQ',
 'PIZ',
 'PJC',
 'PKE',
 'PKV',
 'PLA',
 'PLE',
 'PLN',
 'PLS',
 'PLY',
 'PLZ',
 'PNB',
 'PNI',
 'PNQ',
 'PNU',
 'PNV',
 'POA',
 'POC',
 'POD',
 'POI',
 'POK',
 'POL',
 'POP',
 'POR',
 'POS',
 'POX',
 'PPH',
 'PPX',
 'PRA',
 'PRC',
 'PRI',
 'PRP',
 'PRR',
 'PSB',
 'PSE',
 'PSK',
 'PSL',
 'PSS',
 'PTM',
 'PTY',
 'PUF',
 'PUX',
 'PVJ',
 'PVQ',
 'PVR',
 'PWT',
 'PXV',
 'PZB',
 'QFF',
 'QPH',
 'QPX',
 'QSC',
 'QSO',
 'QTV',
 'QUB',
 'QUL',
 'QZU',
 'RAE',
 'RAG',
 'RAJ',
 'RAQ',
 'RAT',
 'RAZ',
 'RBC',
 'RBF',
 'RBO',
 'RBX',
 'RCR',
 'RCT',
 'RCW',
 'RDC',
 'RDQ',
 'REA',
 'REB',
 'REC',
 'RED',
 'REG',
 'REL',
 'REO',
 'REQ',
 'RES',


In [189]:
# Create dictionary with list

species_dict = {'AAA' : 'Adriatic sturgeon',
'AAS' : 'Noble crayfish',
'ABK' : 'White bream',
'ABX' : 'Abalones nei',
'ABZ' : 'Small sandeel',
'ACC' : 'Ruffe',
'ACH' : 'Arctic char',
'AES' : 'Aesop shrimp',
'AFT' : 'Hooknose',
'AGD' : 'Sharktooth moray',
'AGI' : 'Polygon moray',
'AGK' : 'Brown moray',
'AGN' : 'Angelshark',
'AHH' : 'Mediterranean sand smelt',
'AHN' : 'Swallowtail seaperch',
'AJH' : 'None',
'AJQ' : 'Common jellyfish',
'AKJ' : 'European prickly cockle',
'AKL' : 'Scale-rayed wrasse',
'ALA' : 'Alexandria pompano',
'ALB' : 'Albacore',
'ALC' : 'Bairds slickhead',
'ALD' : 'Small-headed clingfish',
'ALE' : 'Alewife',
'ALF' : 'Alfonsinos nei',
'ALH' : 'Slickheads nei',
'ALI' : 'Lancetfishes nei',
'ALK' : 'Alaska pollock(=Walleye poll.)',
'ALM' : 'Unicorn leatherjacket filefish',
'ALN' : 'Scribbled leatherjac. filefish',
'ALR' : 'Bleak',
'ALS' : 'Silvertip shark',
'ALV' : 'Thresher',
'ALX' : 'Long snouted lancetfish',
'AMB' : 'Greater amberjack',
'AMD' : 'Chain moray',
'AMS' : 'Brine shrimp',
'AMV' : 'Speckled moray',
'AMX' : 'Amberjacks nei',
'ANA' : 'Argentine anchovy',
'ANB' : 'Bay anchovy',
'ANC' : 'Southern African anchovy',
'AND' : 'Agujon needlefish',
'ANE' : 'European anchovy',
'ANF' : 'Anglerfishes nei',
'ANG' : 'American angler',
'ANK' : 'Blackbellied angler',
'ANN' : 'Annular seabream',
'ANP' : 'Daggertooth',
'ANT' : 'Blue antimora',
'ANW' : 'Angelfishes nei',
'ANX' : 'Anchovies, etc. nei',
'AOM' : 'Painted eel',
'APB' : 'Siberian sturgeon',
'APE' : 'Starry sturgeon',
'APG' : 'Danube sturgeon(=Osetr)',
'APH' : 'Intermediate scabbardfish',
'API' : 'Deep-water catsharks',
'APL' : 'Aquatic plants nei',
'APO' : 'Cardinalfishes, etc. nei',
'APQ' : 'Iceland catshark',
'APR' : 'Sterlet sturgeon',
'APS' : 'Northern pink shrimp',
'APU' : 'Sturgeon',
'ARA' : 'Blue and red shrimp',
'ARB' : 'Bandtooth conger',
'ARG' : 'Argentines',
'ARI' : 'Aristeid shrimps nei',
'ARQ' : 'Rusty jobfish',
'ARS' : 'Giant red shrimp',
'ARU' : 'Greater argentine',
'ARV' : 'Striped red shrimp',
'ARY' : 'Argentine',
'ASB' : 'Beans sawtooth eel',
'ASD' : 'Allis shad',
'ASK' : 'Angelsharks, sand devils nei',
'ASN' : 'North Atlantic rockweed',
'ASP' : 'Caspian shads',
'ASU' : 'Asp',
'ATB' : 'Big-scale sand smelt',
'ATG' : 'Artic cod',
'ATP' : 'Sand smelt',
'ATQ' : 'Cape silverside',
'ATV' : 'East Siberian cod',
'AUL' : 'Australian blackspot catshark',
'AUR' : 'Rough-head sea catfish',
'AUU' : 'Canary damsel',
'AUX' : 'Giant catfish',
'AVR' : 'Green jobfish',
'AVX' : 'Sand smelts nei',
'AWM' : 'Fangtooth moray',
'AWP' : 'Gillbacker sea catfish',
'AXP' : 'Crucifix sea catfish',
'BAC' : 'Pickhandle barracuda',
'BAG' : 'Guinean barracuda',
'BAH' : 'Bastard halibut',
'BAI' : 'Batoid fishes nei',
'BAR' : 'Barracudas nei',
'BAS' : 'Combers nei',
'BAZ' : 'Barracudas, etc. nei',
'BBE' : 'Goiter blacksmelt',
'BBH' : 'Blueback shad',
'BBS' : 'Black scorpionfish',
'BDL' : 'Elongate frostfish',
'BDY' : 'None',
'BEB' : 'Coco sea catfish',
'BEN' : 'Needlefishes, etc. nei',
'BER' : 'None',
'BES' : 'None',
'BET' : 'Bigeye tuna',
'BFT' : 'Atlantic bluefin tuna',
'BGR' : 'Bastard grunt',
'BGX' : 'None',
'BGZ' : 'Pigsnout grunt',
'BHD' : 'Lusitanian toadfish',
'BHG' : 'Glacier lantern fish',
'BHY' : 'Bathyraja rays nei',
'BIB' : 'Pouting(=Bib)',
'BIC' : 'Bighead carp',
'BIF' : 'De Buens goby',
'BIG' : 'Bigeyes nei',
'BII' : 'None',
'BIL' : 'Marlins,sailfishes,etc. nei',
'BIP' : 'Striped bonito',
'BIS' : 'Bigeye scad',
'BIU' : 'None',
'BLB' : 'Blue butterfish',
'BLE' : 'Combtooth blennies',
'BLF' : 'Blackfin tuna',
'BLI' : 'Blue ling',
'BLL' : 'Brill',
'BLM' : 'Black marlin',
'BLP' : 'Patagonian blennie',
'BLT' : 'Bullet tuna',
'BLU' : 'Bluefish',
'BLV' : 'Queen triggerfish',
'BNF' : 'Longfin bonefish',
'BOA' : 'Bonga shad',
'BOB' : 'Atlantic seabob',
'BOC' : 'Boarfish',
'BOF' : 'Bonefish',
'BOG' : 'Bogue',
'BON' : 'Atlantic bonito',
'BOP' : 'Plain bonito',
'BOR' : 'Boarfishes nei',
'BOX' : 'None',
'BOY' : 'Purple dye murex',
'BPB' : 'Atlantic fanfish',
'BPI' : 'Blotched picarel',
'BQX' : 'Butterfishes nei',
'BRA' : 'None',
'BRB' : 'Black seabream',
'BRD' : 'Bearded brotula',
'BRF' : 'Blackbelly rosefish',
'BRI' : 'Bristlemouths',
'BRO' : 'Copper shark',
'BRS' : 'Serra Spanish mackerel',
'BRT' : 'Snaggletooth',
'BRU' : 'Southern rays bream',
'BRX' : 'Alfonsinos, etc. nei',
'BRZ' : 'Pomfrets, ocean breams nei',
'BSB' : 'Black seabass',
'BSC' : 'Bluespotted seabream',
'BSD' : 'None',
'BSE' : 'Seabasses nei',
'BSF' : 'Black scabbardfish',
'BSH' : 'Blue shark',
'BSK' : 'Basking shark',
'BSS' : 'European seabass',
'BSX' : 'Groupers, seabasses nei',
'BTH' : 'Bigeye thresher',
'BTP' : 'None',
'BUA' : 'Atlantic bumper',
'BUM' : 'Blue marlin',
'BUR' : 'Sompat grunt',
'BUT' : 'Atlantic butterfish',
'BUX' : 'Butterfishes, pomfrets nei',
'BVG' : 'Channel bull blenny',
'BVR' : 'Silver deepsea smelt',
'BVV' : 'Yellowmouth barracuda',
'BWH' : 'Moontail bullseye',
'BXD' : 'Alfonsino',
'BXL' : 'None',
'BYS' : 'Splendid alfonsino',
'BZD' : 'Blackbar hogfish',
'BZX' : 'Bonitos nei',
'CAA' : 'Atlantic wolffish',
'CAB' : 'Northern wolffish',
'CAL' : 'Callinectes swimcrabs nei',
'CAP' : 'Capelin',
'CAR' : 'Cartilaginous fishes nei',
'CAS' : 'Spotted wolffish',
'CAT' : 'Wolffishes(=Catfishes) nei',
'CAX' : 'Sea catfishes nei',
'CBA' : 'Cobia',
'CBC' : 'Red bandfish',
'CBG' : 'Driftfish',
'CBM' : 'Brown meagre',
'CBQ' : 'Chestnut goby',
'CBR' : 'Comber',
'CCA' : 'Bignose shark',
'CCE' : 'Bull shark',
'CCG' : 'Galapagos shark',
'CCI' : 'Blackspot shark',
'CCL' : 'Blacktip shark',
'CCP' : 'Sandbar shark',
'CCT' : 'Sand tiger shark',
'CDL' : 'Cardinal fishes nei',
'CDN' : 'Sloanes viperfish',
'CDX' : 'Croakers, drums nei',
'CDZ' : 'Northern cods nei',
'CEM' : 'Smallfin gulper shark',
'CEN' : 'Ruffs, barrelfishes nei',
'CEO' : 'Rudderfish',
'CEP' : 'Cephalopods nei',
'CET' : 'Wedge sole',
'CEX' : 'Cusk-eels nei',
'CFB' : 'Black dogfish',
'CFI' : 'Coral hind',
'CFW' : 'Pompano dolphinfish',
'CFZ' : 'Golden hind',
'CGE' : 'West African geryon',
'CGO' : 'Goldfish',
'CGX' : 'Carangids nei',
'CGZ' : 'Conger eels nei',
'CHM' : 'Cape elephantfish',
'CHO' : 'Cape Hope squid',
'CHR' : 'Chars nei',
'CIL' : 'Spotted flounder',
'CIT' : 'Citharids nei',
'CJM' : 'Chilean jack mackerel',
'CKL' : 'Law croaker',
'CKP' : 'None',
'CKW' : 'West African croakers nei',
'CLA' : 'Black and Caspian Sea sprat',
'CLB' : 'Atlantic surf clam',
'CLE' : 'Anchovy sprat',
'CLH' : 'Northern quahog(=Hard clam)',
'CLJ' : 'Japanese carpet shell',
'CLP' : 'Herrings, sardines nei',
'CLQ' : 'Ocean quahog',
'CLS' : 'Sand gaper',
'CLT' : 'Stimpsons surf clam',
'CLU' : 'Clupeoids nei',
'CLV' : 'Venus clams nei',
'CLX' : 'Clams, etc. nei',
'CMK' : 'Damselfish',
'CMM' : 'Manila clam',
'CMO' : 'Rabbit fish',
'CMR' : 'Mediterranean shore crab',
'CNC' : 'None',
'CNT' : 'Rough triggerfish',
'CNZ' : 'Crangon shrimps nei',
'COA' : 'American conger',
'COB' : 'Shi drum',
'COC' : 'Common edible cockle',
'COD' : 'Atlantic cod',
'COE' : 'European conger',
'COL' : 'Sardinia coral',
'COM' : 'Narrow-barred Spanish mackerel',
'COR' : 'Precious corals nei',
'COS' : 'Argentine conger',
'COU' : 'Rainbow wrasse',
'COW' : 'Navaga(=Wachna cod)',
'COX' : 'Conger eels, etc. nei',
'COZ' : 'Cockles nei',
'CPL' : 'Lowfin gulper shark',
'CPR' : 'Common prawn',
'CPU' : 'Little gulper shark',
'CPW' : 'Pearl fish',
'CQF' : 'Banded whiptail',
'CQL' : 'Hollowsnout grenadier',
'CRA' : 'Marine crabs nei',
'CRB' : 'Blue crab',
'CRE' : 'Edible crab',
'CRG' : 'Green crab',
'CRK' : 'Atlantic rock crab',
'CRN' : 'Crangonid shrimps nei',
'CRQ' : 'Queen crab',
'CRR' : 'Red crab',
'CRS' : 'Portunus swimcrabs nei',
'CRU' : 'Marine crustaceans nei',
'CRV' : 'Silver croaker',
'CRW' : 'Palinurid spiny lobsters nei',
'CRY' : 'Yellow croaker',
'CSH' : 'Common shrimp',
'CTB' : 'Common two-banded seabream',
'CTC' : 'Common cuttlefish',
'CTG' : 'Grooved carpet shell',
'CTL' : 'Cuttlefish, bobtail squids nei',
'CTR' : 'Dwarf bobtail squid',
'CTS' : 'None',
'CTZ' : 'Streaked gurnard',
'CUI' : 'Caspian shad',
'CUS' : 'Pink cusk-eel',
'CUT' : 'Hairtails, scabbardfishes nei',
'CUX' : 'Sea cucumbers nei',
'CVJ' : 'Crevalle jack',
'CVW' : 'Shortnose greeneye',
'CWG' : 'Prussian carp',
'CWO' : 'Gulper sharks nei',
'CWZ' : 'Carcharhinus sharks nei',
'CXF' : 'Redfish',
'CXS' : 'Bigeye trevally',
'CXY' : 'Volutes nei',
'CYA' : 'Smalleyed rabbitfish',
'CYH' : 'Large-eyed rabbitfish',
'CYO' : 'Portuguese dogfish',
'CYP' : 'Longnose velvet dogfish',
'CYY' : 'Shortnose velvet dogfish',
'CZW' : 'Carcinus crabs nei',
'DAB' : 'Common dab',
'DBA' : 'None',
'DBO' : 'Bottlenose dolphin',
'DCA' : 'Birdbeak dogfish',
'DCP' : 'Natantian decapods nei',
'DCT' : 'Roughear scad',
'DCX' : 'Diadromous clupeoids nei',
'DEA' : 'Angolan dentex',
'DEC' : 'Common dentex',
'DEJ' : 'None',
'DEL' : 'Large-eye dentex',
'DEM' : 'Morocco dentex',
'DEN' : 'Canary dentex',
'DEP' : 'Pink dentex',
'DEQ' : 'Four-spotted goby',
'DEX' : 'Dentex nei',
'DGH' : 'Dogfishes and hounds nei',
'DGP' : 'Painted sweetlips',
'DGS' : 'Picked dogfish',
'DGX' : 'Dogfish sharks nei',
'DGZ' : 'Dogfishes nei',
'DHQ' : 'Senegal seabream',
'DHV' : 'Honeycomb stingray',
'DHZ' : 'Ocellated wedge sole',
'DIA' : 'Diadromous fishes nei',
'DIN' : 'None',
'DJB' : 'Bearded horse mussel',
'DJC' : 'Corneous wedge clam',
'DOL' : 'Common dolphinfish',
'DON' : 'Donax clams',
'DOP' : 'Shortnose spurdog',
'DOR' : 'Dosinia clam',
'DOT' : 'Dogtooth tuna',
'DOX' : 'Dolphinfishes nei',
'DPF' : 'None',
'DPS' : 'Deep-water rose shrimp',
'DPX' : 'Demersal percomorphs nei',
'DRJ' : 'Flapper skate',
'DRS' : 'Boe drum',
'DRU' : 'Sciaenas nei',
'DSX' : 'Mature dosinia',
'DUS' : 'Dusky shark',
'DXL' : 'Truncate donax',
'DYL' : 'Flying gurnard',
'DYZ' : 'Redbarred lizardfish',
'EAG' : 'Eagle rays nei',
'EBS' : 'Brilliant pomfret',
'ECE' : 'Mediterranean flyingfish',
'ECH' : 'Echinoderms',
'EDE' : 'Ferrers goby',
'EDR' : 'Pelagic armourhead',
'EDT' : 'Musky octopus',
'EEA' : 'Blacktip grouper',
'EEI' : 'Haifa grouper',
'EEJ' : 'Speckled grouper',
'EEM' : 'Snubnose grouper',
'EEO' : 'Queen snapper',
'EEP' : 'Comet grouper',
'EER' : 'Honeycomb grouper',
'EET' : 'Atlantic goliath grouper',
'EEV' : 'Blue-and-yellow grouper',
'EEX' : 'Starspotted grouper',
'EFG' : 'Prickly puffer',
'EFH' : 'Brownspotted grouper',
'EFJ' : 'Dogtooth grouper',
'EFT' : 'Tomato hind',
'EGC' : 'None',
'EGD' : 'Pencil cardinal',
'EHI' : 'Curled picarel',
'EIK' : 'Warty crab',
'EJB' : 'African cuttlefish',
'EJE' : 'Elegant cuttlefish',
'ELE' : 'European eel',
'ELP' : 'Eelpout',
'ELX' : 'River eels nei',
'ELZ' : 'Eelpouts',
'EML' : 'Blacksaddled coralgrouper',
'EMP' : 'Emperors(=Scavengers) nei',
'EMT' : 'Bonnetmouths, rubyfishes nei',
'ENC' : 'Fourbeard rockling',
'ENX' : 'Rock cook',
'EOI' : 'Horned octopus',
'EPI' : 'Black cardinal fish',
'EPK' : 'Goldblotch grouper',
'EQE' : 'Pod razor shell',
'EQI' : 'Sword razor shell',
'EQK' : 'Arched razor shell',
'EQR' : 'Rugose mactra',
'ERO' : 'Pitted stonefish',
'ERS' : 'Chinese mitten crab',
'ETA' : 'Deep-water red snapper',
'ETC' : 'Deepwater longtail red snapper',
'ETP' : 'Smooth lanternshark',
'ETR' : 'Great lanternshark',
'ETX' : 'Velvet belly',
'EWR' : 'Red-tipped grouper',
'EWU' : 'White-blotched grouper',
'EYO' : 'Atlantic rubyfish',
'EZR' : 'Oblique-banded grouper',
'EZS' : 'Slender rockfish',
'F00' : 'Total fishery products',
'F01' : 'Total aquatic animals',
'F02' : 'Total freshwater and diadromous fish',
'F04' : 'Total shellfish',
'F07' : 'Total finfish',
'F08' : 'Total finfish and shellfish',
'F10' : 'Freshwater fishes',
'F11' : 'Carps, barbels and other cyprinids',
'F12' : 'Tilapias and other cichlids',
'F13' : 'Miscellaneous freshwater fishes',
'F20' : 'Diadromous fishes',
'F21' : 'Sturgeons, paddlefishes',
'F22' : 'River eels',
'F23' : 'Salmons, trouts, smelts',
'F24' : 'Shads',
'F25' : 'Miscellaneous diadromous fishes',
'F30' : 'Marine fishes',
'F31' : 'Flounders, halibuts, soles',
'F32' : 'Cods, hakes, haddocks',
'F33' : 'Miscellaneous coastal fishes',
'F34' : 'Miscellaneous demersal fishes',
'F35' : 'Herrings, sardines, anchovies',
'F36' : 'Tunas, bonitos, billfishes',
'F37' : 'Miscellaneous pelagic fishes',
'F38' : 'Sharks, rays, chimaeras',
'F39' : 'Marine fishes not identified',
'F40' : 'Crustaceans',
'F41' : 'Freshwater crustaceans',
'F42' : 'Crabs, sea-spiders',
'F43' : 'Lobsters, spiny-rock lobsters',
'F44' : 'King crabs, squat-lobsters',
'F45' : 'Shrimps, prawns',
'F46' : 'Krill, planktonic crustaceans',
'F47' : 'Miscellaneous marine crustaceans',
'F50' : 'Molluscs',
'F51' : 'Freshwater molluscs',
'F52' : 'Abalones, winkles, conchs',
'F53' : 'Oysters',
'F54' : 'Mussels',
'F55' : 'Scallops, pectens',
'F56' : 'Clams, cockles, arkshells',
'F57' : 'Squids, cuttlefishes, octopuses',
'F58' : 'Miscellaneous marine molluscs',
'F60' : 'Whales, seals and other aquatic mammals',
'F62' : 'Sperm-whales, pilot-whales',
'F63' : 'Eared seals, hair seals, walruses',
'F64' : 'Miscellaneous aquatic mammals',
'F70' : 'Miscellaneous aquatic animals',
'F72' : 'Turtles',
'F74' : 'Sea-squirts and other tunicates',
'F76' : 'Sea-urchins and other echinoderms',
'F77' : 'Miscellaneous aquatic invertebrates',
'F80' : 'Miscellaneous aquatic animal products',
'F81' : 'Pearls, mother-of-pearl, shells',
'F82' : 'Corals',
'F83' : 'Sponges',
'F90' : 'Aquatic plants',
'F91' : 'Brown seaweeds',
'F92' : 'Red seaweeds',
'F93' : 'Green seaweeds',
'F94' : 'Miscellaneous aquatic plants',
'FAC' : 'Crimson pasiphaeid',
'FAL' : 'Silky shark',
'FAM' : 'Pink glass shrimp',
'FAS' : 'Falkland sprat',
'FAV' : 'White glass shrimp',
'FBM' : 'Freshwater bream',
'FBR' : 'Freshwater breams nei',
'FBU' : 'Burbot',
'FCC' : 'Crucian carp',
'FCG' : 'Grass carp(=White amur)',
'FCP' : 'Common carp',
'FCY' : 'Cyprinids nei',
'FEL' : 'Red snapping shrimp',
'FFX' : 'Filefishes, leatherjackets nei',
'FID' : 'Orfe(=Ide)',
'FIE' : 'Common dace',
'FIM' : 'Transparent goby',
'FIN' : 'Finfishes nei',
'FIO' : 'Bluespotted cornetfish',
'FKL' : 'Wooden fasciolaria',
'FKU' : 'Red forkweed',
'FLE' : 'European flounder',
'FLS' : 'Summer flounder',
'FLW' : 'Winter flounder',
'FLX' : 'Flatfishes nei',
'FLY' : 'Flyingfishes nei',
'FMG' : 'Scotch bonnet',
'FMS' : 'Saburon helmet',
'FNT' : 'Banded murex',
'FOD' : 'Common piddock',
'FOR' : 'Forkbeard',
'FOX' : 'Forkbeards nei',
'FPE' : 'European perch',
'FPI' : 'Northern pike',
'FPP' : 'Pike-perch',
'FRC' : 'None',
'FRF' : 'Freshwater fishes nei',
'FRI' : 'Frigate tuna',
'FRL' : 'Bean solen',
'FRO' : 'Roach',
'FRS' : 'Fringescale sardinella',
'FRX' : 'Roaches nei',
'FRZ' : 'Frigate and bullet tunas',
'FSC' : 'Sichel',
'FSU' : 'Cloudy keyhole limpet',
'FTE' : 'Tench',
'FTX' : 'None',
'FUA' : 'Wracks nei',
'FUU' : 'Toothed wrack',
'FUV' : 'Bladder wrack',
'FVE' : 'Vendace',
'FXF' : 'Flexuous scallop',
'FXG' : 'Smooth scallop',
'FXX' : 'Eunice sea-worms',
'FYS' : 'Nori nei',
'GAD' : 'Gadiformes nei',
'GAG' : 'Tope shark',
'GAL' : 'Lesser African threadfin',
'GAM' : 'Mouse catshark',
'GAR' : 'Garfish',
'GAS' : 'Gastropods nei',
'GAT' : 'White barbel',
'GAU' : 'Crest-tail catsharks nei',
'GBA' : 'Great barracuda',
'GBC' : 'Giant goby',
'GBF' : 'Two-spotted goby',
'GBL' : 'Biglip grunt',
'GBN' : 'Black goby',
'GBO' : 'Grass goby',
'GBR' : 'Rubberlip grunt',
'GCC' : 'Violet bittersweet',
'GCR' : 'Red-mouthed goby',
'GDB' : 'Two-spotted clingfish',
'GDE' : 'Threadfin rockling',
'GDG' : 'Silvery pout',
'GDT' : 'Arctic rockling',
'GEL' : 'Gelidium seaweeds',
'GEP' : 'Snake mackerels, escolars nei',
'GEQ' : 'Giant gelidium',
'GER' : 'Chaceon geryons nei',
'GEU' : 'Torroto grunt',
'GEY' : 'Deep-sea crabs, geryons nei',
'GFB' : 'Greater forkbeard',
'GGD' : 'Shore rockling',
'GGU' : 'Three-bearded rockling',
'GGY' : 'Mediterranean bigeye rockling',
'GHB' : 'Bucchichs goby',
'GHL' : 'Greenland halibut',
'GIT' : 'Giant tiger prawn',
'GJI' : 'Richards gibbula',
'GKA' : 'Leather gracilaria',
'GKL' : 'Common European bittersweet',
'GLI' : 'Smalltoothed argentine',
'GMG' : 'Thorny tinselfish',
'GMR' : 'Grey large-eye bream',
'GMW' : 'Blue-lined large-eye bream',
'GOA' : 'West African goatfish',
'GOB' : 'Atlantic gobies nei',
'GOG' : 'Gudgeon',
'GON' : 'Rock goby',
'GOO' : 'Goose barnacles nei',
'GOX' : 'Goatfishes',
'GOZ' : 'Steinitzs goby',
'GPA' : 'Gobies nei',
'GPB' : 'Brazilian groupers nei',
'GPC' : 'Guinean pike conger',
'GPD' : 'Dusky grouper',
'GPE' : 'Longfin hake',
'GPN' : 'Nassau grouper',
'GPW' : 'White grouper',
'GPX' : 'Groupers nei',
'GQR' : 'Red harpoon',
'GRA' : 'African striped grunt',
'GRB' : 'Bigeye grunt',
'GRC' : 'Greenland cod',
'GRM' : 'Patagonian grenadier',
'GRN' : 'Blue grenadier',
'GRO' : 'Groundfishes nei',
'GRQ' : 'Mediterranean geryon',
'GRV' : 'Grenadiers nei',
'GRX' : 'Grunts, sweetlips nei',
'GSK' : 'Greenland shark',
'GSM' : 'Solenette',
'GSQ' : 'Lightfoot crab',
'GTA' : 'Three-spined stickleback',
'GTF' : 'Guitarfishes, etc. nei',
'GTR' : 'None',
'GUC' : 'Cape gurnard',
'GUG' : 'Grey gurnard',
'GUI' : 'Indo-Pacific gurnards',
'GUM' : 'Longfin gurnard',
'GUN' : 'Piper gurnard',
'GUP' : 'Gulper shark',
'GUQ' : 'Leafscale gulper shark',
'GUR' : 'Red gurnard',
'GUS' : 'Guinea shrimp',
'GUU' : 'Tub gurnard',
'GUX' : 'Gurnards, searobins nei',
'GUY' : 'Gurnards nei',
'GUZ' : 'Guitarfishes nei',
'GWV' : 'Striped goby',
'GXW' : 'Darwins slimehead',
'GYX' : 'Geryons nei',
'HAD' : 'Haddock',
'HAL' : 'Atlantic halibut',
'HAU' : 'Hapuka',
'HCX' : 'Hard clams nei',
'HCZ' : 'Squirrelfishes nei',
'HDR' : 'Black wing flyingfish',
'HDV' : 'Imperial blackfish',
'HER' : 'Atlantic herring',
'HFG' : 'Black bream',
'HKB' : 'Benguela hake',
'HKC' : 'Cape hakes',
'HKE' : 'European hake',
'HKF' : 'Armed nylon shrimp',
'HKK' : 'Shallow-water Cape hake',
'HKM' : 'Senegalese hake',
'HKN' : 'Southern hake',
'HKO' : 'Deep-water Cape hake',
'HKP' : 'Argentine hake',
'HKR' : 'Red hake',
'HKS' : 'Silver hake',
'HKT' : 'Smooth nylon shrimp',
'HKU' : 'Brazilian codling',
'HKW' : 'White hake',
'HKX' : 'Hakes nei',
'HKZ' : 'Merluccid hakes nei',
'HLT' : 'Tuberculate abalone',
'HLZ' : 'Sea thong',
'HMC' : 'Cape horse mackerel',
'HMJ' : 'Pinecone soldierfish',
'HMM' : 'Mediterranean horse mackerel',
'HMY' : 'False scad',
'HMZ' : 'Cunene horse mackerel',
'HNG' : 'Black sole',
'HNQ' : 'Pink geryon',
'HNY' : 'Velvet helcion',
'HOL' : 'Chimaeras, etc. nei',
'HOM' : 'Atlantic horse mackerel',
'HOT' : 'Blue hottentot',
'HOU' : 'Houting',
'HPH' : 'Short snouted seahorse',
'HPR' : 'Mediterranean slimehead',
'HQM' : 'Two-row rock shell',
'HTS' : 'Banded butterflyfish',
'HTU' : 'Glasseye',
'HUH' : 'Beluga',
'HUS' : 'Yellow snapper',
'HVP' : 'Patagonian hake',
'HVQ' : 'None',
'HVS' : 'Sabre squirrelfish',
'HWB' : 'Pelagic basslet',
'HWH' : 'Redcoat',
'HWK' : 'Kai soldierfish',
'HXB' : 'Bicolor jack',
'HXT' : 'Sharpnose sevengill shark',
'HYB' : 'Benoits lanternfish',
'HYD' : 'Ratfishes nei',
'HYS' : 'Glasshead grenadier',
'HZL' : 'Azores chromis',
'IAR' : 'Pink cuttlefish',
'IAX' : 'Cuttlefishes nei',
'IBD' : 'Tånings lanternfish',
'IBK' : 'Scalebreast gurnard',
'IFJ' : 'None',
'IGU' : 'Dusky spinefoot',
'III' : 'Thickened nassa',
'IIR' : 'Netted nassa',
'ILG' : 'Peacock wrasse',
'ILI' : 'West African ilisha',
'ILL' : 'Shortfin squids nei',
'IMS' : 'Carragheen (Irish) moss',
'INV' : 'Aquatic invertebrates nei',
'IOD' : 'Blue-leg swimcrab',
'IOX' : 'Sepiola bobtail squids nei',
'ISC' : 'Iceland scallop',
'ISV' : 'Rock chickory',
'ITG' : 'Elegant bobtail squid',
'ITW' : 'Common bobtail squid',
'IVD' : 'Barred hogfish',
'JAA' : 'Blue jack mackerel',
'JAD' : 'Norwegian skate',
'JAI' : 'Brown ray',
'JAM' : 'Maltese ray',
'JAR' : 'Rough ray',
'JAT' : 'Rough skate',
'JAX' : 'Jack and horse mackerels nei',
'JAY' : 'Speckled ray',
'JBA' : 'Anomalous lanternfish',
'JCM' : 'None',
'JCN' : 'Emerald wrasse',
'JCR' : 'Royal cucumber',
'JCX' : 'Maja spider crabs nei',
'JDO' : 'Tortoneses stingray',
'JDP' : 'Common stingray',
'JEL' : 'Jellyfishes nei',
'JFV' : 'Leopard skate',
'JHD' : 'Gemellars lanternfish',
'JNA' : 'Pacific coral moss',
'JOD' : 'John dory',
'JOS' : 'Silvery John dory',
'JRF' : 'African ray',
'JRS' : 'Mediterranean starry ray',
'JRW' : 'Bigelows ray',
'JUX' : 'Exasperating jujubine',
'JXX' : 'Argentines nei',
'KAW' : 'Kawakawa',
'KCA' : 'King crab',
'KCB' : 'Toothed rock crab',
'KCD' : 'Red king crab',
'KCP' : 'Kingklip',
'KCS' : 'King crabs',
'KCT' : 'Stone king crab',
'KCX' : 'King crabs, stone crabs nei',
'KCZ' : 'King crabs nei',
'KDE' : 'Spiny bonnet',
'KDF' : 'American slipper-limpet',
'KDH' : 'Rugose bonnet',
'KEF' : 'Deep-sea red crab',
'KFA' : 'Chamber venus',
'KGM' : 'King mackerel',
'KGX' : 'Seerfishes nei',
'KIA' : 'None',
'KJC' : 'Girdle anemone',
'KLK' : 'Smooth callista',
'KNS' : 'Knife shrimp',
'KNW' : 'Red panache',
'KOB' : 'Southern meagre(=Mulloway)',
'KON' : 'Granular top-shell',
'KOY' : 'Reticulated leatherjacket',
'KPG' : 'Shamefaced crab',
'KRI' : 'Antarctic krill',
'KRJ' : 'Knobbed triton',
'KSY' : 'Kleins sole',
'KTG' : 'Olive green cockle',
'KTS' : 'Sand cockle',
'KTT' : 'Tuberculate cockle',
'KTU' : 'Poorly ribbed cockle',
'KTV' : 'Long-armed squid',
'KUP' : 'Kuruma prawn',
'KYP' : 'Kyphosus sea chubs nei',
'KYS' : 'Bermuda sea chub',
'KYX' : 'Sea chubs nei',
'LAG' : 'Opah',
'LAH' : 'North European kelp',
'LAR' : 'River lamprey',
'LAS' : 'Lampreys nei',
'LAU' : 'Sea lamprey',
'LAZ' : 'Kelps nei',
'LBA' : 'American lobster',
'LBE' : 'European lobster',
'LBS' : 'Homarus lobsters nei',
'LCM' : 'Fivebeard rockling',
'LCT' : 'Arctic eelpout',
'LDA' : 'None',
'LDB' : 'Four-spot megrim',
'LDS' : 'Mirror lanternfish',
'LDV' : 'Large-scaled gurnard',
'LEC' : 'Escolar',
'LEE' : 'Leerfish',
'LEF' : 'Lefteye flounders nei',
'LEM' : 'Lemon sole',
'LEP' : 'Spiny gurnard',
'LEZ' : 'Megrims nei',
'LFA' : 'None',
'LFL' : 'Smooth puffer',
'LFO' : 'Brain root coral',
'LFX' : 'None',
'LFZ' : 'Silver-cheeked toadfish',
'LGH' : 'Oceanic puffer',
'LHB' : 'Spotcheek emperor',
'LHN' : 'Spangled emperor',
'LHT' : 'Largehead hairtail',
'LIL' : 'Striped seasnail',
'LIM' : 'Montagus seasnail',
'LIN' : 'Ling',
'LIO' : 'Velvet swimcrab',
'LIS' : 'Kelp snailfish',
'LIT' : 'Lithothamnion',
'LIX' : 'Lizardfishes nei',
'LJA' : 'African red snapper',
'LJE' : 'African brown snapper',
'LJV' : 'Blacktail snapper',
'LKD' : 'Lepadidae barnacles nei',
'LKJ' : 'Italian deep-sea shrimp',
'LKN' : 'Catalonian striped shrimp',
'LKO' : 'Arrow shrimp',
'LKS' : 'Striped gladiator shrimp',
'LKT' : 'Golden shrimp',
'LKW' : 'Striped soldier shrimp',
'LMA' : 'Longfin mako',
'LMG' : 'Longfin codling',
'LMZ' : 'Mackerel sharks',
'LNL' : 'Longtail croaker',
'LNZ' : 'Lings nei',
'LOB' : 'Tripletail',
'LOO' : 'Smalltooth sand tiger',
'LOQ' : 'Craylets, squat lobsters nei',
'LOS' : 'Slipper lobsters nei',
'LOT' : 'Longtail tuna',
'LOX' : 'Lobsters nei',
'LOY' : 'Royal spiny lobster',
'LPS' : 'North Atlantic codling',
'LPZ' : 'Limpets nei',
'LQA' : 'Arched swimming crab',
'LQB' : 'Northern lucina',
'LQD' : 'Tangle',
'LQV' : 'Smooth swimcrab',
'LQX' : 'Sea belt',
'LQY' : 'Rough limpet',
'LRI' : 'Goldbanded jobfish',
'LRL' : 'Striped trumpeter',
'LRX' : 'Yellowtail blue snapper',
'LRY' : 'Ornate jobfish',
'LSJ' : 'Shrimp scad',
'LTA' : 'Little tunny(=Atl.black skipj)',
'LTN' : 'Atlantic emperor',
'LTQ' : 'Sky emperor',
'LUH' : 'Chub',
'LUK' : 'African moonfish',
'LUM' : 'Lumpfish(=Lumpsucker)',
'LVC' : 'Norwegian egg cockle',
'LVD' : 'Eelpouts nei',
'LVK' : 'Common bluestripe snapper',
'LVM' : 'Luvar',
'LVN' : 'Golden African snapper',
'LWD' : 'None',
'LWZ' : 'Oblique-banded snapper',
'LXE' : 'Orange-spotted emperor',
'LXT' : 'Long clawed porcelain crab',
'LYY' : 'Dragonet',
'LZS' : 'Leaping mullet',
'LZZ' : 'None',
'MAA' : 'Blue mackerel',
'MAC' : 'Atlantic mackerel',
'MAK' : 'Mako sharks',
'MAM' : 'Aquatic mammals nei',
'MAS' : 'Pacific chub mackerel',
'MAT' : 'Mactra surf clams nei',
'MAV' : 'Silvery lightfish',
'MAW' : 'West African Spanish mackerel',
'MAX' : 'Mackerels nei',
'MAZ' : 'Scomber mackerels nei',
'MBF' : 'Knout goby',
'MBL' : 'Butterfly goby',
'MCD' : 'Madeira lantern fish',
'MCH' : 'Bigeye grenadier',
'MDO' : 'Monrovia doctorfish',
'MDZ' : 'None',
'MEB' : 'None',
'MEG' : 'Megrim',
'MEL' : 'None',
'MET' : 'Metapenaeus shrimps nei',
'MFF' : 'Flagfin mojarra',
'MFZ' : 'None',
'MGA' : 'Golden grey mullet',
'MGC' : 'Thinlip grey mullet',
'MGI' : 'Parassi mullet',
'MGR' : 'Meagre',
'MGS' : 'None',
'MHA' : 'Atlantic menhaden',
'MHH' : 'Whiskered sole',
'MHP' : 'Argentine menhaden',
'MIA' : 'None',
'MJW' : 'Toad, lyre crabs nei',
'MKF' : 'Island grouper',
'MKG' : 'Thickback sole',
'MKM' : 'Gag',
'MKU' : 'Mottled grouper',
'MLL' : 'Softhead grenadier',
'MLO' : 'Western softhead grenadier',
'MLR' : 'Thicklip grey mullet',
'MLS' : 'Striped marlin',
'MMH' : 'Mediterranean moray',
'MMM' : 'Antarctic armless flounder',
'MMS' : 'None',
'MNE' : 'None',
'MNZ' : 'Monkfishes nei',
'MOA' : 'Atlantic moonfish',
'MOD' : 'Horse mussels nei',
'MOL' : 'Marine molluscs nei',
'MON' : 'Angler(=Monk)',
'MOO' : 'Moonfish',
'MOP' : 'Sunfish',
'MOR' : 'Moras nei',
'MOX' : 'Ocean sunfish',
'MPN' : 'Speckled shrimp',
'MPO' : 'Bull ray',
'MPT' : 'Blackspotted smooth-hound',
'MQL' : 'Knobby swimcrab',
'MRK' : 'Foureyed sole',
'MRM' : 'Lusitanian cownose ray',
'MSD' : 'Mackerel scad',
'MSF' : 'Mediterranean scaldfish',
'MSH' : 'Marine shells nei',
'MSK' : 'Mackerel sharks,porbeagles nei',
'MSM' : 'Mediterranean mussel',
'MSP' : 'Mediterranean spearfish',
'MSX' : 'Sea mussels nei',
'MTC' : 'Atlantic soft pout',
'MTP' : 'Spotted lanternfish',
'MTS' : 'Spottail mantis squillid',
'MUE' : 'Murex',
'MUF' : 'Flathead grey mullet',
'MUI' : 'Morays nei',
'MUL' : 'Mullets nei',
'MUM' : 'Goatfishes, red mullets nei',
'MUR' : 'Surmullet',
'MUS' : 'Blue mussel',
'MUT' : 'Red mullet',
'MUV' : 'Yellowstripe goatfish',
'MUX' : 'Surmullets(=Red mullets) nei',
'MVA' : 'Shortspine African angler',
'MVD' : 'Atlantic lyre crab',
'MVO' : 'Devil anglerfish',
'MWK' : 'None',
'MWM' : 'None',
'MWQ' : 'Trough shells nei',
'MXV' : 'Shorthorn sculpin',
'MYG' : 'Hagfish',
'MYL' : 'Common eagle ray',
'MYV' : 'Mytilus mussels nei',
'MYZ' : 'So-iuy mullet',
'MZZ' : 'Marine fishes nei',
'NAS' : 'Bluespine unicornfish',
'NAU' : 'Pilotfish',
'NAZ' : 'Gibbous nassa',
'NBU' : 'Round goby',
'NEC' : 'Red codling',
'NED' : 'Needlefishes nei',
'NEK' : 'Carol bobtail squid',
'NEM' : 'Mozambique lobster',
'NEN' : 'Black gemfish',
'NEP' : 'Norway lobster',
'NEX' : 'True lobsters,lobsterettes nei',
'NGU' : 'Yellowspotted trevally',
'NGY' : 'Bludger',
'NIS' : 'Megalops shrimp',
'NKR' : 'Norwegian krill',
'NLT' : 'Black snake mackerel',
'NMM' : 'None',
'NOP' : 'Norway pout',
'NOS' : 'Striped-eyed rockcod',
'NOT' : 'Patagonian rockcod',
'NOW' : 'Snakelocks anemone',
'NOX' : 'Antarctic rockcods, noties nei',
'NRD' : 'Spotted barracudina',
'NSQ' : 'Changeable nassa',
'NUO' : 'Butterfly blenny',
'NUQ' : 'Anomuran decapods nei',
'NXI' : 'Giant trevally',
'NXM' : 'Bluefin trevally',
'NXU' : 'Black jack',
'NZA' : 'Common Atlantic grenadier',
'OAL' : 'Senegalese sole',
'OAM' : 'Adriatic sole',
'OAW' : 'None',
'OBN' : 'Peppery furrow',
'OBP' : 'None',
'OBR' : 'None',
'OBZ' : 'Sand goby',
'OCC' : 'Common octopus',
'OCM' : 'Horned and musky octopuses',
'OCN' : 'White-spotted octopus',
'OCS' : 'Oceanic whitetip shark',
'OCT' : 'Octopuses, etc. nei',
'OCZ' : 'Octopuses nei',
'ODL' : 'Boxlip mullet',
'OFC' : 'Webbed flying squid',
'OFE' : 'Orangeback flying squid',
'OFJ' : 'Neon flying squid',
'OFN' : 'Ribboned nori',
'OGT' : 'Cardinal fish',
'OHQ' : 'Common pelican-foot',
'OIL' : 'Oilfish',
'OKS' : 'Ruppells octopus squid',
'OLC' : 'None',
'OLV' : 'Paromola',
'OMM' : 'Flying squids nei',
'OMZ' : 'Ommastrephidae squids nei',
'ONK' : 'Lamarcks razor shell',
'OOA' : 'Snake blenny',
'OOS' : 'Serpent eel',
'OPH' : 'Cusk-eels, brotulas nei',
'OPT' : 'Ocean pout',
'OQD' : 'Lilliput longarm octopus',
'OQM' : 'Red sea mantis shrimp',
'OQT' : 'Spider octopus',
'ORD' : 'Oreo dories nei',
'ORY' : 'Orange roughy',
'OSG' : 'Pudgy cuskeel',
'OST' : 'Flat and cupped oysters nei',
'OUB' : 'Wide-eyed flounder',
'OUL' : 'European common squid',
'OUM' : 'Midsize squid',
'OUW' : 'Alloteuthis squids nei',
'OXN' : 'Sailfin roughshark',
'OXY' : 'Angular roughshark',
'OYC' : 'Cupped oysters nei',
'OYF' : 'European flat oyster',
'OYG' : 'Pacific cupped oyster',
'OYX' : 'Flat oysters nei',
'PAA' : 'Baltic prawn',
'PAC' : 'Common pandora',
'PAG' : 'Softshell red crab',
'PAL' : 'Palaemonid shrimps nei',
'PAN' : 'Pandalus shrimps nei',
'PAR' : 'Red pandora',
'PAT' : 'Longtail Southern cod',
'PAU' : 'Undulate venus',
'PAX' : 'Pandoras nei',
'PBC' : 'Palebelly searsid',
'PCB' : 'Barnacle',
'PCR' : 'Tanner crabs nei',
'PDZ' : 'Pandalid shrimps nei',
'PEE' : 'Common periwinkle',
'PEI' : 'None',
'PEL' : 'Pelagic fishes nei',
'PEN' : 'Penaeus shrimps nei',
'PEQ' : 'Yellowfin river pellona',
'PER' : 'Periwinkles nei',
'PET' : 'Royal threadfin',
'PEW' : 'White perch',
'PEZ' : 'Penaeid shrimps nei',
'PFM' : 'Crimson jobfish',
'PGA' : 'Panga seabream',
'PHO' : 'Rissos smooth-head',
'PIA' : 'None',
'PIC' : 'Picarels nei',
'PIL' : 'European pilchard(=Sardine)',
'PIQ' : 'Delta prawn',
'PIZ' : 'Striate pisania',
'PJC' : 'African armoured searobin',
'PKE' : 'Parrot grunt',
'PKV' : 'Olive grunt',
'PLA' : 'Amer. plaice(=Long rough dab)',
'PLE' : 'European plaice',
'PLN' : 'European whitefish',
'PLS' : 'Pelagic stingray',
'PLY' : 'Polystegan seabreams nei',
'PLZ' : 'Righteye flounders nei',
'PNB' : 'Redspotted shrimp',
'PNI' : 'Indian white prawn',
'PNQ' : 'Rockpool prawn',
'PNU' : 'Southern brown shrimp',
'PNV' : 'Whiteleg shrimp',
'POA' : 'Atlantic pomfret',
'POC' : 'Polar cod',
'POD' : 'Poor cod',
'POI' : 'Offshore rockfish',
'POK' : 'Saithe(=Pollock)',
'POL' : 'Pollack',
'POP' : 'Pompano',
'POR' : 'Porbeagle',
'POS' : 'Southern blue whiting',
'POX' : 'Pompanos nei',
'PPH' : 'Bahamas sawshark',
'PPX' : 'Pelagic percomorphs nei',
'PRA' : 'Northern prawn',
'PRC' : 'Percoids nei',
'PRI' : 'Bigeyes,glasseyes,bulleyes nei',
'PRP' : 'Roudi escolar',
'PRR' : 'Parrotfish',
'PSB' : 'Spiny turbot',
'PSE' : 'Bobo croaker',
'PSK' : 'Crocodile shark',
'PSL' : 'Pink spiny lobster',
'PSS' : 'Cassava croaker',
'PTM' : 'False catshark',
'PTY' : 'Longneck croaker',
'PUF' : 'Northern puffer',
'PUX' : 'Puffers nei',
'PVJ' : 'Narwal shrimp',
'PVQ' : 'Senegalese smooth swimcrab',
'PVR' : 'Atlantic ditch shrimp',
'PWT' : 'Parrotfishes nei',
'PXV' : 'Stout beardfish',
'PZB' : 'Barracudinas, etc. nei',
'QFF' : 'None',
'QPH' : 'Henslows swimming crab',
'QPX' : 'Palaemon shrimps nei',
'QSC' : 'Queen scallop',
'QSO' : 'None',
'QTV' : 'Limpet',
'QUB' : 'Longnose spurdog',
'QUL' : 'Spined pygmy shark',
'QZU' : 'Bigeye picarel',
'RAE' : 'European razor clam',
'RAG' : 'Indian mackerel',
'RAJ' : 'Rays and skates nei',
'RAQ' : 'Spanner crab',
'RAT' : 'Spotted ratfish',
'RAZ' : 'Solen razor clams nei',
'RBC' : 'Blackchin guitarfish',
'RBF' : 'None',
'RBO' : 'Slender guitarfish',
'RBX' : 'Common guitarfish',
'RCR' : 'Tadpole fish',
'RCT' : 'Straightnose rabbitfish',
'RCW' : 'Red swamp crawfish',
'RDC' : 'Roughtail stingray',
'RDQ' : 'None',
'REA' : 'Redbanded seabream',
'REB' : 'Beaked redfish',
'REC' : 'Cape redfish',
'RED' : 'Atlantic redfishes nei',
'REG' : 'Golden redfish',
'REL' : 'King of herrings',
'REO' : 'Shark sucker',
'REQ' : 'Patagonian redfish',
'RES' : 'Mangrove red snapper',
'RFT' : 'Starry skate',
'RGH' : 'Squaretail kob',
'RGL' : 'Spiny butterfly ray',
'RGX' : 'Scaldfishes nei',
'RHA' : 'Milk shark',
'RHC' : 'Knife-nosed chimaeras nei',
'RHG' : 'Roughhead grenadier',
'RHP' : 'Dulse',
'RHT' : 'Atlantic sharpnose shark',
'RIB' : 'Common mora',
'RJA' : 'White skate',
'RJB' : 'Blue skate',
'RJC' : 'Thornback ray',
'RJE' : 'Small-eyed ray',
'RJF' : 'Shagreen ray',
'RJG' : 'Arctic skate',
'RJH' : 'Blonde ray',
'RJI' : 'Sandy ray',
'RJK' : 'Sailray',
'RJM' : 'Spotted ray',
'RJN' : 'Cuckoo ray',
'RJO' : 'Longnosed skate',
'RJP' : 'Soft skate',
'RJQ' : 'Spinetail ray',
'RJR' : 'Starry ray',
'RJT' : 'Winter skate',
'RJU' : 'Undulate ray',
'RJV' : 'None',
'RJY' : 'Round ray',
'RKQ' : 'Noahs ark',
'RKZ' : 'Scaldback',
'RLF' : 'Lesser amberjack',
'RLI' : 'Imperial scaldfish',
'RLR' : 'Guinean amberjack',
'RMM' : 'Devil fish',
'RNG' : 'Roundnose grenadier',
'RNH' : 'Thors scaldfish',
'RNJ' : 'Blackbanded trevally',
'ROA' : 'Stout bobtail squid',
'ROB' : 'Snooks(=Robalos) nei',
'ROL' : 'Rocklings nei',
'RPC' : 'Dwarf sawfish',
'RPF' : 'Red Sea goatfish',
'RPG' : 'Red porgy',
'RPN' : 'Sea snails',
'RPR' : 'Common sawfish',
'RPW' : 'Veined rapa whelk',
'RRH' : 'Red-eye round herring',
'RRU' : 'Rainbow runner',
'RSC' : 'Rough scad',
'RSE' : 'Red scorpionfish',
'RSK' : 'Requiem sharks nei',
'RSP' : 'Roundscale spearfish',
'RTB' : 'Round stingray',
'RTX' : 'Grenadiers, rattails nei',
'RUB' : 'Blue runner',
'RXY' : 'Meagres nei',
'RYG' : 'Rubyfish',
'SAA' : 'Round sardinella',
'SAE' : 'Madeiran sardinella',
'SAI' : 'Atlantic sailfish',
'SAL' : 'Atlantic salmon',
'SAN' : 'Sandeels(=Sandlances) nei',
'SAO' : 'Tadpole codling',
'SAR' : 'Mango tilapia',
'SAU' : 'Atlantic saury',
'SAX' : 'Sauries nei',
'SBA' : 'Axillary seabream',
'SBB' : 'Boa dragonfish',
'SBF' : 'Southern bluefin tuna',
'SBG' : 'Gilthead seabream',
'SBL' : 'Bluntnose sixgill shark',
'SBN' : 'None',
'SBP' : 'Pargo breams nei',
'SBR' : 'Blackspot seabream',
'SBS' : 'Saddled seabream',
'SBX' : 'Porgies, seabreams nei',
'SBZ' : 'Zebra seabream',
'SCA' : 'American sea scallop',
'SCB' : 'Atlantic bay scallop',
'SCD' : 'Blue swimming crab',
'SCE' : 'Great Atlantic scallop',
'SCF' : 'Turbots nei',
'SCK' : 'Kitefin shark',
'SCL' : 'Catsharks, nursehounds nei',
'SCO' : 'Scorpionfishes, redfishes nei',
'SCP' : 'Scup',
'SCR' : 'Spinous spider crab',
'SCS' : 'Scorpionfishes, rockfishes nei',
'SCU' : 'Sculpins',
'SCX' : 'Scallops nei',
'SCY' : 'Lesser slipper lobster',
'SDH' : 'Rough longnose dogfish',
'SDR' : 'Atlantic lizardfish',
'SDS' : 'Starry smooth-hound',
'SDU' : 'Arrowhead dogfish',
'SDV' : 'Smooth-hounds nei',
'SDX' : 'Scads nei',
'SEG' : 'Grey seal',
'SEO' : 'Choicy ruff',
'SEY' : 'Violet warehou',
'SFA' : 'Indo-Pacific sailfish',
'SFR' : 'Nilssons pipefish',
'SFS' : 'Silver scabbardfish',
'SFV' : 'Norway redfish',
'SGZ' : 'California scorpionfish',
'SHA' : 'American shad',
'SHB' : 'Bramble shark',
'SHC' : 'Pontic shad',
'SHD' : 'Allis and twaite shads',
'SHE' : 'Black Sea shad',
'SHL' : 'Lanternsharks nei',
'SHO' : 'Blackmouth catshark',
'SHQ' : 'Black-striped pipefish',
'SHR' : 'Sharpsnout seabream',
'SHX' : 'Dogfish sharks, etc. nei',
'SHZ' : 'Shads nei',
'SIB' : 'Muddy arrowtooth eel',
'SIC' : 'African sicklefish',
'SIL' : 'Silversides(=Sand smelts) nei',
'SIO' : 'None',
'SIP' : 'Silver pomfret',
'SIU' : 'Peanut worm',
'SIX' : 'Sardinellas nei',
'SIY' : 'None',
'SJA' : 'Great Mediterranean scallop',
'SKA' : 'Raja rays nei',
'SKB' : 'Sticklebacks',
'SKF' : 'African mud shrimp',
'SKH' : 'Various sharks nei',
'SKI' : 'Kissing prochilodus',
'SKJ' : 'Skipjack tuna',
'SKM' : 'Atlantic mud shrimp',
'SKX' : 'Sharks, rays, skates, etc. nei',
'SLD' : 'Santer seabream',
'SLF' : 'Carpenter seabream',
'SLI' : 'Spanish ling',
'SLM' : 'Salema',
'SLO' : 'Common spiny lobster',
'SLS' : 'Southern spiny lobster',
'SLV' : 'Tropical spiny lobsters nei',
'SLX' : 'Salmonoids nei',
'SLZ' : 'Salmonids nei',
'SMA' : 'Shortfin mako',
'SMC' : 'Smoothmouth sea catfish',
'SMD' : 'Smooth-hound',
'SME' : 'European smelt',
'SMP' : 'None',
'SNA' : 'Snappers nei',
'SNI' : 'Snipefishes nei',
'SNK' : 'Snoek',
'SNL' : 'Lane snapper',
'SNO' : 'Common snook',
'SNQ' : 'Small red scorpionfish',
'SNR' : 'Northern red snapper',
'SNS' : 'Longspine snipefish',
'SNX' : 'Snappers, jobfishes nei',
'SOA' : 'Southeast Atlantic soles nei',
'SOC' : 'Sockeye(=Red) salmon',
'SOI' : 'Razor clams, knife clams nei',
'SOL' : 'Common sole',
'SOM' : 'Wels(=Som) catfish',
'SOO' : 'None',
'SOP' : 'Southern pink shrimp',
'SOR' : 'Little sleeper shark',
'SOS' : 'Sand sole',
'SOT' : 'Spottail spiny turbot',
'SOW' : 'West coast sole',
'SOX' : 'Soles nei',
'SOZ' : 'Solenocerid shrimps nei',
'SPC' : 'Picarel',
'SPF' : 'Longbill spearfish',
'SPI' : 'Spinefeet(=Rabbitfishes) nei',
'SPK' : 'Great hammerhead',
'SPL' : 'Scalloped hammerhead',
'SPN' : 'Hammerhead sharks nei',
'SPO' : 'Sponges',
'SPQ' : 'Smalleye hammerhead',
'SPR' : 'European sprat',
'SPU' : 'Spotted seabass',
'SPX' : 'Salps',
'SPY' : 'Hammerhead sharks, etc. nei',
'SPZ' : 'Smooth hammerhead',
'SQA' : 'Argentine shortfin squid',
'SQC' : 'Common squids nei',
'SQE' : 'European flying squid',
'SQF' : 'Veined squid',
'SQG' : 'Angolan flying squid',
'SQI' : 'Northern shortfin squid',
'SQL' : 'Longfin squid',
'SQM' : 'Broadtail shortfin squid',
'SQP' : 'Patagonian squid',
'SQR' : 'European squid',
'SQS' : 'Sevenstar flying squid',
'SQU' : 'Various squids nei',
'SQY' : 'Squillids nei',
'SQZ' : 'Inshore squids nei',
'SRA' : 'Atlantic searobins',
'SRE' : 'Rudd',
'SRG' : 'Sargo breams nei',
'SRI' : 'Marbled spinefoot',
'SRJ' : 'Brown comber',
'SRK' : 'Painted comber',
'SRQ' : 'Halfspined flathead',
'SRR' : 'Antarctic starry skate',
'SRX' : 'Rays, stingrays, mantas nei',
'SSA' : 'Atlantic silverside',
'SSB' : 'Sand steenbras',
'SSD' : 'Surf clams nei',
'SSG' : 'Grooved sea squirt',
'SSH' : 'Scarlet shrimp',
'SSI' : 'Blackfin icefish',
'SSK' : 'Kaups arrowtooth eel',
'SSM' : 'Atlantic Spanish mackerel',
'SSP' : 'Shortbill spearfish',
'SSX' : 'Sea squirts nei',
'STB' : 'Striped bass',
'STC' : 'Black stone crab',
'STF' : 'Starfishes nei',
'STG' : 'Squeteague(=Gray weakfish)',
'STH' : 'Red starfish',
'STI' : 'Stingrays nei',
'STQ' : 'Broadnosed pipefish',
'STT' : 'Stingrays, butterfly rays nei',
'STU' : 'Sturgeons nei',
'STW' : 'Steenbrasses nei',
'SUA' : 'Sawback angelshark',
'SUR' : 'Surgeonfishes nei',
'SUT' : 'Smoothback angelshark',
'SVC' : 'Silver carp',
'SVE' : 'Striped venus',
'SVV' : 'Spotted-fin rockfish',
'SWA' : 'White seabream',
'SWB' : 'Brown seaweeds',
'SWG' : 'Green seaweeds',
'SWM' : 'Swimming crabs, etc. nei',
'SWO' : 'Swordfish',
'SWP' : 'Fleshy dilsea',
'SWQ' : 'Red delesseria',
'SWR' : 'Red seaweeds',
'SWX' : 'Seaweeds nei',
'SXD' : 'Diaphanous hatchet fish',
'SYC' : 'Small-spotted catshark',
'SYN' : 'Blackmouth splitfin',
'SYR' : 'Knifetooth dogfish',
'SYT' : 'Nursehound',
'SYX' : 'Catsharks, etc. nei',
'TAL' : 'Big-scale pomfret',
'TAR' : 'Tarpon',
'TAS' : 'Rough pomfret',
'TBR' : 'Goldsinny-wrasse',
'TCW' : 'Hairtails nei',
'TDA' : 'Black-faced blenny',
'TDF' : 'Toadfishes nei',
'TDQ' : 'Lesser flying squid',
'TGE' : 'Beaugregory',
'TGQ' : 'Fourhorn sculpin',
'TGS' : 'Caramote prawn',
'TGV' : 'Smalleye squaretail',
'THF' : 'Threadfins, tasselfishes nei',
'THR' : 'Thresher sharks nei',
'THS' : 'Thickback soles nei',
'TIG' : 'Tiger shark',
'TIL' : 'Great Northern tilefish',
'TIP' : 'Green tiger prawn',
'TIS' : 'Tilefishes nei',
'TJX' : 'Atlantic thornyhead',
'TLA' : 'Arctic grayling',
'TLD' : 'St. Pauls fingerfin',
'TLV' : 'Grayling',
'TMB' : 'Southwest Atlantic butterfish',
'TMP' : 'Ornate wrasse',
'TOA' : 'Antarctic toothfish',
'TOD' : 'Electric rays nei',
'TOE' : 'Torpedo rays',
'TOG' : 'Longfin pompano',
'TOM' : 'Atlantic tomcod',
'TOO' : 'Guinean pompano',
'TOP' : 'Patagonian toothfish',
'TOQ' : 'Helmet ton',
'TOX' : 'Tonguefishes',
'TOZ' : 'Lesser weever',
'TPA' : 'Dealfish',
'TPS' : 'Carpet shells nei',
'TQF' : 'Brittle pen shell',
'TRA' : 'Weeverfishes nei',
'TRB' : 'Whitetip reef shark',
'TRC' : 'Slimeheads nei',
'TRE' : 'Jacks, crevalles nei',
'TRG' : 'Grey triggerfish',
'TRI' : 'Triggerfishes, durgons nei',
'TRK' : 'Houndsharks, smoothhounds nei',
'TRO' : 'Trouts nei',
'TRP' : 'Dealfishes',
'TRQ' : 'Mediterranean dealfish',
'TRR' : 'Rainbow trout',
'TRS' : 'Sea trout',
'TRZ' : 'White trevally',
'TSD' : 'Twaite shad',
'TST' : 'Sickle pomfret',
'TSU' : 'Roughsnout grenadier',
'TTO' : 'Electric ray',
'TTR' : 'Marbled electric ray',
'TTV' : 'Common torpedo',
'TTX' : 'Marine turtles nei',
'TUG' : 'Green turtle',
'TUN' : 'Tunas nei',
'TUR' : 'Turbot',
'TUS' : 'True tunas nei',
'TUX' : 'Tuna-like fishes nei',
'TVQ' : 'Threadfin smooth-head',
'TVY' : 'None',
'TWL' : 'Tellins nei',
'TZA' : 'Spotted weever',
'TZR' : 'Starry weever',
'TZY' : 'Spiny scorpionfish',
'UAE' : 'Zebra tilefish',
'UBA' : 'Blue fathead',
'UBI' : 'None',
'UBS' : 'Drums nei',
'UCA' : 'Canary drum(=Baardman)',
'UCC' : 'Swamp ghost crab',
'UCG' : 'West african fiddler crab',
'UCU' : 'None',
'UDP' : 'Wakame',
'UDV' : 'Club scallop',
'UGU' : 'Unihorn octopus',
'UHL' : 'Bigfin reef squid',
'UHQ' : 'Devil firefish',
'UKB' : 'Black sea urchin',
'ULF' : 'Royal flagfin',
'ULO' : 'Solid surf clam',
'ULT' : 'Subtruncate surf clam',
'ULV' : 'None',
'UMO' : 'Fusca drum',
'UPH' : 'Pors goatfish',
'UPM' : 'Goldband goatfish',
'URA' : 'Stargazers',
'URC' : 'Sea urchins nei',
'URK' : 'Blainvilles murex',
'URM' : 'Stony sea urchin',
'URQ' : 'Rugose squat lobster',
'URS' : 'European edible sea urchin',
'URX' : 'Sea urchins, etc. nei',
'USB' : 'Ballan wrasse',
'USE' : 'Cottonmouth jack',
'USI' : 'Cuckoo wrasse',
'USK' : 'Tusk(=Cusk)',
'USY' : 'Blue-barred parrotfish',
'UUC' : 'Stargazer',
'UVU' : 'Sea lettuce',
'UYH' : 'Sea lettuces nei',
'VAD' : 'Vadigo',
'VEN' : 'None',
'VEV' : 'Warty venus',
'VIV' : 'Vimba bream',
'VLO' : 'Spiny lobsters nei',
'VMA' : 'Atlantic chub mackerel',
'VNA' : 'Golden carpet shell',
'VNR' : 'Banded carpet shell',
'VRA' : 'White-edged lyretail',
'VRL' : 'Yellow-edged lyretail',
'VSC' : 'Variegated scallop',
'VSP' : 'Spiny vase',
'VUC' : 'Corrugated venus',
'WAG' : 'None',
'WAH' : 'Wahoo',
'WEG' : 'Greater weever',
'WEX' : 'Weevers nei',
'WHA' : 'Hapuku wreckfish',
'WHB' : 'Blue whiting(=Poutassou)',
'WHE' : 'Whelk',
'WHF' : 'Whitefishes nei',
'WHG' : 'Whiting',
'WHM' : 'Atlantic white marlin',
'WHX' : 'Whelks',
'WIO' : 'Cyclope sole',
'WIT' : 'Witch flounder',
'WJP' : 'None',
'WKB' : 'Smalltooth weakfish',
'WKK' : 'King weakfish',
'WKP' : 'Western king prawn',
'WKX' : 'Weakfishes nei',
'WOR' : 'Marine worms',
'WRA' : 'Wrasses, hogfishes, etc. nei',
'WRF' : 'Wreckfish',
'WRM' : 'Brown wrasse',
'WRR' : 'Whiteheads round herring',
'WRV' : 'Green wrasse',
'WRX' : 'Labrus wrasses nei',
'WSA' : 'Blacktail comber',
'WSH' : 'Great white shark',
'WWA' : 'Striped weever',
'WZA' : 'Cadenats sole',
'XFS' : 'Xiphopenaeus shrimps nei',
'XIX' : 'Helmets shells nei',
'XKX' : 'Plesionika shrimps nei',
'XOD' : 'Trisopterus nei',
'XPO' : 'Silver pomfrets nei',
'XSX' : 'Scorpionfishes, gurnards nei',
'XYN' : 'Pearly razorfish',
'YEL' : 'Yellowtail flounder',
'YEX' : 'Limandas nei',
'YFC' : 'Grey wrasse',
'YFM' : 'Corkwing wrasse',
'YFO' : 'None',
'YFT' : 'Yellowfin tuna',
'YFU' : 'Elongate tonguesole',
'YFX' : 'Symphodus wrasses nei',
'YLL' : 'Mediterranean slipper lobster',
'YMS' : 'Monaco shrimp',
'YMW' : 'Deep water sole',
'YNA' : 'Acoupa weakfish',
'YNM' : 'Smallscale weakfish',
'YNU' : 'Portuguese sole',
'YNV' : 'Green weakfish',
'YNY' : 'Guinean sole',
'YOE' : 'Senegalese tonguesole',
'YOI' : 'Canary tonguesole',
'YOX' : 'Tonguesole nei',
'YPT' : 'None',
'YQG' : 'Guinean tonguesole',
'YRB' : 'Obtuse barracuda',
'YRS' : 'European barracuda',
'YST' : 'None',
'YTC' : 'Yellowtail amberjack',
'YTL' : 'Longfin yellowtail',
'YTN' : 'Crystal goby',
'YUR' : 'Diamondback squid',
'ZAC' : 'Deepbody boarfish',
'ZEX' : 'Dories nei',
'ZEZ' : 'None',
'ZGC' : 'Mediterranean sand eel',
'ZGP' : 'Topknot',
'ZGS' : 'Smooth sandeel'}

In [190]:
# replace codes in column "species" with english names
fish_catch_EU["species"] = fish_catch_EU["species"].map(species_dict)
fish_catch_EU.head()

Unnamed: 0,species,fishing_region,unit,country,year,volume
0,Adriatic sturgeon,All fishing regions,TLW,Denmark,2020,0
1,Adriatic sturgeon,All fishing regions,TLW,European Union,2019,0
2,Adriatic sturgeon,All fishing regions,TLW,European Union,2020,0
4,Adriatic sturgeon,All fishing regions,TLW,Ireland,2019,0
5,Adriatic sturgeon,All fishing regions,TLW,Ireland,2020,0


In [191]:
# replace unit - codes with names
# unique values unit
fish_catch_EU["unit"].unique()
fish_catch_EU["unit"] = fish_catch_EU["unit"].map({"TLW" : "Tonnes live weight"})
fish_catch_EU.head()

Unnamed: 0,species,fishing_region,unit,country,year,volume
0,Adriatic sturgeon,All fishing regions,Tonnes live weight,Denmark,2020,0
1,Adriatic sturgeon,All fishing regions,Tonnes live weight,European Union,2019,0
2,Adriatic sturgeon,All fishing regions,Tonnes live weight,European Union,2020,0
4,Adriatic sturgeon,All fishing regions,Tonnes live weight,Ireland,2019,0
5,Adriatic sturgeon,All fishing regions,Tonnes live weight,Ireland,2020,0


## Step 7: Inspect Null Values / NaNs and datatypes with info()  

In [192]:
# NaN in whole dataframe 
fish_catch_EU.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 255218 entries, 0 to 307398
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   species         255218 non-null  object
 1   fishing_region  255218 non-null  object
 2   unit            255218 non-null  object
 3   country         255218 non-null  object
 4   year            255218 non-null  int64 
 5   volume          252675 non-null  object
dtypes: int64(1), object(5)
memory usage: 13.6+ MB


In [193]:
# show NaN in values, "sub"-dataframe
fish_catch_EU_nulls = fish_catch_EU[fish_catch_EU.isna().any(axis=1)]
fish_catch_EU_nulls

Unnamed: 0,species,fishing_region,unit,country,year,volume
1788,Albacore,All fishing regions,Tonnes live weight,European Union,2018,
1789,Albacore,All fishing regions,Tonnes live weight,European Union,2019,
1862,Albacore,All fishing regions,Tonnes live weight,Ireland,2018,
1863,Albacore,All fishing regions,Tonnes live weight,Ireland,2019,
2053,Albacore,"Atlantic, Northeast",Tonnes live weight,European Union,2018,
...,...,...,...,...,...,...
307195,Dories nei,All fishing regions,Tonnes live weight,Ireland,2020,
307215,Dories nei,"Atlantic, Northeast",Tonnes live weight,European Union,2018,
307217,Dories nei,"Atlantic, Northeast",Tonnes live weight,European Union,2020,
307228,Dories nei,"Atlantic, Northeast",Tonnes live weight,Ireland,2018,


In [194]:
# fill rows with NaN in values with 0 as string to not mix datatypes - later convert to float
fish_catch_EU["volume"].fillna(value="0", inplace=True)
display(fish_catch_EU.head(),fish_catch_EU.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 255218 entries, 0 to 307398
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   species         255218 non-null  object
 1   fishing_region  255218 non-null  object
 2   unit            255218 non-null  object
 3   country         255218 non-null  object
 4   year            255218 non-null  int64 
 5   volume          255218 non-null  object
dtypes: int64(1), object(5)
memory usage: 13.6+ MB


Unnamed: 0,species,fishing_region,unit,country,year,volume
0,Adriatic sturgeon,All fishing regions,Tonnes live weight,Denmark,2020,0
1,Adriatic sturgeon,All fishing regions,Tonnes live weight,European Union,2019,0
2,Adriatic sturgeon,All fishing regions,Tonnes live weight,European Union,2020,0
4,Adriatic sturgeon,All fishing regions,Tonnes live weight,Ireland,2019,0
5,Adriatic sturgeon,All fishing regions,Tonnes live weight,Ireland,2020,0


None

In [195]:
# drop rows where species name was not available in english in Eurostat-classification. Named species = None in dictionary
fish_catch_EU.query("species == 'None'") #show rows with None
fish_catch_EU = fish_catch_EU[fish_catch_EU.species != "None"] # logic: keep only rows without None, assign variable again.
display(fish_catch_EU.head(),fish_catch_EU.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 253597 entries, 0 to 307398
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   species         253597 non-null  object
 1   fishing_region  253597 non-null  object
 2   unit            253597 non-null  object
 3   country         253597 non-null  object
 4   year            253597 non-null  int64 
 5   volume          253597 non-null  object
dtypes: int64(1), object(5)
memory usage: 13.5+ MB


Unnamed: 0,species,fishing_region,unit,country,year,volume
0,Adriatic sturgeon,All fishing regions,Tonnes live weight,Denmark,2020,0
1,Adriatic sturgeon,All fishing regions,Tonnes live weight,European Union,2019,0
2,Adriatic sturgeon,All fishing regions,Tonnes live weight,European Union,2020,0
4,Adriatic sturgeon,All fishing regions,Tonnes live weight,Ireland,2019,0
5,Adriatic sturgeon,All fishing regions,Tonnes live weight,Ireland,2020,0


None

## Step 8: Autofill Null Values when merged cell in excel/csv file is empty   
```df[["column name 1", "column name 2"]] = df[["column name 1", "column name 2"]].fillna(method='ffill', axis=0)```

## Step 9: Change data types if necessary:   
* year = integer
* volume = float, round to one decimal. Convert kilograms into tonnes (1t = 1000 kg)
* value = float, round to two decimals 

In [196]:
# show data types
fish_catch_EU.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 253597 entries, 0 to 307398
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   species         253597 non-null  object
 1   fishing_region  253597 non-null  object
 2   unit            253597 non-null  object
 3   country         253597 non-null  object
 4   year            253597 non-null  int64 
 5   volume          253597 non-null  object
dtypes: int64(1), object(5)
memory usage: 13.5+ MB


In [197]:
# change data types 
# year = integer - already right
#volume = float, round to one decimal. Convert kilograms into tonnes (1t = 1000 kg)
fish_catch_EU["volume"] = fish_catch_EU["volume"].map(lambda x : x.replace(',', '.')) # cannot convert to float with commas? got error message
fish_catch_EU["volume"] = pd.to_numeric(fish_catch_EU["volume"])
fish_catch_EU.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 253597 entries, 0 to 307398
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   species         253597 non-null  object 
 1   fishing_region  253597 non-null  object 
 2   unit            253597 non-null  object 
 3   country         253597 non-null  object 
 4   year            253597 non-null  int64  
 5   volume          253597 non-null  float64
dtypes: float64(1), int64(1), object(4)
memory usage: 13.5+ MB


In [198]:
# Check format for volume as there a very big numbers
fish_catch_EU[["volume"]].sort_values(by="volume", ascending=False)

Unnamed: 0,volume
69578,4393614484.00
71905,4370893384.00
79780,4370041954.00
77498,3959352968.00
88486,3939513348.00
...,...
70032,0.00
70031,0.00
70014,0.00
69992,0.00


## Step 10: Species cleaning 
* aggregate species = salmon, tuna, lobster, shrimp  
* sum up species in categories_species = crustaceans: shrimps and lobster; pelagic fish: tuna and salmon

In [199]:
# change order of rows
fish_catch_EU = fish_catch_EU[["country", "year", "unit", "volume", "fishing_region", "species"]]
fish_catch_EU.head()

Unnamed: 0,country,year,unit,volume,fishing_region,species
0,Denmark,2020,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
1,European Union,2019,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
2,European Union,2020,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
4,Ireland,2019,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
5,Ireland,2020,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon


In [200]:
# aggregate species - new dataframe with only 4 selected species = salmon, tuna, lobster and shrimps

fish_catch_selected_species = fish_catch_EU.query("species.str.contains('salmon|tuna|lobster|shrimp', case=False)") #or species.str.contains('tuna', case=False) or species.str.contains('lobster', case=False) or species.str.contains('shrimp', case=False)")
fish_catch_selected_species.reset_index(drop=True, inplace=True) # reset index for rows, important: inplace = True
fish_catch_selected_species.head()

Unnamed: 0,country,year,unit,volume,fishing_region,species
0,Germany,2014,Tonnes live weight,10745.0,All fishing regions,Aesop shrimp
1,Germany,2015,Tonnes live weight,2529.0,All fishing regions,Aesop shrimp
2,Germany,2016,Tonnes live weight,191.0,All fishing regions,Aesop shrimp
3,Germany,2017,Tonnes live weight,0.11,All fishing regions,Aesop shrimp
4,Germany,2018,Tonnes live weight,0.0,All fishing regions,Aesop shrimp


In [201]:
# create list of conditions
conditions = [(fish_catch_selected_species["species"].str.contains("salmon|tuna" , case=False)),
            (fish_catch_selected_species["species"].str.contains("tuna", case=False)), 
            (fish_catch_selected_species["species"].str.contains("lobster", case=False)), 
            fish_catch_selected_species["species"].str.contains("shrimp", case=False)]
#create list of values we want to assign for each condition
values = ["Salmon", "Tuna", "Lobster", "Shrimp"]

fish_catch_selected_species["species_agg"] = np.select(conditions, values)
fish_catch_selected_species.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fish_catch_selected_species["species_agg"] = np.select(conditions, values)


Unnamed: 0,country,year,unit,volume,fishing_region,species,species_agg
0,Germany,2014,Tonnes live weight,10745.0,All fishing regions,Aesop shrimp,Shrimp
1,Germany,2015,Tonnes live weight,2529.0,All fishing regions,Aesop shrimp,Shrimp
2,Germany,2016,Tonnes live weight,191.0,All fishing regions,Aesop shrimp,Shrimp
3,Germany,2017,Tonnes live weight,0.11,All fishing regions,Aesop shrimp,Shrimp
4,Germany,2018,Tonnes live weight,0.0,All fishing regions,Aesop shrimp,Shrimp


In [202]:
# aggregate species to categories - categories_species = crustaceans: shrimps and lobster; pelagic fish: tuna and salmon
# create list of conditions
conditions = [(fish_catch_selected_species["species_agg"].str.contains("Salmon|Tuna")),
            (fish_catch_selected_species["species_agg"].str.contains("Lobster|Shrimp"))]
#create list of values we want to assign for each condition
values = ["Pelagic fish", "Crustaceans"]

fish_catch_selected_species["categories_species"] = np.select(conditions, values)
fish_catch_selected_species.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fish_catch_selected_species["categories_species"] = np.select(conditions, values)


Unnamed: 0,country,year,unit,volume,fishing_region,species,species_agg,categories_species
0,Germany,2014,Tonnes live weight,10745.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
1,Germany,2015,Tonnes live weight,2529.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
2,Germany,2016,Tonnes live weight,191.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
3,Germany,2017,Tonnes live weight,0.11,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
4,Germany,2018,Tonnes live weight,0.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans


In [203]:
fish_catch_selected_species.tail(20)

Unnamed: 0,country,year,unit,volume,fishing_region,species,species_agg,categories_species
19278,Malta,2008,Tonnes live weight,0.1,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19279,Malta,2009,Tonnes live weight,0.3,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19280,Malta,2010,Tonnes live weight,0.2,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19281,Malta,2012,Tonnes live weight,0.03,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19282,Malta,2013,Tonnes live weight,0.09,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19283,Malta,2014,Tonnes live weight,137.0,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19284,Malta,2015,Tonnes live weight,326.0,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19285,Malta,2016,Tonnes live weight,1058.0,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19286,Malta,2017,Tonnes live weight,0.4,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans
19287,Malta,2018,Tonnes live weight,484.0,Mediterranean and Black Sea,Mediterranean slipper lobster,Lobster,Crustaceans


In [204]:
display(fish_catch_EU.head(), 
        fish_catch_EU.info(),
        fish_catch_selected_species.head(),
        fish_catch_selected_species.info()
        )

<class 'pandas.core.frame.DataFrame'>
Int64Index: 253597 entries, 0 to 307398
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   country         253597 non-null  object 
 1   year            253597 non-null  int64  
 2   unit            253597 non-null  object 
 3   volume          253597 non-null  float64
 4   fishing_region  253597 non-null  object 
 5   species         253597 non-null  object 
dtypes: float64(1), int64(1), object(4)
memory usage: 13.5+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19298 entries, 0 to 19297
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   country             19298 non-null  object 
 1   year                19298 non-null  int64  
 2   unit                19298 non-null  object 
 3   volume              19298 non-null  float64
 4   fishing_region      19298 non-null  object 
 5   spe

Unnamed: 0,country,year,unit,volume,fishing_region,species
0,Denmark,2020,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
1,European Union,2019,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
2,European Union,2020,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
4,Ireland,2019,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon
5,Ireland,2020,Tonnes live weight,0.0,All fishing regions,Adriatic sturgeon


None

Unnamed: 0,country,year,unit,volume,fishing_region,species,species_agg,categories_species
0,Germany,2014,Tonnes live weight,10745.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
1,Germany,2015,Tonnes live weight,2529.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
2,Germany,2016,Tonnes live weight,191.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
3,Germany,2017,Tonnes live weight,0.11,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
4,Germany,2018,Tonnes live weight,0.0,All fishing regions,Aesop shrimp,Shrimp,Crustaceans


None

## Step 11: Decoding data
## Step 12: Punctuation = decimals separator: comma, thousands separator: dot 

In [205]:
# change separators to european standard de_DE.UTF-8
# doesn't work - not really necessary

import locale
locale.setlocale(locale.LC_ALL, "de_DE.UTF-8")

#fish_catch_EU["volume_EU"] = fish_catch_EU["volume"].apply(lambda x : locale.atof(x))


'de_DE.UTF-8'

## Step 13: language = english 
## Step 14: Merging dataframes: tbd after Steps 1 - 13 are done

In [206]:
# Final dataframes here:
fish_catch_EU # all species
fish_catch_selected_species

Unnamed: 0,country,year,unit,volume,fishing_region,species,species_agg,categories_species
0,Germany,2014,Tonnes live weight,10745.00,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
1,Germany,2015,Tonnes live weight,2529.00,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
2,Germany,2016,Tonnes live weight,191.00,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
3,Germany,2017,Tonnes live weight,0.11,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
4,Germany,2018,Tonnes live weight,0.00,All fishing regions,Aesop shrimp,Shrimp,Crustaceans
...,...,...,...,...,...,...,...,...
19293,Portugal,2014,Tonnes live weight,0.23,"Indian Ocean, Western",Mediterranean slipper lobster,Lobster,Crustaceans
19294,Spain,2019,Tonnes live weight,0.01,All fishing regions,Monaco shrimp,Shrimp,Crustaceans
19295,European Union,2019,Tonnes live weight,0.01,All fishing regions,Monaco shrimp,Shrimp,Crustaceans
19296,Spain,2019,Tonnes live weight,0.01,Mediterranean and Black Sea,Monaco shrimp,Shrimp,Crustaceans


# Aquaculture production in EU

## Step 1: Import file and create dataframe with individual variable

In [207]:
aquaculture_prod_EU = pd.read_csv("data/X_Seafood Production Data/Aquaculture_EU_full data.csv", low_memory=False) #warning popped up, suggested low_memory =False

## Step 2: Inspect dataframe with head(), info(), shape, columns, tail(), describe()

In [208]:
display (aquaculture_prod_EU.head(),
         aquaculture_prod_EU.info(),
         aquaculture_prod_EU.shape,
         aquaculture_prod_EU.columns,
         aquaculture_prod_EU.tail(),
         aquaculture_prod_EU.describe()
        )

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 12 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   DATAFLOW     1048575 non-null  object
 1   LAST UPDATE  1048575 non-null  object
 2   freq         1048575 non-null  object
 3   aquameth     1048575 non-null  object
 4   aquaenv      1048575 non-null  object
 5   species      1048575 non-null  object
 6   fishreg      1048575 non-null  object
 7   unit         1048575 non-null  object
 8   geo          1048575 non-null  object
 9   TIME_PERIOD  1048575 non-null  int64 
 10  OBS_VALUE    857566 non-null   object
 11  OBS_FLAG     276743 non-null   object
dtypes: int64(1), object(11)
memory usage: 96.0+ MB


Unnamed: 0,DATAFLOW,LAST UPDATE,freq,aquameth,aquaenv,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG
0,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,CAG,BRK,F00,0,EUR,EU27_2020,2009,31511300,
1,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,CAG,BRK,F00,0,EUR,EU27_2020,2010,34381030,
2,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,CAG,BRK,F00,0,EUR,EU27_2020,2011,35084520,
3,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,CAG,BRK,F00,0,EUR,EU27_2020,2012,31307680,e
4,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,CAG,BRK,F00,0,EUR,EU28,2009,31511300,


None

(1048575, 12)

Index(['DATAFLOW', 'LAST UPDATE', 'freq', 'aquameth', 'aquaenv', 'species',
       'fishreg', 'unit', 'geo', 'TIME_PERIOD', 'OBS_VALUE', 'OBS_FLAG'],
      dtype='object')

Unnamed: 0,DATAFLOW,LAST UPDATE,freq,aquameth,aquaenv,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG
1048570,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,TOTAL,SEA,F20,10,EUR_T,EU27_2020,2010,3795489,
1048571,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,TOTAL,SEA,F20,10,EUR_T,EU27_2020,2011,3004569,
1048572,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,TOTAL,SEA,F20,10,EUR_T,EU27_2020,2012,3156128,
1048573,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,TOTAL,SEA,F20,10,EUR_T,EU28,2008,455366,
1048574,ESTAT:FISH_AQ2A(1.0),10/06/22 23:00:00,A,TOTAL,SEA,F20,10,EUR_T,EU28,2009,332322,


Unnamed: 0,TIME_PERIOD
count,1048575.0
mean,2014.07
std,3.73
min,2008.0
25%,2011.0
50%,2014.0
75%,2017.0
max,2020.0


## Step 3: Delete non necessary columns and rows

In [209]:
# delete columns
#Index(['DATAFLOW', 'LAST UPDATE', 'freq', 'aquameth', 'aquaenv', 'species','fishreg', 'unit', 'geo', 'TIME_PERIOD', 'OBS_VALUE', 'OBS_FLAG'],dtype='object')
aquaculture_prod_EU.drop(columns = ['DATAFLOW', 'LAST UPDATE', 'freq', 'OBS_FLAG'] , inplace=True)
aquaculture_prod_EU.head()

Unnamed: 0,aquameth,aquaenv,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE
0,CAG,BRK,F00,0,EUR,EU27_2020,2009,31511300
1,CAG,BRK,F00,0,EUR,EU27_2020,2010,34381030
2,CAG,BRK,F00,0,EUR,EU27_2020,2011,35084520
3,CAG,BRK,F00,0,EUR,EU27_2020,2012,31307680
4,CAG,BRK,F00,0,EUR,EU28,2009,31511300


In [210]:
# delete rows with EU28 (refers to EU including UK before Brexit)
aquaculture_prod_EU = aquaculture_prod_EU[aquaculture_prod_EU.geo != "EU28"] # logic: keep only rows without EU28, assign variable again.
aquaculture_prod_EU.head()

Unnamed: 0,aquameth,aquaenv,species,fishreg,unit,geo,TIME_PERIOD,OBS_VALUE
0,CAG,BRK,F00,0,EUR,EU27_2020,2009,31511300
1,CAG,BRK,F00,0,EUR,EU27_2020,2010,34381030
2,CAG,BRK,F00,0,EUR,EU27_2020,2011,35084520
3,CAG,BRK,F00,0,EUR,EU27_2020,2012,31307680
8,CAG,BRK,F00,0,EUR,FI,2009,31511300


## Step 4: Rename columns lower case, snake case, spaces, delimiters  

In [211]:
# Convert columns in lower case, change names + snake case
aquaculture_prod_EU.columns = [x.lower() for x in aquaculture_prod_EU.columns]
aquaculture_prod_EU.rename(columns = {"aquameth" : "aqua_method",
                                "aquaenv" : "aqua_environment",
                                "fishreg" : "production_region", 
                                "geo" : "country", 
                                "time_period" : "year", 
                                "obs_value" : "volume"}, 
                                inplace=True, 
                                errors = "ignore") #errors =ignore to not get the warning message

aquaculture_prod_EU.head()

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,CAG,BRK,F00,0,EUR,EU27_2020,2009,31511300
1,CAG,BRK,F00,0,EUR,EU27_2020,2010,34381030
2,CAG,BRK,F00,0,EUR,EU27_2020,2011,35084520
3,CAG,BRK,F00,0,EUR,EU27_2020,2012,31307680
8,CAG,BRK,F00,0,EUR,FI,2009,31511300


## Step 5: Rename row value names. First letter upper case e.g. EUR = Eur
## Step 6: Replace (country/species) abbreviations with full names by using dictionaries 

In [212]:
# unique countries in dataset
aquaculture_prod_EU.country.unique()
#country dictionary for this dataset
country_dict_a = {'EU27_2020' : 'European Union',
'AL' : 'Albania',
'UK' : 'United Kingdom',
'LT' : 'Lithuania',
'FI' : 'Finland',
'AT' : 'Austria',
'ME' : 'Montenegro',
'NL' : 'Netherlands',
'EU' : 'European Union',
'CZ' : 'Czech Republic',
'HR' : 'Croatia',
'BA' : 'Bosnia and Herzegovina',
'IS' : 'Iceland',
'EE' : 'Estonia',
'CY' : 'Cyprus',
'HU' : 'Hungary',
'SE' : 'Sweden',
'EL' : 'Greece',
'MT' : 'Malta',
'BG' : 'Bulgaria',
'ES' : 'Spain',
'SI' : 'Slovenia',
'DE' : 'Germany',
'FR' : 'France',
'NO' : 'Norway',
'IT' : 'Italy',
'PT' : 'Portugal',
'DK' : 'Denmark',
'PL' : 'Poland',
'RS' : 'Serbia',
'IE' : 'Ireland',
'RO' : 'Romania',
'SK' : 'Slovakia',
'LV' : 'Latvia',
'TR' : 'Turkey',
'BE' : 'Belgium'}

In [213]:
# rename values within column country
aquaculture_prod_EU["country"] = aquaculture_prod_EU["country"].map(country_dict_a)
aquaculture_prod_EU.head()

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,CAG,BRK,F00,0,EUR,European Union,2009,31511300
1,CAG,BRK,F00,0,EUR,European Union,2010,34381030
2,CAG,BRK,F00,0,EUR,European Union,2011,35084520
3,CAG,BRK,F00,0,EUR,European Union,2012,31307680
8,CAG,BRK,F00,0,EUR,Finland,2009,31511300


In [214]:
# dictionary species for aquaculture dataset as there are different species as in fish_catch

species_dict_a = {'AAA' : 'Adriatic sturgeon',
'AAO' : 'Atlantic sturgeon',
'AAS' : 'Noble crayfish',
'ABJ' : 'Japanese abalone',
'ABX' : 'Abalones nei',
'ACH' : 'Arctic char',
'ALR' : 'Bleak',
'AMB' : 'Greater amberjack',
'AMX' : 'Amberjacks nei',
'APB' : 'Siberian sturgeon',
'APE' : 'Starry sturgeon',
'APG' : 'Danube sturgeon(=Osetr)',
'APN' : 'White sturgeon',
'APR' : 'Sterlet sturgeon',
'APU' : 'Sturgeon',
'ASR' : 'Harpoon seaweeds',
'ASU' : 'Asp',
'ATB' : 'Big-scale sand smelt',
'AUP' : 'White-clawed crayfish',
'AYS' : 'Euro-American crayfishes nei',
'BAH' : 'Bastard halibut',
'BFT' : 'Atlantic bluefin tuna',
'BIC' : 'Bighead carp',
'BOY' : 'Purple dye murex',
'BSC' : 'Bluespotted seabream',
'BUF' : 'Buffalofishes nei',
'CAF' : 'Catfishes nei',
'CGO' : 'Goldfish',
'CHR' : 'Chars nei',
'CIJ' : 'Peled',
'CIQ' : 'Broad whitefish',
'CLH' : 'Northern quahog(=Hard clam)',
'CLJ' : 'Japanese carpet shell',
'CLS' : 'Sand gaper',
'CLX' : 'Clams, etc. nei',
'CLZ' : 'North African catfish',
'COB' : 'Shi drum',
'COC' : 'Common edible cockle',
'COD' : 'Atlantic cod',
'COH' : 'Coho(=Silver) salmon',
'CPR' : 'Common prawn',
'CRA' : 'Marine crabs nei',
'CRD' : 'Danube crayfish',
'CRG' : 'Green crab',
'CRP' : 'Red claw crayfish',
'CRU' : 'Marine crustaceans nei',
'CTC' : 'Common cuttlefish',
'CTG' : 'Grooved carpet shell',
'CTO' : 'Torpedo-shaped catfishes nei',
'CTS' : 'None',
'CWG' : 'Prussian carp',
'DEC' : 'Common dentex',
'DEP' : 'Pink dentex',
'DRU' : 'Sciaenas nei',
'ELE' : 'European eel',
'EQE' : 'Pod razor shell',
'EQI' : 'Sword razor shell',
'EWA' : 'European crayfishes nei',
'F01' : 'Total aquatic animals',
'F02' : 'Total freshwater and diadromous fish',
'F04' : 'Total shellfish',
'F07' : 'Total finfish',
'F08' : 'Total finfish and shellfish',
'F10' : 'Freshwater fishes',
'F12' : 'Tilapias and other cichlids',
'F13' : 'Miscellaneous freshwater fishes',
'F20' : 'Diadromous fishes',
'F21' : 'Sturgeons, paddlefishes',
'F22' : 'River eels',
'F23' : 'Salmons, trouts, smelts',
'F25' : 'Miscellaneous diadromous fishes',
'F30' : 'Marine fishes',
'F31' : 'Flounders, halibuts, soles',
'F32' : 'Cods, hakes, haddocks',
'F36' : 'Tunas, bonitos, billfishes',
'F37' : 'Miscellaneous pelagic fishes',
'F39' : 'Marine fishes not identified',
'F40' : 'Crustaceans',
'F41' : 'Freshwater crustaceans',
'F42' : 'Crabs, sea-spiders',
'F50' : 'Molluscs',
'F51' : 'Freshwater molluscs',
'F52' : 'Abalones, winkles, conchs',
'F53' : 'Oysters',
'F54' : 'Mussels',
'F55' : 'Scallops, pectens',
'F57' : 'Squids, cuttlefishes, octopuses',
'F58' : 'Miscellaneous marine molluscs',
'F70' : 'Miscellaneous aquatic animals',
'F71' : 'None',
'F77' : 'Miscellaneous aquatic invertebrates',
'F90' : 'Aquatic plants',
'F91' : 'Brown seaweeds',
'F93' : 'Green seaweeds',
'FBM' : 'Freshwater bream',
'FBU' : 'Burbot',
'FCC' : 'Crucian carp',
'FCG' : 'Grass carp(=White amur)',
'FCY' : 'Cyprinids nei',
'FID' : 'Orfe(=Ide)',
'FIN' : 'Finfishes nei',
'FLE' : 'European flounder',
'FPE' : 'European perch',
'FPI' : 'Northern pike',
'FPP' : 'Pike-perch',
'FRG' : 'Frogs',
'FRO' : 'Roach',
'FRX' : 'Roaches nei',
'FSI' : 'Freshwater siluroids nei',
'FTE' : 'Tench',
'FVE' : 'Vendace',
'GAS' : 'Gastropods nei',
'GIP' : 'Barramundi(=Giant seaperch)',
'GKA' : 'Leather gracilaria',
'GLS' : 'Gracilaria seaweeds',
'GOG' : 'Gudgeon',
'HAL' : 'Atlantic halibut',
'HLW' : 'None',
'HON' : 'Common nase',
'HQW' : 'Unicell. chlorella green alga',
'HUC' : 'Huchen',
'HUH' : 'Beluga',
'HXP' : 'Silver, bighead carps nei',
'INV' : 'Aquatic invertebrates nei',
'ITE' : 'Brown bullhead',
'ITM' : 'Black bullhead',
'ITP' : 'Channel catfish',
'KTG' : 'Olive green cockle',
'KUP' : 'Kuruma prawn',
'LAT' : 'Lake trout(=Char)',
'LPZ' : 'Limpets nei',
'LQX' : 'Sea belt',
'LUH' : 'Chub',
'MGR' : 'Meagre',
'MGS' : 'None',
'MOF' : 'Freshwater molluscs nei',
'MOL' : 'Marine molluscs nei',
'MPS' : 'Largemouth black bass',
'MSM' : 'Mediterranean mussel',
'MSX' : 'Sea mussels nei',
'MUF' : 'Flathead grey mullet',
'MUL' : 'Mullets nei',
'MUS' : 'Blue mussel',
'MZZ' : 'Marine fishes nei',
'OAL' : 'Senegalese sole',
'OBN' : 'Peppery furrow',
'OCC' : 'Common octopus',
'ONA' : 'Golden trout',
'OST' : 'Flat and cupped oysters nei',
'OYC' : 'Cupped oysters nei',
'OYF' : 'European flat oyster',
'OYG' : 'Pacific cupped oyster',
'PAC' : 'Common pandora',
'PAL' : 'Palaemonid shrimps nei',
'PAM' : 'Mississippi paddlefish',
'PCL' : 'Signal crayfish',
'PEE' : 'Common periwinkle',
'PGS' : 'Striped catfish',
'PLN' : 'European whitefish',
'PNI' : 'Indian white prawn',
'POL' : 'Pollack',
'PTB' : 'Barbel',
'PVR' : 'Atlantic ditch shrimp',
'QOF' : 'Barcoo grunter',
'RAE' : 'European razor clam',
'RAZ' : 'Solen razor clams nei',
'RCW' : 'Red swamp crawfish',
'RDM' : 'Red drum',
'REA' : 'Redbanded seabream',
'RPG' : 'Red porgy',
'SAL' : 'Atlantic salmon',
'SBG' : 'Gilthead seabream',
'SBH' : 'Striped bass, hybrid',
'SBR' : 'Blackspot seabream',
'SBX' : 'Porgies, seabreams nei',
'SCE' : 'Great Atlantic scallop',
'SHR' : 'Sharpsnout seabream',
'SIZ' : 'Spirulina nei',
'SLX' : 'Salmonoids nei',
'SMS' : 'None',
'SOL' : 'Common sole',
'SOM' : 'Wels(=Som) catfish',
'SPI' : 'Spinefeet(=Rabbitfishes) nei',
'SPU' : 'Spotted seabass',
'SRE' : 'Rudd',
'SRG' : 'Sargo breams nei',
'SRI' : 'Marbled spinefoot',
'STB' : 'Striped bass',
'STU' : 'Sturgeons nei',
'SVC' : 'Silver carp',
'SVE' : 'Striped venus',
'SVF' : 'Brook trout',
'SWA' : 'White seabream',
'SWB' : 'Brown seaweeds',
'SWG' : 'Green seaweeds',
'SWR' : 'Red seaweeds',
'SWX' : 'Seaweeds nei',
'TGS' : 'Caramote prawn',
'TLN' : 'Nile tilapia',
'TLP' : 'Tilapias nei',
'TLV' : 'Grayling',
'TPS' : 'Carpet shells nei',
'TRO' : 'Trouts nei',
'TRR' : 'Rainbow trout',
'TUR' : 'Turbot',
'TZZ' : 'None',
'UDP' : 'Wakame',
'UDS' : 'Wakame nei',
'ULL' : 'None',
'UNI]' : 'None',
'UVU' : 'Sea lettuce',
'VEV' : 'Warty venus',
'VIV' : 'Vimba bream',
'VNA' : 'Golden carpet shell',
'VNR' : 'Banded carpet shell',
'VSC' : 'Variegated scallop',
'VUC' : 'Corrugated venus',
'WHF' : 'Whitefishes nei',
'WKS' : 'Striped weakfish',
'YTC' : 'Yellowtail amberjack',
'ABK' : 'White bream',
'AJC' : 'Babberlocks',
'BKC' : 'Black carp',
'BSE' : 'Seabasses nei',
'BSS' : 'European seabass',
'BTT' : 'None',
'CLV' : 'Venus clams nei',
'DIN' : 'None',
'F00' : 'Total fishery products',
'F11' : 'Carps, barbels and other cyprinids',
'F33' : 'Miscellaneous coastal fishes',
'F45' : 'Shrimps, prawns',
'F47' : 'Miscellaneous marine crustaceans',
'F56' : 'Clams, cockles, arkshells',
'F92' : 'Red seaweeds',
'F94' : 'Miscellaneous aquatic plants',
'FCP' : 'Common carp',
'FRF' : 'Freshwater fishes nei',
'GJW' : 'Warty gracilaria',
'JDV' : 'Common spider crab',
'LUD' : 'Belica',
'MYV' : 'Mytilus mussels nei',
'PEN' : 'Penaeus shrimps nei',
'PNV' : 'Whiteleg shrimp',
'QSC' : 'Queen scallop',
'SBP' : 'Pargo breams nei',
'SLZ' : 'Salmonids nei',
'TRS' : 'Sea trout'}

In [215]:
# rename species in column species
aquaculture_prod_EU["species"] = aquaculture_prod_EU["species"].map(species_dict_a)
aquaculture_prod_EU.head()


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,CAG,BRK,Total fishery products,0,EUR,European Union,2009,31511300
1,CAG,BRK,Total fishery products,0,EUR,European Union,2010,34381030
2,CAG,BRK,Total fishery products,0,EUR,European Union,2011,35084520
3,CAG,BRK,Total fishery products,0,EUR,European Union,2012,31307680
8,CAG,BRK,Total fishery products,0,EUR,Finland,2009,31511300


In [216]:
# dictionary aqua_method # aqua_environment
method_dict = {'TOTAL' : 'All methods',
'PON' : 'Ponds',
'TNK' : 'Tanks and raceways',
'ENC' : 'Enclosures and pens',
'CAG' : 'Cages',
'RES' : 'Recirculation systems',
'ONB' : 'On bottom',
'OFB' : 'Off bottom',
'OTH' : 'Other methods',
'NSP' : 'Not specified'}

environment_dict = {'TOTAL' : 'Total',
'FRW' : 'Freshwater',
'SBW' : 'Sea and brackish water (total)',
'SEA' : 'Seawater',
'BRK' : 'Brackish water',
'NSP' : 'Not specified'}

In [217]:
# rename values in aqua_method + aqua_environment
aquaculture_prod_EU["aqua_method"] = aquaculture_prod_EU["aqua_method"].map(method_dict)
aquaculture_prod_EU["aqua_environment"] = aquaculture_prod_EU["aqua_environment"].map(environment_dict)
aquaculture_prod_EU.head()

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,Cages,Brackish water,Total fishery products,0,EUR,European Union,2009,31511300
1,Cages,Brackish water,Total fishery products,0,EUR,European Union,2010,34381030
2,Cages,Brackish water,Total fishery products,0,EUR,European Union,2011,35084520
3,Cages,Brackish water,Total fishery products,0,EUR,European Union,2012,31307680
8,Cages,Brackish water,Total fishery products,0,EUR,Finland,2009,31511300


In [218]:
#unique regions in set
aquaculture_prod_EU.production_region.unique()

# dictionary production_region
production_reg_dict = {'27' : 'Northeast Atlantic',
'37' : 'Mediterranean and Black Sea',
'4' : 'Asian inland waters',
'5' : 'European inland waters',
'34' : 'Atlantic, Eastern Central',
'1' : 'African inland waters',
'10' : 'Marine areas (27+37)',
'9' : 'Total inland waters (5+1)',
'0' : 'All production areas',
'NSP' : 'Not specified'}

#rename values in production_region
aquaculture_prod_EU["production_region"] = aquaculture_prod_EU["production_region"].map(production_reg_dict)


In [219]:
# unique values in unit
aquaculture_prod_EU.unit.unique()
# rename abbreviations
aquaculture_prod_EU["unit"] = aquaculture_prod_EU["unit"].map({"TLW" : "Tonnes live weight", "EUR" : "Eur", "EUR_T" : "Euro per ton"})
aquaculture_prod_EU.head()


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2009,31511300
1,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2010,34381030
2,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2011,35084520
3,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2012,31307680
8,Cages,Brackish water,Total fishery products,All production areas,Eur,Finland,2009,31511300


## Step 7: Inspect Null Values / NaNs and datatypes with info()  

In [220]:
# Inspect NaNs in whole dataframe
aquaculture_prod_EU.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 870049 entries, 0 to 1048572
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   aqua_method        870049 non-null  object
 1   aqua_environment   870049 non-null  object
 2   species            869914 non-null  object
 3   production_region  870049 non-null  object
 4   unit               870049 non-null  object
 5   country            870049 non-null  object
 6   year               870049 non-null  int64 
 7   volume             725443 non-null  object
dtypes: int64(1), object(7)
memory usage: 59.7+ MB


In [221]:
# show NaN in values, "sub"-dataframe
aquaculture_prod_EU_nulls = aquaculture_prod_EU[aquaculture_prod_EU.isna().any(axis=1)]
aquaculture_prod_EU_nulls

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
1444,Cages,Freshwater,Siberian sturgeon,All production areas,Eur,Germany,2011,
1445,Cages,Freshwater,Siberian sturgeon,All production areas,Eur,Germany,2012,
1446,Cages,Freshwater,Siberian sturgeon,All production areas,Eur,Germany,2013,
1447,Cages,Freshwater,Siberian sturgeon,All production areas,Eur,Germany,2015,
1448,Cages,Freshwater,Siberian sturgeon,All production areas,Eur,Germany,2016,
...,...,...,...,...,...,...,...,...
1048391,All methods,Seawater,Miscellaneous freshwater fishes,Total inland waters (5+1),Tonnes live weight,European Union,2011,
1048396,All methods,Seawater,Diadromous fishes,All production areas,Eur,Denmark,2010,
1048406,All methods,Seawater,Diadromous fishes,All production areas,Eur,European Union,2010,
1048437,All methods,Seawater,Diadromous fishes,All production areas,Euro per ton,Denmark,2010,


In [222]:
# fill rows with NaN in values with 0 as string to not mix datatypes - later convert to float
aquaculture_prod_EU["volume"].fillna(value="0", inplace=True)
display(aquaculture_prod_EU.head(),aquaculture_prod_EU.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 870049 entries, 0 to 1048572
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   aqua_method        870049 non-null  object
 1   aqua_environment   870049 non-null  object
 2   species            869914 non-null  object
 3   production_region  870049 non-null  object
 4   unit               870049 non-null  object
 5   country            870049 non-null  object
 6   year               870049 non-null  int64 
 7   volume             870049 non-null  object
dtypes: int64(1), object(7)
memory usage: 59.7+ MB


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2009,31511300
1,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2010,34381030
2,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2011,35084520
3,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2012,31307680
8,Cages,Brackish water,Total fishery products,All production areas,Eur,Finland,2009,31511300


None

In [223]:
# show NaN in values, "sub"-dataframe - after cleaning volume
aquaculture_prod_EU_nulls = aquaculture_prod_EU[aquaculture_prod_EU.isna().any(axis=1)]
aquaculture_prod_EU_nulls
# drop rows with no species information
aquaculture_prod_EU.dropna(subset = ["species"], inplace=True)
aquaculture_prod_EU.reset_index(drop=True)
display(aquaculture_prod_EU.head(),aquaculture_prod_EU.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 869914 entries, 0 to 1048572
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   aqua_method        869914 non-null  object
 1   aqua_environment   869914 non-null  object
 2   species            869914 non-null  object
 3   production_region  869914 non-null  object
 4   unit               869914 non-null  object
 5   country            869914 non-null  object
 6   year               869914 non-null  int64 
 7   volume             869914 non-null  object
dtypes: int64(1), object(7)
memory usage: 59.7+ MB


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2009,31511300
1,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2010,34381030
2,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2011,35084520
3,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2012,31307680
8,Cages,Brackish water,Total fishery products,All production areas,Eur,Finland,2009,31511300


None

In [224]:
# drop rows where species name was not available in english in Eurostat-classification. Named species = None in dictionary
aquaculture_prod_EU.query("species == 'None'") #show rows with None
aquaculture_prod_EU = aquaculture_prod_EU[aquaculture_prod_EU.species != "None"] # logic: keep only rows without None, assign variable again.
aquaculture_prod_EU.reset_index(drop=True, inplace=True)
display(aquaculture_prod_EU.head(),aquaculture_prod_EU.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 865810 entries, 0 to 865809
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   aqua_method        865810 non-null  object
 1   aqua_environment   865810 non-null  object
 2   species            865810 non-null  object
 3   production_region  865810 non-null  object
 4   unit               865810 non-null  object
 5   country            865810 non-null  object
 6   year               865810 non-null  int64 
 7   volume             865810 non-null  object
dtypes: int64(1), object(7)
memory usage: 52.8+ MB


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2009,31511300
1,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2010,34381030
2,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2011,35084520
3,Cages,Brackish water,Total fishery products,All production areas,Eur,European Union,2012,31307680
4,Cages,Brackish water,Total fishery products,All production areas,Eur,Finland,2009,31511300


None

In [225]:
# keep only values for all methods and all production areas
aquaculture_prod_EU = aquaculture_prod_EU[aquaculture_prod_EU.aqua_method == "All methods"]
aquaculture_prod_EU = aquaculture_prod_EU[aquaculture_prod_EU.production_region == "All production areas"]
aquaculture_prod_EU.reset_index(drop=True, inplace=True)

## Step 8: Autofill Null Values when merged cell in excel/csv file is empty   
```df[["column name 1", "column name 2"]] = df[["column name 1", "column name 2"]].fillna(method='ffill', axis=0)```
  
## Step 9: Change data types if necessary:   
* year = integer
* volume = float, round to one decimal. Convert kilograms into tonnes (1t = 1000 kg)
* value = float, round to two decimals 

In [226]:
# show datatypes
aquaculture_prod_EU.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57985 entries, 0 to 57984
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   aqua_method        57985 non-null  object
 1   aqua_environment   57985 non-null  object
 2   species            57985 non-null  object
 3   production_region  57985 non-null  object
 4   unit               57985 non-null  object
 5   country            57985 non-null  object
 6   year               57985 non-null  int64 
 7   volume             57985 non-null  object
dtypes: int64(1), object(7)
memory usage: 3.5+ MB


In [227]:
# change data types 
# year = integer - already right
#volume = float, round to one decimal. Convert kilograms into tonnes (1t = 1000 kg)
aquaculture_prod_EU["volume"] = aquaculture_prod_EU["volume"].map(lambda x : x.replace(',', '.')) # cannot convert to float with commas? got error message
aquaculture_prod_EU["volume"] = pd.to_numeric(aquaculture_prod_EU["volume"])
display(aquaculture_prod_EU.info(), aquaculture_prod_EU.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57985 entries, 0 to 57984
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   aqua_method        57985 non-null  object 
 1   aqua_environment   57985 non-null  object 
 2   species            57985 non-null  object 
 3   production_region  57985 non-null  object 
 4   unit               57985 non-null  object 
 5   country            57985 non-null  object 
 6   year               57985 non-null  int64  
 7   volume             57985 non-null  float64
dtypes: float64(1), int64(1), object(6)
memory usage: 3.5+ MB


None

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,All methods,Brackish water,Arctic char,All production areas,Eur,Iceland,2009,12688683.56
1,All methods,Brackish water,Arctic char,All production areas,Eur,Iceland,2010,13777336.58
2,All methods,Brackish water,Arctic char,All production areas,Eur,Iceland,2011,16300898.2
3,All methods,Brackish water,Arctic char,All production areas,Euro per ton,Iceland,2009,5275.96
4,All methods,Brackish water,Arctic char,All production areas,Euro per ton,Iceland,2010,5676.69


## Step 10: Species cleaning 
* aggregate species = salmon, tuna, lobster, shrimp  
* sum up species in categories_species = crustaceans: shrimps and lobster; pelagic fish: tuna and salmon

In [228]:
# aggregate species - new dataframe with only 4 selected species = salmon, tuna, lobster and shrimps

aquaculture_EU_selected_species = aquaculture_prod_EU.query("species.str.contains('salmon|tuna|lobster|shrimp', case=False)")
aquaculture_EU_selected_species.reset_index(drop=True, inplace=True) # reset index for rows, important: inplace = True
aquaculture_EU_selected_species.head()

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume
0,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2008,55000.0
1,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2009,38430.0
2,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2010,38545.64
3,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2008,55000.0
4,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2009,31549730.0


In [229]:
# create list of conditions
conditions = [(aquaculture_EU_selected_species["species"].str.contains("salmon" , case=False)),
            (aquaculture_EU_selected_species["species"].str.contains("tuna", case=False)), 
            (aquaculture_EU_selected_species["species"].str.contains("lobster", case=False)), 
            aquaculture_EU_selected_species["species"].str.contains("shrimp", case=False)]
#create list of values we want to assign for each condition
values = ["Salmon", "Tuna", "Lobster", "Shrimp"]

aquaculture_EU_selected_species["species_agg"] = np.select(conditions, values)
aquaculture_EU_selected_species.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aquaculture_EU_selected_species["species_agg"] = np.select(conditions, values)


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume,species_agg
0,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2008,55000.0,Salmon
1,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2009,38430.0,Salmon
2,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2010,38545.64,Salmon
3,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2008,55000.0,Salmon
4,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2009,31549730.0,Salmon


In [230]:
# aggregate species to categories - categories_species = crustaceans: shrimps and lobster; pelagic fish: tuna and salmon
# create list of conditions
conditions = [(aquaculture_EU_selected_species["species_agg"].str.contains("Salmon|Tuna")),
            (aquaculture_EU_selected_species["species_agg"].str.contains("Lobster|Shrimp"))]
#create list of values we want to assign for each condition
values = ["Pelagic fish", "Crustaceans"]

aquaculture_EU_selected_species["categories_species"] = np.select(conditions, values)
aquaculture_EU_selected_species.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aquaculture_EU_selected_species["categories_species"] = np.select(conditions, values)


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume,species_agg,categories_species
0,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2008,55000.0,Salmon,Pelagic fish
1,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2009,38430.0,Salmon,Pelagic fish
2,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2010,38545.64,Salmon,Pelagic fish
3,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2008,55000.0,Salmon,Pelagic fish
4,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2009,31549730.0,Salmon,Pelagic fish


## Step 11: Decoding data

## Step 12: Punctuation = decimals separator: comma, thousands separator: dot    

## Step 13: language = english  

## Step 14: Merging dataframes: tbd after Steps 1 - 13 are done

In [231]:
# Final dataframes aquaculture production EU
aquaculture_prod_EU
aquaculture_EU_selected_species

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume,species_agg,categories_species
0,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2008,55000.00,Salmon,Pelagic fish
1,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2009,38430.00,Salmon,Pelagic fish
2,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,Germany,2010,38545.64,Salmon,Pelagic fish
3,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2008,55000.00,Salmon,Pelagic fish
4,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Eur,European Union,2009,31549730.00,Salmon,Pelagic fish
...,...,...,...,...,...,...,...,...,...,...
3671,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Croatia,2012,1906.74,Tuna,Pelagic fish
3672,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Italy,2009,23.00,Tuna,Pelagic fish
3673,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Malta,2009,3441.12,Tuna,Pelagic fish
3674,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Portugal,2008,22.00,Tuna,Pelagic fish


# Aquaculture production global

## Step 1: Import file and create dataframe with individual variable

In [232]:
aquaculture_prod_global = pd.read_csv("data/X_Seafood Production Data/aquaculture_species_global_production_quantity_fao.csv")

## Step 2: Inspect dataframe with head(), info(), shape, columns, tail(), describe()

In [233]:
display(aquaculture_prod_global.head(), 
aquaculture_prod_global.info(),
aquaculture_prod_global.shape,
aquaculture_prod_global.columns,
aquaculture_prod_global.tail(),
aquaculture_prod_global.describe())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 45 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Country Name En        234 non-null    object 
 1   ISSCAAP group Name En  234 non-null    object 
 2   Unit Name              234 non-null    object 
 3   2020                   178 non-null    float64
 4   2020 Flag              45 non-null     object 
 5   2019                   185 non-null    float64
 6   2019 Flag              52 non-null     object 
 7   2018                   181 non-null    float64
 8   2018 Flag              51 non-null     object 
 9   2017                   184 non-null    float64
 10  2017 Flag              47 non-null     object 
 11  2016                   190 non-null    float64
 12  2016 Flag              39 non-null     object 
 13  2015                   191 non-null    float64
 14  2015 Flag              36 non-null     object 
 15  2014  

Unnamed: 0,Country Name En,ISSCAAP group Name En,Unit Name,2020,2020 Flag,2019,2019 Flag,2018,2018 Flag,2017,...,2004,2004 Flag,2003,2003 Flag,2002,2002 Flag,2001,2001 Flag,2000,2000 Flag
0,Afghanistan,"Salmons, trouts, smelts",Tonnes - live weight,50.0,E,50.0,E,50.0,E,50.0,...,50.0,E,50.0,E,50.0,E,50.0,E,,
1,Albania,"Salmons, trouts, smelts",Tonnes - live weight,1612.0,,1759.0,,1850.0,,600.0,...,350.0,,350.0,,350.0,,15.0,,100.0,
2,Albania,"Shrimps, prawns",Tonnes - live weight,,,,,,,,...,,,,,,,14.0,,2.0,
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,,,103.39,,,,,...,,,,,,,,,,
4,Algeria,"Shrimps, prawns",Tonnes - live weight,1.8,,,,0.05,,5.25,...,0.0,,0.0,,0.0,N,0.0,N,0.0,N


None

(234, 45)

Index(['Country Name En', 'ISSCAAP group Name En', 'Unit Name', '2020',
       '2020 Flag', '2019', '2019 Flag', '2018', '2018 Flag', '2017',
       '2017 Flag', '2016', '2016 Flag', '2015', '2015 Flag', '2014',
       '2014 Flag', '2013', '2013 Flag', '2012', '2012 Flag', '2011',
       '2011 Flag', '2010', '2010 Flag', '2009', '2009 Flag', '2008',
       '2008 Flag', '2007', '2007 Flag', '2006', '2006 Flag', '2005',
       '2005 Flag', '2004', '2004 Flag', '2003', '2003 Flag', '2002',
       '2002 Flag', '2001', '2001 Flag', '2000', '2000 Flag'],
      dtype='object')

Unnamed: 0,Country Name En,ISSCAAP group Name En,Unit Name,2020,2020 Flag,2019,2019 Flag,2018,2018 Flag,2017,...,2004,2004 Flag,2003,2003 Flag,2002,2002 Flag,2001,2001 Flag,2000,2000 Flag
229,Viet Nam,"Lobsters, spiny-rock lobsters",Tonnes - live weight,2479.0,,2272.0,,1100.0,,1480.31,...,,,,,,,,,,
230,Viet Nam,"Shrimps, prawns",Tonnes - live weight,929989.0,,894385.0,,775000.0,,708403.73,...,275569.0,E,231717.0,E,180662.0,E,149979.0,E,89989.0,E
231,Yemen,"Shrimps, prawns",Tonnes - live weight,0.0,E,0.0,E,0.0,,0.0,...,0.0,,0.0,,0.0,,0.0,,0.0,
232,Yugoslavia SFR,"Salmons, trouts, smelts",Tonnes - live weight,,,,,,,,...,0.0,,0.0,,0.0,,0.0,,0.0,
233,Zimbabwe,"Salmons, trouts, smelts",Tonnes - live weight,43.57,,38.9,,46.1,,50.9,...,50.0,E,50.0,E,50.0,E,120.0,E,110.0,E


Unnamed: 0,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,...,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000
count,178.0,185.0,181.0,184.0,190.0,191.0,193.0,195.0,194.0,195.0,...,197.0,200.0,201.0,194.0,190.0,189.0,186.0,183.0,186.0,188.0
mean,72779.35,67539.51,65422.58,62251.01,55825.09,53805.59,51133.93,46977.43,45977.65,42204.73,...,38482.5,35780.51,34940.15,34025.1,30719.05,28706.67,27066.48,23972.07,22601.79,19869.73
std,277609.12,266684.08,261761.55,254592.59,234148.56,224932.39,216326.24,199372.61,194920.61,176886.37,...,161985.77,150214.09,149715.39,138459.87,120324.61,110045.06,102580.74,90851.61,88709.48,80414.86
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,60.18,50.0,50.0,50.67,33.73,28.0,38.54,25.0,23.27,24.65,...,25.0,20.0,13.0,22.25,11.25,11.0,7.0,5.5,8.25,2.0
50%,1326.43,1006.68,1100.0,1015.15,796.3,788.94,706.0,500.0,494.21,450.0,...,374.0,302.0,286.0,406.5,445.0,391.0,333.0,350.0,236.5,184.5
75%,15732.04,11941.0,12541.0,12784.75,11823.0,8402.92,8611.0,6697.0,6853.0,6963.0,...,5733.0,5095.5,4300.0,6647.75,6931.5,6243.0,5939.75,4825.0,4129.5,3573.0
max,2152703.0,2121363.0,2051921.0,2024032.0,1883334.0,1776263.0,1747019.0,1596157.0,1592847.0,1477236.0,...,1315859.0,1268074.0,1265636.0,1113550.0,914103.0,814259.0,821291.0,819972.0,843512.0,811366.0


## Step 3: Delete non necessary columns and rows

## Step 4: Rename columns lower case, snake case, spaces, delimiters   

## Step 5: Rename row value names. First letter upper case e.g. EUR = Eur

## Step 6: Replace (country/species) abbreviations with full names by using dictionaries 

In [234]:
# delete unnecessary columns
aquaculture_prod_global.drop(columns =['2020 Flag', '2019 Flag', '2018 Flag',
       '2017 Flag', '2016 Flag', '2015 Flag',
       '2014 Flag', '2013 Flag', '2012 Flag',
       '2011 Flag', '2010 Flag', '2009 Flag',
       '2008 Flag', '2007 Flag', '2006 Flag',
       '2005 Flag','2004 Flag','2003 Flag',
       '2002 Flag', '2001 Flag','2000 Flag'], inplace=True)
aquaculture_prod_global.head()

Unnamed: 0,Country Name En,ISSCAAP group Name En,Unit Name,2020,2019,2018,2017,2016,2015,2014,...,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000
0,Afghanistan,"Salmons, trouts, smelts",Tonnes - live weight,50.0,50.0,50.0,50.0,50.0,50.0,50.0,...,150.0,150.0,150.0,50.0,50.0,50.0,50.0,50.0,50.0,
1,Albania,"Salmons, trouts, smelts",Tonnes - live weight,1612.0,1759.0,1850.0,600.0,600.0,500.0,500.0,...,300.0,254.0,221.0,217.0,350.0,350.0,350.0,350.0,15.0,100.0
2,Albania,"Shrimps, prawns",Tonnes - live weight,,,,,,,,...,8.0,7.0,3.0,,,,,,14.0,2.0
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,,103.39,,,,,,...,,,,,,,,,,
4,Algeria,"Shrimps, prawns",Tonnes - live weight,1.8,,0.05,5.25,0.2,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [235]:
# rename columns
aquaculture_prod_global.rename(columns = {"Country Name En" : "country", 
                                        "ISSCAAP group Name En" : "species_group", 
                                        "Unit Name" : "unit"}, inplace=True)
aquaculture_prod_global.head()

Unnamed: 0,country,species_group,unit,2020,2019,2018,2017,2016,2015,2014,...,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000
0,Afghanistan,"Salmons, trouts, smelts",Tonnes - live weight,50.0,50.0,50.0,50.0,50.0,50.0,50.0,...,150.0,150.0,150.0,50.0,50.0,50.0,50.0,50.0,50.0,
1,Albania,"Salmons, trouts, smelts",Tonnes - live weight,1612.0,1759.0,1850.0,600.0,600.0,500.0,500.0,...,300.0,254.0,221.0,217.0,350.0,350.0,350.0,350.0,15.0,100.0
2,Albania,"Shrimps, prawns",Tonnes - live weight,,,,,,,,...,8.0,7.0,3.0,,,,,,14.0,2.0
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,,103.39,,,,,,...,,,,,,,,,,
4,Algeria,"Shrimps, prawns",Tonnes - live weight,1.8,,0.05,5.25,0.2,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [236]:
# turn columns years to one column with each year in one row
aquaculture_prod_global = aquaculture_prod_global.melt(id_vars=["country", "species_group", "unit"], 
                                                           var_name="year", 
                                                         value_name="volume")
aquaculture_prod_global.head(30)

Unnamed: 0,country,species_group,unit,year,volume
0,Afghanistan,"Salmons, trouts, smelts",Tonnes - live weight,2020,50.0
1,Albania,"Salmons, trouts, smelts",Tonnes - live weight,2020,1612.0
2,Albania,"Shrimps, prawns",Tonnes - live weight,2020,
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,2020,
4,Algeria,"Shrimps, prawns",Tonnes - live weight,2020,1.8
5,Argentina,"Salmons, trouts, smelts",Tonnes - live weight,2020,953.3
6,Armenia,"Salmons, trouts, smelts",Tonnes - live weight,2020,11900.0
7,Australia,"Salmons, trouts, smelts",Tonnes - live weight,2020,67133.09
8,Australia,"Scallops, pectens",Tonnes - live weight,2020,
9,Australia,"Shrimps, prawns",Tonnes - live weight,2020,6740.2


In [237]:
# delete rows with Scallops, pectens
aquaculture_prod_global = aquaculture_prod_global[aquaculture_prod_global.species_group != "Scallops, pectens"] # logic: only keep rows without Scallops, pectens

In [238]:
aquaculture_prod_global.species_group.unique()

array(['Salmons, trouts, smelts', 'Shrimps, prawns',
       'Tunas, bonitos, billfishes', 'Lobsters, spiny-rock lobsters'],
      dtype=object)

In [239]:
aquaculture_prod_global.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4431 entries, 0 to 4913
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   country        4431 non-null   object 
 1   species_group  4431 non-null   object 
 2   unit           4431 non-null   object 
 3   year           4431 non-null   object 
 4   volume         3638 non-null   float64
dtypes: float64(1), object(4)
memory usage: 207.7+ KB


## Step 7: Inspect Null Values / NaNs and datatypes with info() 

In [240]:
# check whole dataframe
aquaculture_prod_global.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4431 entries, 0 to 4913
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   country        4431 non-null   object 
 1   species_group  4431 non-null   object 
 2   unit           4431 non-null   object 
 3   year           4431 non-null   object 
 4   volume         3638 non-null   float64
dtypes: float64(1), object(4)
memory usage: 207.7+ KB


In [241]:
# show NaN in values, "sub"-dataframe
aquaculture_prod_global_nulls = aquaculture_prod_global[aquaculture_prod_global.isna().any(axis=1)]
aquaculture_prod_global_nulls

Unnamed: 0,country,species_group,unit,year,volume
2,Albania,"Shrimps, prawns",Tonnes - live weight,2020,
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,2020,
14,Bahamas,"Shrimps, prawns",Tonnes - live weight,2020,
17,Belgium,"Salmons, trouts, smelts",Tonnes - live weight,2020,
18,Belize,"Lobsters, spiny-rock lobsters",Tonnes - live weight,2020,
...,...,...,...,...,...
4884,"Tanzania, United Rep. of","Shrimps, prawns",Tonnes - live weight,2000,
4886,Timor-Leste,"Shrimps, prawns",Tonnes - live weight,2000,
4894,United Arab Emirates,"Salmons, trouts, smelts",Tonnes - live weight,2000,
4898,United Kingdom,"Shrimps, prawns",Tonnes - live weight,2000,


In [242]:
# fill rows with NaN in values with 0 as string to not mix datatypes - later convert to float
aquaculture_prod_global["volume"].fillna(value="0", inplace=True)
display(aquaculture_prod_global.head(),aquaculture_prod_global.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4431 entries, 0 to 4913
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   country        4431 non-null   object
 1   species_group  4431 non-null   object
 2   unit           4431 non-null   object
 3   year           4431 non-null   object
 4   volume         4431 non-null   object
dtypes: object(5)
memory usage: 207.7+ KB


Unnamed: 0,country,species_group,unit,year,volume
0,Afghanistan,"Salmons, trouts, smelts",Tonnes - live weight,2020,50.0
1,Albania,"Salmons, trouts, smelts",Tonnes - live weight,2020,1612.0
2,Albania,"Shrimps, prawns",Tonnes - live weight,2020,0.0
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,2020,0.0
4,Algeria,"Shrimps, prawns",Tonnes - live weight,2020,1.8


None

## Step 8: Autofill Null Values when merged cell in excel/csv file is empty   
```df[["column name 1", "column name 2"]] = df[["column name 1", "column name 2"]].fillna(method='ffill', axis=0)```
  
## Step 9: Change data types if necessary:   
* year = integer
* volume = float, round to one decimal. Convert kilograms into tonnes (1t = 1000 kg)
* value = float, round to two decimals 

## Step 10: Species cleaning  
* aggregate species = salmon, tuna, lobster, shrimp  
* sum up species in categories_species = crustaceans: shrimps and lobster; pelagic fish: tuna and salmon

In [243]:
# change datatypes
aquaculture_prod_global["year"] = pd.to_numeric(aquaculture_prod_global["year"])
aquaculture_prod_global["volume"] = pd.to_numeric(aquaculture_prod_global["volume"])
aquaculture_prod_global.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4431 entries, 0 to 4913
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   country        4431 non-null   object 
 1   species_group  4431 non-null   object 
 2   unit           4431 non-null   object 
 3   year           4431 non-null   int64  
 4   volume         4431 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 207.7+ KB


In [244]:
# create list of conditions
conditions = [(aquaculture_prod_global["species_group"].str.contains("salmon" , case=False)),
            (aquaculture_prod_global["species_group"].str.contains("tuna", case=False)), 
            (aquaculture_prod_global["species_group"].str.contains("lobster", case=False)), 
            aquaculture_prod_global["species_group"].str.contains("shrimp", case=False)]
#create list of values we want to assign for each condition
values = ["Salmon", "Tuna", "Lobster", "Shrimp"]

aquaculture_prod_global["species_agg"] = np.select(conditions, values)
aquaculture_prod_global.head()

Unnamed: 0,country,species_group,unit,year,volume,species_agg
0,Afghanistan,"Salmons, trouts, smelts",Tonnes - live weight,2020,50.0,Salmon
1,Albania,"Salmons, trouts, smelts",Tonnes - live weight,2020,1612.0,Salmon
2,Albania,"Shrimps, prawns",Tonnes - live weight,2020,0.0,Shrimp
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,2020,0.0,Salmon
4,Algeria,"Shrimps, prawns",Tonnes - live weight,2020,1.8,Shrimp


In [245]:
# aggregate species to categories - categories_species = crustaceans: shrimps and lobster; pelagic fish: tuna and salmon
# create list of conditions
conditions = [(aquaculture_prod_global["species_group"].str.contains("salmon|tuna", case=False)),
            (aquaculture_prod_global["species_group"].str.contains("lobster|shrimp", case=False))]
#create list of values we want to assign for each condition
values = ["Pelagic fish", "Crustaceans"]

aquaculture_prod_global["categories_species"] = np.select(conditions, values)
aquaculture_prod_global.head()

Unnamed: 0,country,species_group,unit,year,volume,species_agg,categories_species
0,Afghanistan,"Salmons, trouts, smelts",Tonnes - live weight,2020,50.0,Salmon,Pelagic fish
1,Albania,"Salmons, trouts, smelts",Tonnes - live weight,2020,1612.0,Salmon,Pelagic fish
2,Albania,"Shrimps, prawns",Tonnes - live weight,2020,0.0,Shrimp,Crustaceans
3,Algeria,"Salmons, trouts, smelts",Tonnes - live weight,2020,0.0,Salmon,Pelagic fish
4,Algeria,"Shrimps, prawns",Tonnes - live weight,2020,1.8,Shrimp,Crustaceans


In [246]:
# drop species_group 
aquaculture_prod_global.drop("species_group", inplace=True, axis=1)
aquaculture_prod_global.head()

Unnamed: 0,country,unit,year,volume,species_agg,categories_species
0,Afghanistan,Tonnes - live weight,2020,50.0,Salmon,Pelagic fish
1,Albania,Tonnes - live weight,2020,1612.0,Salmon,Pelagic fish
2,Albania,Tonnes - live weight,2020,0.0,Shrimp,Crustaceans
3,Algeria,Tonnes - live weight,2020,0.0,Salmon,Pelagic fish
4,Algeria,Tonnes - live weight,2020,1.8,Shrimp,Crustaceans


### Add column with region "Europe" and "Other"

In [247]:
aquaculture_prod_global.country.unique()
# If x is part of country_list - add "Europe" as value into new column "region"

Europe = ['Belgium','Bulgaria','Czechia', 'Czech Republic', 'Denmark','Germany',
'Estonia','Ireland','Greece','Spain','France','Croatia','Italy','Cyprus','Latvia','Lithuania','Luxembourg',
'Hungary','Malta','Netherlands','Austria','Poland','Portugal','Romania','Slovenia','Slovakia','Finland',
'Sweden','Iceland','Liechtenstein','Norway','Switzerland','United Kingdom','Montenegro','North Macedonia','Albania','Serbia','Turkey','Bosnia and Herzegovina',
'Kosovo','Armenia','Azerbaijan','Belarus','Moldova','Georgia','Ukraine','Russia']

d = {'Europe': Europe}

#swap key values in dict - to iterate over list
#http://stackoverflow.com/a/31674731/2901002
d1 = {k: oldk for oldk, oldv in d.items() for k in oldv}

aquaculture_prod_global['region'] = aquaculture_prod_global['country'].map(d1)
# fillna with "Rest of World"
aquaculture_prod_global["region"].fillna(value="Other", inplace=True)
aquaculture_prod_global.head(10)

Unnamed: 0,country,unit,year,volume,species_agg,categories_species,region
0,Afghanistan,Tonnes - live weight,2020,50.0,Salmon,Pelagic fish,Other
1,Albania,Tonnes - live weight,2020,1612.0,Salmon,Pelagic fish,Europe
2,Albania,Tonnes - live weight,2020,0.0,Shrimp,Crustaceans,Europe
3,Algeria,Tonnes - live weight,2020,0.0,Salmon,Pelagic fish,Other
4,Algeria,Tonnes - live weight,2020,1.8,Shrimp,Crustaceans,Other
5,Argentina,Tonnes - live weight,2020,953.3,Salmon,Pelagic fish,Other
6,Armenia,Tonnes - live weight,2020,11900.0,Salmon,Pelagic fish,Europe
7,Australia,Tonnes - live weight,2020,67133.09,Salmon,Pelagic fish,Other
9,Australia,Tonnes - live weight,2020,6740.2,Shrimp,Crustaceans,Other
10,Australia,Tonnes - live weight,2020,8345.0,Tuna,Pelagic fish,Other


In [248]:
# dataframe Europe only from FAO
# logic: keep only rows without None, assign variable again.

aquaculture_prod_global_EU_only = aquaculture_prod_global[aquaculture_prod_global.region != "Other"] 
aquaculture_prod_global_EU_only.reset_index(drop=True, inplace=True)
display (aquaculture_prod_global_EU_only.head(), 
        aquaculture_prod_global_EU_only.info()
        )

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1302 entries, 0 to 1301
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   country             1302 non-null   object 
 1   unit                1302 non-null   object 
 2   year                1302 non-null   int64  
 3   volume              1302 non-null   float64
 4   species_agg         1302 non-null   object 
 5   categories_species  1302 non-null   object 
 6   region              1302 non-null   object 
dtypes: float64(1), int64(1), object(5)
memory usage: 71.3+ KB


Unnamed: 0,country,unit,year,volume,species_agg,categories_species,region
0,Albania,Tonnes - live weight,2020,1612.0,Salmon,Pelagic fish,Europe
1,Albania,Tonnes - live weight,2020,0.0,Shrimp,Crustaceans,Europe
2,Armenia,Tonnes - live weight,2020,11900.0,Salmon,Pelagic fish,Europe
3,Austria,Tonnes - live weight,2020,3268.21,Salmon,Pelagic fish,Europe
4,Azerbaijan,Tonnes - live weight,2020,107.0,Salmon,Pelagic fish,Europe


None

## Step 11: Decoding data

## Step 12: Punctuation = decimals separator: comma, thousands separator: dot    

## Step 13: language = english  

## Step 14: Merging dataframes: tbd after Steps 1 - 13 are done

In [249]:
# Final dataframes aquaculture global
aquaculture_prod_global
aquaculture_prod_global_EU_only

Unnamed: 0,country,unit,year,volume,species_agg,categories_species,region
0,Albania,Tonnes - live weight,2020,1612.00,Salmon,Pelagic fish,Europe
1,Albania,Tonnes - live weight,2020,0.00,Shrimp,Crustaceans,Europe
2,Armenia,Tonnes - live weight,2020,11900.00,Salmon,Pelagic fish,Europe
3,Austria,Tonnes - live weight,2020,3268.21,Salmon,Pelagic fish,Europe
4,Azerbaijan,Tonnes - live weight,2020,107.00,Salmon,Pelagic fish,Europe
...,...,...,...,...,...,...,...
1297,Turkey,Tonnes - live weight,2000,0.00,Tuna,Pelagic fish,Europe
1298,Ukraine,Tonnes - live weight,2000,100.00,Salmon,Pelagic fish,Europe
1299,Ukraine,Tonnes - live weight,2000,0.00,Shrimp,Crustaceans,Europe
1300,United Kingdom,Tonnes - live weight,2000,139983.00,Salmon,Pelagic fish,Europe


# Merge Dataframes

In [250]:
# here join aquaculture production from Eurostat with aquaculture production from FAO
display(aquaculture_EU_selected_species.tail(), aquaculture_prod_global_EU_only.head())
#change unit value as there is different spelling
aquaculture_prod_global_EU_only["unit"] = aquaculture_prod_global_EU_only["unit"].map(lambda x : x.replace("Tonnes - live weight", "Tonnes live weight"))
#aquaculture_EU_selected_species.drop(columns = "aqua_environment" , inplace=True) - not delete for now
display(aquaculture_EU_selected_species.tail(), aquaculture_prod_global_EU_only.head())

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume,species_agg,categories_species
3671,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Croatia,2012,1906.74,Tuna,Pelagic fish
3672,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Italy,2009,23.0,Tuna,Pelagic fish
3673,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Malta,2009,3441.12,Tuna,Pelagic fish
3674,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Portugal,2008,22.0,Tuna,Pelagic fish
3675,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Portugal,2009,39.8,Tuna,Pelagic fish


Unnamed: 0,country,unit,year,volume,species_agg,categories_species,region
0,Albania,Tonnes - live weight,2020,1612.0,Salmon,Pelagic fish,Europe
1,Albania,Tonnes - live weight,2020,0.0,Shrimp,Crustaceans,Europe
2,Armenia,Tonnes - live weight,2020,11900.0,Salmon,Pelagic fish,Europe
3,Austria,Tonnes - live weight,2020,3268.21,Salmon,Pelagic fish,Europe
4,Azerbaijan,Tonnes - live weight,2020,107.0,Salmon,Pelagic fish,Europe


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aquaculture_prod_global_EU_only["unit"] = aquaculture_prod_global_EU_only["unit"].map(lambda x : x.replace("Tonnes - live weight", "Tonnes live weight"))


Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume,species_agg,categories_species
3671,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Croatia,2012,1906.74,Tuna,Pelagic fish
3672,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Italy,2009,23.0,Tuna,Pelagic fish
3673,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Malta,2009,3441.12,Tuna,Pelagic fish
3674,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Portugal,2008,22.0,Tuna,Pelagic fish
3675,All methods,Seawater,Atlantic bluefin tuna,All production areas,Tonnes live weight,Portugal,2009,39.8,Tuna,Pelagic fish


Unnamed: 0,country,unit,year,volume,species_agg,categories_species,region
0,Albania,Tonnes live weight,2020,1612.0,Salmon,Pelagic fish,Europe
1,Albania,Tonnes live weight,2020,0.0,Shrimp,Crustaceans,Europe
2,Armenia,Tonnes live weight,2020,11900.0,Salmon,Pelagic fish,Europe
3,Austria,Tonnes live weight,2020,3268.21,Salmon,Pelagic fish,Europe
4,Azerbaijan,Tonnes live weight,2020,107.0,Salmon,Pelagic fish,Europe


In [251]:
# merge aquaculture dataframes

aquaculture_EU_selected_species_merged = pd.merge(aquaculture_EU_selected_species,aquaculture_prod_global_EU_only, on=["country", "year", "species_agg", "unit"], how="inner")
aquaculture_EU_selected_species_merged.head(30)

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume_x,species_agg,categories_species_x,volume_y,categories_species_y,region
0,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
1,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,21983.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
2,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
3,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
4,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,22151.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
5,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
6,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,14.0,Salmon,Pelagic fish,21048.0,Pelagic fish,Europe
7,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,21034.0,Salmon,Pelagic fish,21048.0,Pelagic fish,Europe
8,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,14.0,Salmon,Pelagic fish,21048.0,Pelagic fish,Europe
9,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Finland,2009,11086.0,Salmon,Pelagic fish,13507.0,Pelagic fish,Europe


In [252]:
#check some values
aquaculture_EU_selected_species_merged.query("country == 'Germany'")

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume_x,species_agg,categories_species_x,volume_y,categories_species_y,region
0,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
1,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,21983.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
2,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
3,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
4,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,22151.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
5,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
6,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,14.0,Salmon,Pelagic fish,21048.0,Pelagic fish,Europe
7,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,21034.0,Salmon,Pelagic fish,21048.0,Pelagic fish,Europe
8,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,14.0,Salmon,Pelagic fish,21048.0,Pelagic fish,Europe
155,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2011,0.0,Salmon,Pelagic fish,10192.0,Pelagic fish,Europe


In [253]:
# Clean merged dataframe with only all methods + all production areas as we don't need that much details
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.aqua_method == "All methods"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.production_region == "All production areas"]

aquaculture_EU_selected_species_merged.head()

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume_x,species_agg,categories_species_x,volume_y,categories_species_y,region
0,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
1,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,21983.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
2,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
3,All methods,Brackish water,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
4,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,22151.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe


In [254]:
aquaculture_EU_selected_species_merged.query("aqua_environment.str.contains('total', case=False)")

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume_x,species_agg,categories_species_x,volume_y,categories_species_y,region
2,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.00,Salmon,Pelagic fish,22005.00,Pelagic fish,Europe
5,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.00,Salmon,Pelagic fish,21115.00,Pelagic fish,Europe
8,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,14.00,Salmon,Pelagic fish,21048.00,Pelagic fish,Europe
11,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Finland,2009,11086.00,Salmon,Pelagic fish,13507.00,Pelagic fish,Europe
14,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Finland,2010,9846.00,Salmon,Pelagic fish,11714.00,Pelagic fish,Europe
...,...,...,...,...,...,...,...,...,...,...,...,...,...
912,All methods,Sea and brackish water (total),Whiteleg shrimp,All production areas,Tonnes live weight,United Kingdom,2016,0.00,Shrimp,Crustaceans,0.00,Crustaceans,Europe
913,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Tonnes live weight,United Kingdom,2017,0.00,Shrimp,Crustaceans,0.00,Crustaceans,Europe
914,All methods,Sea and brackish water (total),Whiteleg shrimp,All production areas,Tonnes live weight,United Kingdom,2017,0.00,Shrimp,Crustaceans,0.00,Crustaceans,Europe
915,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Tonnes live weight,United Kingdom,2018,0.00,Shrimp,Crustaceans,0.00,Crustaceans,Europe


In [255]:
aquaculture_EU_selected_species_merged.aqua_environment.unique()
# Clean merged dataframe : drop rows with Seawater and Brackish water - keep only totals
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.aqua_environment != "Brackish water"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.aqua_environment != "Seawater"]
aquaculture_EU_selected_species_merged.head()

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume_x,species_agg,categories_species_x,volume_y,categories_species_y,region
1,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,21983.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
2,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Pelagic fish,Europe
4,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,22151.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
5,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.0,Salmon,Pelagic fish,21115.0,Pelagic fish,Europe
7,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,21034.0,Salmon,Pelagic fish,21048.0,Pelagic fish,Europe


In [256]:
# rename and drop duplicated columns
aquaculture_EU_selected_species_merged.drop(columns=["categories_species_y"], inplace=True)
aquaculture_EU_selected_species_merged.rename(columns = {"categories_species_x" : "categories_species", "volume_x" : "volume_eurostat", "volume_y" : "volume_fao"}, inplace=True)

aquaculture_EU_selected_species_merged.head()

Unnamed: 0,aqua_method,aqua_environment,species,production_region,unit,country,year,volume_eurostat,species_agg,categories_species,volume_fao,region
1,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,21983.0,Salmon,Pelagic fish,22005.0,Europe
2,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2008,22.0,Salmon,Pelagic fish,22005.0,Europe
4,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,22151.0,Salmon,Pelagic fish,21115.0,Europe
5,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2009,14.0,Salmon,Pelagic fish,21115.0,Europe
7,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Tonnes live weight,Germany,2010,21034.0,Salmon,Pelagic fish,21048.0,Europe


In [257]:
# change order of columns
aquaculture_EU_selected_species_merged.columns.tolist()
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[['country','year','volume_eurostat','volume_fao', 
                                                                                'unit','species_agg','categories_species','aqua_method','aqua_environment','species','production_region','region']]
aquaculture_EU_selected_species_merged.sort_values(by="country", inplace=True)
aquaculture_EU_selected_species_merged.reset_index(drop=True, inplace=True)
aquaculture_EU_selected_species_merged.head()

Unnamed: 0,country,year,volume_eurostat,volume_fao,unit,species_agg,categories_species,aqua_method,aqua_environment,species,production_region,region
0,Albania,2013,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
1,Albania,2012,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
2,Albania,2019,1759.0,1759.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
3,Albania,2018,1850.0,1850.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
4,Albania,2017,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe


In [258]:
aquaculture_EU_selected_species_merged.species.unique() # there are some species we don't need, like specific names

array(['Salmons, trouts, smelts', 'Salmonids nei', 'Coho(=Silver) salmon',
       'Salmonoids nei', 'Atlantic bluefin tuna',
       'Tunas, bonitos, billfishes', 'Shrimps, prawns', 'Atlantic salmon',
       'Whiteleg shrimp', 'Palaemonid shrimps nei',
       'Atlantic ditch shrimp'], dtype=object)

In [259]:
# delete rows without aggregated species

aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Salmonids nei"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Coho(=Silver) salmon"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Salmonoids nei"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Atlantic bluefin tuna"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Atlantic salmon"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Whiteleg shrimp"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Palaemonid shrimps nei"]
aquaculture_EU_selected_species_merged = aquaculture_EU_selected_species_merged[aquaculture_EU_selected_species_merged.species != "Atlantic ditch shrimp"]
aquaculture_EU_selected_species_merged.reset_index(drop=True, inplace=True)
aquaculture_EU_selected_species_merged.head(25)

Unnamed: 0,country,year,volume_eurostat,volume_fao,unit,species_agg,categories_species,aqua_method,aqua_environment,species,production_region,region
0,Albania,2013,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
1,Albania,2012,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
2,Albania,2019,1759.0,1759.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
3,Albania,2018,1850.0,1850.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
4,Albania,2017,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
5,Albania,2020,1612.0,1612.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
6,Albania,2015,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
7,Albania,2014,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
8,Albania,2011,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
9,Albania,2016,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe


In [260]:
aquaculture_EU_selected_species_merged.species.unique()

array(['Salmons, trouts, smelts', 'Tunas, bonitos, billfishes',
       'Shrimps, prawns'], dtype=object)

In [261]:
aquaculture_EU_selected_species_merged.query("country == 'Germany'")

Unnamed: 0,country,year,volume_eurostat,volume_fao,unit,species_agg,categories_species,aqua_method,aqua_environment,species,production_region,region
210,Germany,2018,26.4,0.0,Tonnes live weight,Shrimp,Crustaceans,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Europe
211,Germany,2011,0.0,0.0,Tonnes live weight,Shrimp,Crustaceans,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Europe
212,Germany,2012,0.0,0.0,Tonnes live weight,Shrimp,Crustaceans,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Europe
213,Germany,2013,0.0,0.0,Tonnes live weight,Shrimp,Crustaceans,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Europe
214,Germany,2015,0.0,0.0,Tonnes live weight,Shrimp,Crustaceans,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Europe
215,Germany,2016,21.0,0.0,Tonnes live weight,Shrimp,Crustaceans,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Europe
216,Germany,2017,36.4,0.0,Tonnes live weight,Shrimp,Crustaceans,All methods,Sea and brackish water (total),"Shrimps, prawns",All production areas,Europe
217,Germany,2008,21983.0,22005.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
218,Germany,2017,0.0,10891.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
219,Germany,2016,0.0,10874.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Europe


In [298]:
# Build sum for aqua_environment to get volume_total_eurostat
# split dataframe by aqua_environment

test = aquaculture_EU_selected_species_merged
test_1 = test[test["aqua_environment"] == "Freshwater"]
test_2 = test[test["aqua_environment"] == "Sea and brackish water (total)"]
test_3 = test[test["aqua_environment"] == "Not specified"]

display(test_1.head(), test_2.head(), test_3.head())

Unnamed: 0,country,year,volume_eurostat,volume_fao,unit,species_agg,categories_species,aqua_method,aqua_environment,species,production_region,region
0,Albania,2013,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
1,Albania,2012,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
2,Albania,2019,1759.0,1759.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
3,Albania,2018,1850.0,1850.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe
4,Albania,2017,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",All production areas,Europe


Unnamed: 0,country,year,volume_eurostat,volume_fao,unit,species_agg,categories_species,aqua_method,aqua_environment,species,production_region,region
53,Croatia,2014,12.78,390.78,Tonnes live weight,Salmon,Pelagic fish,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Europe
61,Croatia,2013,3.85,348.9,Tonnes live weight,Salmon,Pelagic fish,All methods,Sea and brackish water (total),"Salmons, trouts, smelts",All production areas,Europe
62,Croatia,2018,3227.14,3227.14,Tonnes live weight,Tuna,Pelagic fish,All methods,Sea and brackish water (total),"Tunas, bonitos, billfishes",All production areas,Europe
63,Croatia,2017,2161.6,2161.61,Tonnes live weight,Tuna,Pelagic fish,All methods,Sea and brackish water (total),"Tunas, bonitos, billfishes",All production areas,Europe
64,Croatia,2016,2934.28,2934.28,Tonnes live weight,Tuna,Pelagic fish,All methods,Sea and brackish water (total),"Tunas, bonitos, billfishes",All production areas,Europe


Unnamed: 0,country,year,volume_eurostat,volume_fao,unit,species_agg,categories_species,aqua_method,aqua_environment,species,production_region,region
111,Denmark,2014,9849.0,31582.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Not specified,"Salmons, trouts, smelts",All production areas,Europe
115,Denmark,2013,10.0,34260.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Not specified,"Salmons, trouts, smelts",All production areas,Europe
121,Denmark,2017,12752.05,34257.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Not specified,"Salmons, trouts, smelts",All production areas,Europe
123,Denmark,2015,11493.0,33255.6,Tonnes live weight,Salmon,Pelagic fish,All methods,Not specified,"Salmons, trouts, smelts",All production areas,Europe
148,Finland,2019,785.9,14959.1,Tonnes live weight,Salmon,Pelagic fish,All methods,Not specified,"Salmons, trouts, smelts",All production areas,Europe


In [299]:
# merge splitted dataframes
test_final = pd.merge(test_1, test_2, how="left", on=["country", "year", "species_agg", "categories_species"])
test_final.tail(25)
test_final = pd.merge(test_final, test_3, how="left", on=["country", "year", "species_agg", "categories_species"])
test_final.head(15)

Unnamed: 0,country,year,volume_eurostat_x,volume_fao_x,unit_x,species_agg,categories_species,aqua_method_x,aqua_environment_x,species_x,...,production_region_y,region_y,volume_eurostat,volume_fao,unit,aqua_method,aqua_environment,species,production_region,region
0,Albania,2013,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
1,Albania,2012,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
2,Albania,2019,1759.0,1759.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
3,Albania,2018,1850.0,1850.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
4,Albania,2017,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
5,Albania,2020,1612.0,1612.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
6,Albania,2015,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
7,Albania,2014,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
8,Albania,2011,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,
9,Albania,2016,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,,,,,,,,,,


In [300]:
#check values Germany
test_final.query("country == 'Germany'")

Unnamed: 0,country,year,volume_eurostat_x,volume_fao_x,unit_x,species_agg,categories_species,aqua_method_x,aqua_environment_x,species_x,...,production_region_y,region_y,volume_eurostat,volume_fao,unit,aqua_method,aqua_environment,species,production_region,region
125,Germany,2008,21983.0,22005.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
126,Germany,2017,0.0,10891.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
127,Germany,2016,0.0,10874.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
128,Germany,2015,0.0,10661.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
129,Germany,2014,0.0,12694.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
130,Germany,2013,0.0,12185.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
131,Germany,2012,0.0,11712.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
132,Germany,2010,21034.0,21048.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
133,Germany,2009,22151.0,21115.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,
134,Germany,2018,0.0,10205.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,All production areas,Europe,,,,,,,,


In [301]:
# add column with calculated total
# fill NaN with 0 to not get errors
test_final.fillna(value=0, inplace=True)

test_final["total_volume_eurostat"] = test_final["volume_eurostat_x"] + test_final["volume_eurostat_y"] + test_final["volume_eurostat"]
test_final.head()

Unnamed: 0,country,year,volume_eurostat_x,volume_fao_x,unit_x,species_agg,categories_species,aqua_method_x,aqua_environment_x,species_x,...,region_y,volume_eurostat,volume_fao,unit,aqua_method,aqua_environment,species,production_region,region,total_volume_eurostat
0,Albania,2013,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,500.0
1,Albania,2012,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,500.0
2,Albania,2019,1759.0,1759.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,1759.0
3,Albania,2018,1850.0,1850.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,1850.0
4,Albania,2017,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,600.0


In [302]:
# check values for Germany
test_final.query("country == 'Germany'")

Unnamed: 0,country,year,volume_eurostat_x,volume_fao_x,unit_x,species_agg,categories_species,aqua_method_x,aqua_environment_x,species_x,...,region_y,volume_eurostat,volume_fao,unit,aqua_method,aqua_environment,species,production_region,region,total_volume_eurostat
125,Germany,2008,21983.0,22005.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,22005.0
126,Germany,2017,0.0,10891.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,0.0
127,Germany,2016,0.0,10874.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,0.0
128,Germany,2015,0.0,10661.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,0.0
129,Germany,2014,0.0,12694.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,0.0
130,Germany,2013,0.0,12185.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,0.0
131,Germany,2012,0.0,11712.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,29.0
132,Germany,2010,21034.0,21048.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,21048.0
133,Germany,2009,22151.0,21115.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,22165.0
134,Germany,2018,0.0,10205.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,Europe,0.0,0.0,0,0,0,0,0,0,0.0


In [303]:
# combine dataframes to include totals in original dataframe

#aquaculture_EU_selected_species_final = pd.merge(test_final, aquaculture_EU_selected_species_merged, how="left", on=["country", "year", "species_agg", "categories_species"])
aquaculture_EU_selected_species_final = test_final
aquaculture_EU_selected_species_final.head(15)

Unnamed: 0,country,year,volume_eurostat_x,volume_fao_x,unit_x,species_agg,categories_species,aqua_method_x,aqua_environment_x,species_x,...,region_y,volume_eurostat,volume_fao,unit,aqua_method,aqua_environment,species,production_region,region,total_volume_eurostat
0,Albania,2013,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,500.0
1,Albania,2012,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,500.0
2,Albania,2019,1759.0,1759.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,1759.0
3,Albania,2018,1850.0,1850.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,1850.0
4,Albania,2017,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,600.0
5,Albania,2020,1612.0,1612.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,1612.0
6,Albania,2015,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,500.0
7,Albania,2014,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,500.0
8,Albania,2011,500.0,500.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,500.0
9,Albania,2016,600.0,600.0,Tonnes live weight,Salmon,Pelagic fish,All methods,Freshwater,"Salmons, trouts, smelts",...,0,0.0,0.0,0,0,0,0,0,0,600.0


In [295]:
aquaculture_EU_selected_species_final.columns

Index(['country', 'year', 'volume_eurostat_x', 'volume_fao_x', 'unit_x',
       'species_agg', 'categories_species', 'aqua_method_x',
       'aqua_environment_x', 'species_x', 'production_region_x', 'region_x',
       'volume_eurostat_y', 'volume_fao_y', 'unit_y', 'aqua_method_y',
       'aqua_environment_y', 'species_y', 'production_region_y', 'region_y',
       'volume_eurostat', 'volume_fao', 'unit', 'aqua_method',
       'aqua_environment', 'species', 'production_region', 'region',
       'total_volume_eurostat'],
      dtype='object')

In [304]:
# keep only necessary columns
aquaculture_EU_selected_species_final = aquaculture_EU_selected_species_final[["country", "year", "species_agg","categories_species", "total_volume_eurostat", "volume_fao_x", "unit_x" ,"aqua_method_x", "production_region_x"]]
aquaculture_EU_selected_species_final.head()

Unnamed: 0,country,year,species_agg,categories_species,total_volume_eurostat,volume_fao_x,unit_x,aqua_method_x,production_region_x
0,Albania,2013,Salmon,Pelagic fish,500.0,500.0,Tonnes live weight,All methods,All production areas
1,Albania,2012,Salmon,Pelagic fish,500.0,500.0,Tonnes live weight,All methods,All production areas
2,Albania,2019,Salmon,Pelagic fish,1759.0,1759.0,Tonnes live weight,All methods,All production areas
3,Albania,2018,Salmon,Pelagic fish,1850.0,1850.0,Tonnes live weight,All methods,All production areas
4,Albania,2017,Salmon,Pelagic fish,600.0,600.0,Tonnes live weight,All methods,All production areas


In [307]:
#drop duplicates
aquaculture_EU_selected_species_final.drop_duplicates(inplace=True)
aquaculture_EU_selected_species_final.reset_index(drop=True, inplace=True)
# check values Germany
aquaculture_EU_selected_species_final.query("country == 'Germany'")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aquaculture_EU_selected_species_final.drop_duplicates(inplace=True)


Unnamed: 0,country,year,species_agg,categories_species,total_volume_eurostat,volume_fao_x,unit_x,aqua_method_x,production_region_x
125,Germany,2008,Salmon,Pelagic fish,22005.0,22005.0,Tonnes live weight,All methods,All production areas
126,Germany,2017,Salmon,Pelagic fish,0.0,10891.0,Tonnes live weight,All methods,All production areas
127,Germany,2016,Salmon,Pelagic fish,0.0,10874.0,Tonnes live weight,All methods,All production areas
128,Germany,2015,Salmon,Pelagic fish,0.0,10661.0,Tonnes live weight,All methods,All production areas
129,Germany,2014,Salmon,Pelagic fish,0.0,12694.0,Tonnes live weight,All methods,All production areas
130,Germany,2013,Salmon,Pelagic fish,0.0,12185.0,Tonnes live weight,All methods,All production areas
131,Germany,2012,Salmon,Pelagic fish,29.0,11712.0,Tonnes live weight,All methods,All production areas
132,Germany,2010,Salmon,Pelagic fish,21048.0,21048.0,Tonnes live weight,All methods,All production areas
133,Germany,2009,Salmon,Pelagic fish,22165.0,21115.0,Tonnes live weight,All methods,All production areas
134,Germany,2018,Salmon,Pelagic fish,0.0,10205.0,Tonnes live weight,All methods,All production areas


In [309]:
aquaculture_EU_selected_species_final.rename(columns={"volume_fao_x" : "volume_fao", "unit_x" : "unit", "aqua_method_x" : "aqua_method", "production_region_x" : "production_region"}, inplace= True)
aquaculture_EU_selected_species_final.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aquaculture_EU_selected_species_final.rename(columns={"volume_fao_x" : "volume_fao", "unit_x" : "unit", "aqua_method_x" : "aqua_method", "production_region_x" : "production_region"}, inplace= True)


Unnamed: 0,country,year,species_agg,categories_species,total_volume_eurostat,volume_fao,unit,aqua_method,production_region
0,Albania,2013,Salmon,Pelagic fish,500.0,500.0,Tonnes live weight,All methods,All production areas
1,Albania,2012,Salmon,Pelagic fish,500.0,500.0,Tonnes live weight,All methods,All production areas
2,Albania,2019,Salmon,Pelagic fish,1759.0,1759.0,Tonnes live weight,All methods,All production areas
3,Albania,2018,Salmon,Pelagic fish,1850.0,1850.0,Tonnes live weight,All methods,All production areas
4,Albania,2017,Salmon,Pelagic fish,600.0,600.0,Tonnes live weight,All methods,All production areas


# Store dataframes in DBeaver

### Fish catch dataframes

In [311]:
##fish_catch_EU_all_species

# Import get_engine from sql_functions.py. You will need to restart your kernel and rerun at this point since we changed the module since we first imported it.
from sql_functions import get_engine
# create a variable called engine using the get_engine function
engine = get_engine()

# Set the schema to your capstone schema and the table_name variable to 'fish_catch' etc + your initials/group number

schema = 'capstone_fish_are_friends' # capstone schema example capstone_fish_are_friends
table_name = 'catch_eu_all' # Example: 'carriers_pw' for Philipp Wendt


# Write records stored in a dataframe to SQL database using to_sql() function
if engine!=None:
   try:
      fish_catch_EU.to_sql(name=table_name, # Name of SQL table variable
                        con=get_engine(), # Engine or connection
                        schema=schema, # your class schema variable
                        if_exists='replace', # Drop the table before inserting new values 
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
      print(f"The {table_name} table was imported successfully.")
    # Error handling
   except (Exception, psycopg2.DatabaseError) as error:
      print(error)
      engine = None
else:
   print('No engine')

The catch_eu_all table was imported successfully.


In [312]:
##fish_catch_selected_species

# Import get_engine from sql_functions.py. You will need to restart your kernel and rerun at this point since we changed the module since we first imported it.
from sql_functions import get_engine
# create a variable called engine using the get_engine function
engine = get_engine()

# Set the schema to your capstone schema and the table_name variable to 'fish_catch' etc + your initials/group number

schema = 'capstone_fish_are_friends' # capstone schema example capstone_fish_are_friends
table_name = 'catch_eu_sel' # Example: 'carriers_pw' for Philipp Wendt


# Write records stored in a dataframe to SQL database using to_sql() function
if engine!=None:
   try:
      fish_catch_selected_species.to_sql(name=table_name, # Name of SQL table variable
                        con=get_engine(), # Engine or connection
                        schema=schema, # your class schema variable
                        if_exists='replace', # Drop the table before inserting new values 
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
      print(f"The {table_name} table was imported successfully.")
    # Error handling
   except (Exception, psycopg2.DatabaseError) as error:
      print(error)
      engine = None
else:
   print('No engine')

The catch_eu_sel table was imported successfully.


### Aquaculture dataframes

In [314]:
## aquaculture_prod_EU

# Import get_engine from sql_functions.py. You will need to restart your kernel and rerun at this point since we changed the module since we first imported it.
from sql_functions import get_engine
# create a variable called engine using the get_engine function
engine = get_engine()

# Set the schema to your capstone schema and the table_name variable to 'fish_catch' etc + your initials/group number

schema = 'capstone_fish_are_friends' # capstone schema example capstone_fish_are_friends
table_name = 'aquaclt_prod_eu_all' # Example: 'carriers_pw' for Philipp Wendt


# Write records stored in a dataframe to SQL database using to_sql() function
if engine!=None:
   try:
      aquaculture_prod_EU.to_sql(name=table_name, # Name of SQL table variable
                        con=get_engine(), # Engine or connection
                        schema=schema, # your class schema variable
                        if_exists='replace', # Drop the table before inserting new values 
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
      print(f"The {table_name} table was imported successfully.")
    # Error handling
   except (Exception, psycopg2.DatabaseError) as error:
      print(error)
      engine = None
else:
   print('No engine')

The aquaclt_prod_eu_all table was imported successfully.


In [None]:
## aquaculture_prod_global

# Import get_engine from sql_functions.py. You will need to restart your kernel and rerun at this point since we changed the module since we first imported it.
from sql_functions import get_engine
# create a variable called engine using the get_engine function
engine = get_engine()

# Set the schema to your capstone schema and the table_name variable to 'fish_catch' etc + your initials/group number

schema = 'capstone_fish_are_friends' # capstone schema example capstone_fish_are_friends
table_name = 'aquaclt_prod_global_sel' # Example: 'carriers_pw' for Philipp Wendt


# Write records stored in a dataframe to SQL database using to_sql() function
if engine!=None:
   try:
      aquaculture_prod_global.to_sql(name=table_name, # Name of SQL table variable
                        con=get_engine(), # Engine or connection
                        schema=schema, # your class schema variable
                        if_exists='replace', # Drop the table before inserting new values 
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
      print(f"The {table_name} table was imported successfully.")
    # Error handling
   except (Exception, psycopg2.DatabaseError) as error:
      print(error)
      engine = None
else:
   print('No engine')

The aquaclt_prod_global_sel table was imported successfully.


In [313]:
## aquaculture_EU_selected_species_final

# Import get_engine from sql_functions.py. You will need to restart your kernel and rerun at this point since we changed the module since we first imported it.
from sql_functions import get_engine
# create a variable called engine using the get_engine function
engine = get_engine()

# Set the schema to your capstone schema and the table_name variable to 'fish_catch' etc + your initials/group number

schema = 'capstone_fish_are_friends' # capstone schema example capstone_fish_are_friends
table_name = 'aquaclt_prod_eu_sel' # Example: 'carriers_pw' for Philipp Wendt


# Write records stored in a dataframe to SQL database using to_sql() function
if engine!=None:
   try:
      aquaculture_EU_selected_species_final.to_sql(name=table_name, # Name of SQL table variable
                        con=get_engine(), # Engine or connection
                        schema=schema, # your class schema variable
                        if_exists='replace', # Drop the table before inserting new values 
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
      print(f"The {table_name} table was imported successfully.")
    # Error handling
   except (Exception, psycopg2.DatabaseError) as error:
      print(error)
      engine = None
else:
   print('No engine')

The aquaclt_prod_eu_sel table was imported successfully.
