## Load Packages

In [1]:
# Link to Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Connect to Earth Engine
# Activate when doing step 

#import ee
#ee.Authenticate()
#ee.Initialize()

In [3]:
!pip install geopandas
!pip install geojson

import os
from glob import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import geojson

#!python --version
#np.__version__

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 24.4 MB/s 
Collecting fiona>=1.8
  Downloading Fiona-1.8.21-cp37-cp37m-manylinux2014_x86_64.whl (16.7 MB)
[K     |████████████████████████████████| 16.7 MB 41.6 MB/s 
[?25hCollecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 55.9 MB/s 
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: munch, cligj, click-plugins, pyproj, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.21 geopandas-0.10.2 munch-2.5.0 pyproj-3.2.1
Lo

## 1. Tidy up Datasets before Mapping/Plotting



### 1.0: The initial input data are geodataframes of pond zonal statistics in parcels

### 1.1: Split zonStats dataframes by country

In [None]:
# Customize to the directory containing input files
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.0")

files = glob(os.path.join(os.getcwd(), '*'))
files = sorted(files)
len(files)

56

In [None]:
# Customize
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.1"

In [None]:
def genFun(n): # n = len(files)

  i = 0
  while i < n:
    # get parcel id
    parcelID = files[i].split("/")[-1].split('.')[0].split('_')[-1]

    gdf = gpd.read_file(files[i])

    # Drop rows where its value in column "GID_0" is None
    gdf = gdf.dropna(axis=0, subset=['GID_0'])
    #print("parcel-", parcelID, " droped ", len(gdf[gdf['GID_0'].isna()].index), " nan records.")

    # get gids
    gids = list(set(gdf['GID_0']))

    # Split gdf by country
    gdfs = [gdf[gdf['GID_0']==k] for k in gids]

    j = 0
    while j < len(gids):
      yield gdfs[j].to_file(os.path.join(outpath, str(gids[j])+'_'+parcelID+".geojson"), driver = 'GeoJSON')
      j += 1

    i += 1

gen = genFun(len(files))

In [None]:
# Exhaust Generator
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

### 1.2: Tidy up using GEE

* Calculate area of individual ponds in $m^2$
* Convert geometry type from polygon to point (to reduce file size)

In [None]:
# Customize to output path of step 1.1
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.1")

files = sorted(glob(os.path.join(os.getcwd(), '*')))
len(files)

69

In [None]:
def tidy_up(geojson_path):

  # Load GeoJSON
  with open(geojson_path) as f:
    geojson_ini = geojson.load(f)
  
  # Subset big GeoJSON into processible parts (2500 items/part)
  geojson_tiled = [tuple(geojson_ini[i:i+2500]) for i in range(0, len(geojson_ini['features']), 2500)]

  # Load GeoJSON as feature collection
  fc = [ee.FeatureCollection(i) for i in geojson_tiled]

  # Pond Area in square meters
  fc_area = [i.map(lambda feature: feature.set({'pondSize_m2': feature.area()})) for i in fc]

  # Get Centroid of Pond Polygons, thus transfer geometry type from polygon to point
  fc_point= [i.map(lambda feature: feature.centroid()) for i in fc_area]

  return fc_point

In [None]:
def genFun(n): # len(files)

  i = 0
  while i < n:
    input = files[i]
    output = tidy_up(input) # output is a list

    # ~ country name should be the prefix
    outfile_namePrefix = files[i].split('/')[-1].split('.')[0]

    for j in range(len(output)):
      task_config = {
              'description': 'zonStats_tidy',
              'fileNamePrefix': outfile_namePrefix+'_'+str(j),

              'folder': '1.2',      
              ### Customize             
              ### !NOTE: GEE writes data to the specified folder under Drive Home Menu.
              
              'fileFormat': 'GeoJSON'}
      task = ee.batch.Export.table.toDrive(output[j], **task_config)
      yield task.start()
      
    i += 1

gen = genFun(len(files))

In [None]:
# Exhaust Generator
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

### 1.3: Merge output data frames from GEE by Country

In [None]:
# Specify country names; Should be same as output from step 1.2
#countries = ['Malaysia', 'Thailand', 'Vietnam', 'Myanmar', 'Cambodia']
countries = ['KHM', 'MMR', 'MYS', 'THA', 'VNM']

def genFun(n):
  inPath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.2/" 
  # Directory of output from step 1.2

  outPath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3"
  # Specify

  i = 0
  while i < n:
    files = glob(os.path.join(inPath, '*'+countries[i]+'*.geojson'))
    dfs = [gpd.read_file(i) for i in files]
    df = pd.concat([i for i in dfs])
    yield df.to_file(os.path.join(outPath, countries[i]+"_merged.geojson"))
    i += 1

gen = genFun(len(countries))

In [None]:
# Exhaust Generator
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

### 1.4: De-geometry

In [None]:
# Output directory from step 1.3
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/")

files = glob(os.path.join(os.getcwd(), '*merged*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/KHM_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/MMR_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/MYS_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/THA_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/VNM_merged.geojson']

In [None]:
# Customize
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/"

In [None]:
def genFun(n): # n=len(files)
  j = 0
  while j < n:
    gdf = gpd.read_file(files[j])
    df = gdf.drop('geometry', 1)
    country = files[j].split('/')[-1].split('_')[0]
    yield df.to_csv(os.path.join(outpath, country+'_degeom.csv'), index=False)
    j += 1

gen = genFun(len(files))

In [None]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

  """
  """
  """
  """
  """


### 1B: Merge data frames (.csv)

In [None]:
# Specify Path
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/9.discussion_Stats/")
files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/9.discussion_Stats/regionalAccuracy/lev2/Cambodia_regionAccuracy_adm2_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/9.discussion_Stats/regionalAccuracy/lev2/Vietnam_regionAccuracy_adm2_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/9.discussion_Stats/regionalAccuracy/lev2/Thailand_regionAccuracy_adm2_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/9.discussion_Stats/regionalAccuracy/lev2/Myanmar_regionAccuracy_adm2_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/9.discussion_Stats/regionalAccuracy/lev2/Malaysia_regionAccuracy_adm2_v1.geojson']

In [None]:
dfs = [pd.read_csv(i) for i in files]

In [None]:
df_concat = pd.concat([i for i in dfs])

In [None]:
df_concat.to_csv(os.path.join(os.getcwd(), 'all_deGeom.csv'), index=False)

### 1C: Merge geodata frames (.geojson)

In [None]:
# Specify path
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/7.map_development/")

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/7.map_development/dfD_GrR_byRegion/admin_lev2/10y_Gr/Cambodia_10yGr_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/7.map_development/dfD_GrR_byRegion/admin_lev2/10y_Gr/Vietnam_10yGr_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/7.map_development/dfD_GrR_byRegion/admin_lev2/10y_Gr/Thailand_10yGr_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/7.map_development/dfD_GrR_byRegion/admin_lev2/10y_Gr/Myanmar_10yGr_v1.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/gdf_zonstats/7.map_development/dfD_GrR_byRegion/admin_lev2/10y_Gr/Malaysia_10yGr_v1.geojson']

In [None]:
dfs = [gpd.read_file(i) for i in files]

In [None]:
df_concat = pd.concat([i for i in dfs])

In [None]:
df_concat.to_file(os.path.join(os.getcwd(), 'all_regionalAccuracy_lev2_v1.geojson'), driver='GeoJSON')

## 2. Tidy up for plotting

### 2.1: Statistics of NA in ponds

In [None]:
# Specify path to output directory of step 1.4
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/")
files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/KHM_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/MMR_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/MYS_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/THA_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/VNM_degeom.csv']

In [None]:
# Customize
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.1/"

In [None]:
def genFun(n): # n=len(files)
  j = 0
  #lst = []
  while j < n:
    df = pd.read_csv(files[j])
    col_stats = [i for i in df.columns if 'majority' in i]
    years = [i.split('_')[0] for i in col_stats]
    na_rate = [len(df[df[i]==2].index)/len(df.index)*100 for i in col_stats]
    country = list(set(list(df['NAME_0'])))*len(years)
    data = {'country': country, 
            'year': years,
            'na_rate': na_rate}
    df_NA = pd.DataFrame(data)
    yield df_NA.to_csv(os.path.join(outpath, country[0]+'_naStats.csv'), index=False)
    j += 1

gen = genFun(len(files))

In [None]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

In [None]:
# Merge dfs
os.chdir(outpath)
files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig2/Cambodia_naStats.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig2/Myanmar_naStats.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig2/Malaysia_naStats.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig2/Thailand_naStats.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig2/Vietnam_naStats.csv']

In [None]:
dfs = [pd.read_csv(i) for i in files]
df = pd.concat([i for i in dfs])
#df = df.drop('Unnamed: 0', 1)

In [None]:
df.to_csv(os.path.join(os.getcwd(), "all_naStats.csv"), index=False)

### 2.2: Total area of active ponds by country

In [None]:
# Specify path to output directory of step 1.4
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/")
files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/KHM_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/MMR_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/MYS_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/THA_degeom.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.4/VNM_degeom.csv']

In [None]:
# Customize
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.2/"

In [None]:
def genFun(n): # n=len(files)
  j = 0
  while j < n:
    df = pd.read_csv(files[j])
    # Fill NA of first column
    #df[[df.columns[0]]] = df[[df.columns[0]]].fillna(value=2)
    # Convert first column to integer
    #df = df.astype({df.columns[0]:int})

    tsCol = [i for i in df.columns if 'majority' in i]
    active_ponds = [len(df[df[i]==1]) for i in tsCol]
    active_area = [round(sum(df[df[i]==1]['pondSize_m2'])/1e6, 2) for i in tsCol]
    years = [i.split('_')[0] for i in tsCol]
    country = list(set(list(df['NAME_0'])))*len(years)

    data = {'country': country, 
            'year': years,
            'pond_count': active_ponds,
            'pond_area_km2': active_area}
    df_plot = pd.DataFrame(data)

    yield df_plot.to_csv(os.path.join(outpath, country[0]+'_#A.csv'), index=False)
    j += 1

gen = genFun(len(files))

In [None]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

In [None]:
# Merge dfs
os.chdir(outpath)

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig3-4/Cambodia_#A.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig3-4/Myanmar_#A.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig3-4/Malaysia_#A.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig3-4/Thailand_#A.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig3-4/Vietnam_#A.csv']

In [None]:
dfs = [pd.read_csv(i) for i in files]

In [None]:
# Add new column: OO percent area of area in 2019
dfs = [i.assign(area_percent_of2019 = lambda x: round(x['pond_area_km2'] / float(x['pond_area_km2'][x['year']==2019]) * 100, 2)) for i in dfs]
# Add new column: OO percent count of count in 2019
dfs = [i.assign(count_percent_of2019 = lambda x: round(x['pond_count'] / float(x['pond_count'][x['year']==2019]) * 100, 2)) for i in dfs]

In [None]:
# Merge df 
df = pd.concat([i for i in dfs])

In [None]:
df.to_csv(os.path.join(os.getcwd(), 'all_#A.csv'), index=False)

### 2.3: Proportion of ponds categorized by distance from coastline

In [None]:
# Specify path to output directory of step 1.3 
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/")

files = glob(os.path.join(os.getcwd(), '*'))
files
#countries = [i.split("/")[-1].split(".")[0].split("_")[0] for i in files]

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/KHM_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/MMR_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/MYS_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/THA_merged.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/1.3/VNM_merged.geojson']

In [None]:
# Customize
output_path = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.3/"

In [None]:
def genFun(n): # n=len(files)

  j = 0
  while j < n:
    gdf = gpd.read_file(files[j])
    #country = files[j].split("/")[-1].split(".")[0].split("_")[0]

    # Get needed column names
    cols_ini = [i for i in gdf.columns if 'majority' in i]
    cols_yr = [i.split('_')[0] for i in cols_ini]
    # Define Distance Category
    class_distance = {"[0,5)":range(0,5000), "[5,20)":range(5000,20000), "[20,50)":range(20000,50000), "[50,100)":range(50000,100000), "[100,200)":range(100000,200000)}

    # Subset gdf by distance in predefined category
    gdf_grouped = [gdf[gdf['distance'].isin(i)] for i in class_distance.values()]

    # Column "year"
    year = cols_yr * len(gdf_grouped)
    # Column "country"
    country = list(set(gdf['NAME_0']))*len(year)

    # Column "distance class"
    distance_class = [[i]*len(cols_yr) for i in list(class_distance.keys())]
    # Column "number of ponds"
    N_ponds = [[len(i[i[j]==1].index) for j in cols_ini] for i in gdf_grouped]
    # Column "Area sum"
    Area_ha = [[round(i[i[j] == 1]['pondSize_m2'].sum()/10000, 2) for j in cols_ini] for i in gdf_grouped]

    # Unnest nested lists
    from itertools import chain
    distance_class = list(chain(*distance_class))
    N_ponds = list(chain(*N_ponds))
    Area_ha = list(chain(*Area_ha))
    
    df = pd.DataFrame({'country': country, 'year':year, 'distance(km)':distance_class, 'pond_number':N_ponds, 'pond_area(ha)':Area_ha})

    yield df.to_csv(os.path.join(output_path, country[0]+'_distance.csv'), index=False)
    j += 1

gen = genFun(len(files))

In [None]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

In [None]:
# Specify Path
os.chdir(output_path)
files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig7/Cambodia_distance.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig7/Myanmar_distance.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig7/Malaysia_distance.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig7/Thailand_distance.csv',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.fig7/Vietnam_distance.csv']

In [None]:
dfs = [pd.read_csv(i) for i in files]

In [None]:
df_concat = pd.concat([i for i in dfs])

In [None]:
df_concat.to_csv(os.path.join(os.getcwd(), 'all_distance.csv'), index=False)

### 2.4: FAO Statistics

In [24]:
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.4/")
files = glob(os.path.join(os.getcwd(), '*Input*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/2.4/Input_aquaculture_production_quantity_1950_2020_v1.csv']

In [25]:
df = pd.read_csv(files[0])

years = [str([i]) for i in range(1988,2020)]
# Select necessary columns
cols = [i for i in df.columns if i in years]
cols = [df.columns[0]] + cols

# Remove unneeded rows
df = df[cols][:-2]

In [26]:
countries = list(set(list(df['Country (Name)'])))
# Subset by country
dfs = [df[df['Country (Name)'] == i] for i in countries]
# Sum all productions by year
dfs_aggr = [i.sum() for i in dfs]

In [27]:
df_ls = []
for i in range(len(dfs_aggr)):

  tlw = list(dfs_aggr[i])[1:]
  years = list(range(1988, 2020))
  country = countries[i]

  df_tidy = pd.DataFrame({'country':country, 'year':years, 'tonnes_live_weight': tlw})

  # Calculate percentage difference between production of year_y and production of 2019
  tlw2019 = float(df_tidy[df_tidy['year']==2019]['tonnes_live_weight'])
  df_tidy['percDif_to_2019'] = round(df_tidy['tonnes_live_weight'] / tlw2019 * 100, 1)

  df_ls = df_ls + [df_tidy]


In [28]:
df_merged = pd.concat([i for i in df_ls])

# Rewrite country name for Vietnam
df_merged['country'] = df_merged['country'].replace({'Viet Nam':'Vietnam'})

In [29]:
df_merged.to_csv(os.path.join(os.getcwd(), 'Output_fao_FrBrWater.csv'), index=False)

## 3. Tidy up for mapping

* lev1: province-level
* lev2: district-level

### 3.0: Calculate area of administrative region using GEE

#### Calculation in GEE

In [None]:
# Specifiy path to directory containing admin region vector files
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.1_input/")

files = glob(os.path.join(os.getcwd(), '*')) # Admin level-1 \ -2
countries = [i.split('/')[-1].split('_')[0] for i in files]

files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.1_input/Vietnam_lev2.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.1_input/Thailand_lev2.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.1_input/Myanmar_lev2.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.1_input/Malaysia_lev2.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.1_input/Cambodia_lev2.geojson']

In [None]:
def add_area(geojson_path):

  # Load GeoJSON
  with open(geojson_path) as f:
    geojson_ini = geojson.load(f)

  # Subset big GeoJSON into processible parts (20 items/part)
  geojson_tiled = [tuple(geojson_ini[i:i+10]) for i in range(0, len(geojson_ini['features']), 10)]
  
  # Load GeoJSON as feature collection
  fc = [ee.FeatureCollection(i) for i in geojson_tiled]

  # Pond Area in square meters
  fc_area = [i.map(lambda feature: feature.set({'Area_m2': feature.area()})) for i in fc]

  return fc_area

In [None]:
def genFun(n): # len(countries)

  i = 0
  while i < n:
    input = files[i]
    output = add_area(input) # output is a list

    for j in range(len(output)):
      task_config = {
              'description': 'add_area',
              'fileNamePrefix': countries[i]+'_lev2_'+str(j),

              'folder': '3.0.2_gee',
              ### Customize             
              ### !NOTE: GEE writes data to the specified folder under Drive Home Menu.
              
              'fileFormat': 'GeoJSON'}
      task = ee.batch.Export.table.toDrive(output[j], **task_config)
      yield task.start()
      
    i += 1

gen = genFun(len(countries))

In [None]:
# Exhaust Generator
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

#### Merge output from GEE by Country

* *Note: For convenience of later steps, make prefix of country name same as the output from step 1.4*
* In the template data, I changed the prefixes in 3.0.3 manually because I forgot to do it beforehand.



In [6]:
# Specify country names; Should be same as output from step 1.2
countries = ['Malaysia', 'Thailand', 'Vietnam', 'Myanmar', 'Cambodia']
#countries = ['KHM', 'MMR', 'MYS', 'THA', 'VNM']

def genFun(n):
  inPath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.2_gee/" 

  outPath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.0/3.0.3/"
  # Specify

  i = 0
  while i < n:
    files = glob(os.path.join(inPath, '*'+countries[i]+'*.geojson'))
    dfs = [gpd.read_file(i) for i in files]
    df = pd.concat([i for i in dfs])
    yield df.to_file(os.path.join(outPath, countries[i]+"_merged.geojson"))
    i += 1

gen = genFun(len(countries))

In [7]:
# Exhaust Generator
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

### 3.1: Total Area of Ponds in Provinces / Districts

In [18]:
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/")

filesA = glob(os.path.join(os.getcwd(), '1.4', '*')) 
# Output directory of step 1.4
filesB = glob(os.path.join(os.getcwd(), '3.0', '3.0.3', '*')) 
# Output directory of step 3.0

countries = ['KHM', 'MMR', 'MYS', 'THA', 'VNM']
files = [[i for i in filesA+filesB if j in i] for j in countries]

gidLevl = 'GID_2'
# Specify the entity for administrative level (the column name on which both dfs should join) 
# 'GID_1' | 'GID_2'

In [19]:
outpath = os.path.join(os.getcwd(), '3.1')
# Define output path
gid = 'gid2'
# For naming of output files

In [20]:
def genFun(n): # n=len(files)
  j = 0
  while j < n:
    dfA = pd.read_csv(files[j][0]) # dfA is df of pond zonStats
    dfB = gpd.read_file(files[j][1]) # dfB is gdf of admin region

    dfB_filtered = dfB[dfB[gidLevl].isin(dfA[gidLevl])] # Keep only admin regions containing ponds
    GIDs = dfB_filtered[gidLevl].tolist()

    colnames = [col for col in dfA.columns if 'majority' in col]
    
    # Remove years with data gap > 5%
    for i in range(len(colnames)):
      if len(dfA[dfA[colnames[i]]==2]) / len(dfA[colnames[i]]) > 0.05:
        dfA = dfA.drop(colnames[i], axis=1)

    colnames_updated = [col for col in dfA.columns if 'majority' in col]
    years = [i.split('_')[0] for i in colnames_updated]

    dfB_joined = dfB_filtered
    for y in years:
      dfA_1Yactive = dfA[dfA[y+'_majority']==1] # For year Y, select ponds that are active 

      df_area_1Y = dfA_1Yactive.groupby([gidLevl], as_index=False)['pondSize_m2'].sum() # Group active ponds by GID and calculate sum of pond area for each GID
      df_area_1Y['pondSize_m2'] = round(df_area_1Y['pondSize_m2']/1e4, 2) # Transfer pond area from m2 to ha
      df_area_1Y = df_area_1Y.rename(columns={'pondSize_m2':'pondArea_ha_'+y}) # Rename column

      dfB_joined = pd.merge(dfB_joined, df_area_1Y, on=gidLevl, how='left') # Join "pond area by GID" of year Y into admin df

      colArea = [col for col in dfB_joined.columns if 'pondArea_ha' in col]   
      dfB_joined[colArea] = dfB_joined[colArea].fillna(0)
      # NA filled with 0

    yield dfB_joined.to_file(os.path.join(outpath, countries[j]+'_'+gid+'_#A.geojson'), driver='GeoJSON')
    j += 1

gen = genFun(len(files))

In [21]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

### 3.2: Density of Pond Area by Province / District

In [13]:
# Specify input path to the directory of output of step 3.1
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/")

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/KHM_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MMR_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MYS_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/THA_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/VNM_gid2_#A.geojson']

In [14]:
countries = [i.split('/')[-1].split('_')[0] for i in files]

In [15]:
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.2/"
# Specify output path
gid = 'gid2'
# For naming the output files

In [16]:
def genFun(n): # n=len(files)
  j = 0
  while j < n:
    
    gdf = gpd.read_file(files[j])

    colArea = [col for col in gdf.columns if 'pondArea_ha' in col]
    # Get column names of "extent of ponds" time series

    # Calculate new column: pond area in m2/ha
    for i in colArea:
      new_colname = "density_(m2/ha)_" + i.split("_")[-1]
      gdf[new_colname] = (gdf[i] * 1e4) / (gdf['Area_m2'] / 1e4)

    yield gdf.to_file(os.path.join(outpath, countries[j]+'_'+gid+'_density.geojson'), driver='GeoJSON')
    j += 1

gen = genFun(len(files))

In [17]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

In [18]:
os.chdir(outpath)
files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.2/KHM_gid2_density.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.2/MMR_gid2_density.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.2/MYS_gid2_density.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.2/THA_gid2_density.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.2/VNM_gid2_density.geojson']

In [19]:
dfs = [gpd.read_file(i) for i in files]

In [20]:
df_concat = pd.concat([i for i in dfs])

In [21]:
df_concat.to_file(os.path.join(os.getcwd(), 'all_'+gid+'_density.geojson'), driver='GeoJSON')

### 3.3: Annual Growth Rate of Pond Area by Provinces / Districts

#### calculate annual growth rate

In [30]:
# Specify input path to the directory of output of step 3.1
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1")

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/KHM_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MMR_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MYS_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/THA_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/VNM_gid2_#A.geojson']

In [31]:
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.3/"
# Specify output path
gid = 'gid2'
# For naming the output files

In [32]:
countries = [i.split('/')[-1].split('_')[0] for i in files]

In [33]:
def genFun(n): # n=len(files)
  j = 0
  while j < n:
    df = gpd.read_file(files[j])

    # Annual Increase Rate
    colnames = [col for col in df.columns if 'pondArea_ha' in col]
    years = [i.split('_')[-1] for i in colnames]

    df_increase = df
    for i in range(len(colnames)-1):
      increase_rate = round((df[colnames[i+1]] - df[colnames[i]]) / df[colnames[i]]*100, 1)
      df_increase['1yGr_'+years[i+1]] = increase_rate
    # Replace infinity with nan
    df_increase.replace([np.inf, -np.inf], np.nan, inplace=True)

    yield df_increase.to_file(os.path.join(outpath, countries[j]+'_'+gid+'_1yGr.geojson'), driver='GeoJSON')
    j += 1

gen = genFun(len(files))

In [34]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

#### Merge Geodataframes

In [35]:
os.chdir(outpath)

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.3/KHM_gid2_1yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.3/MMR_gid2_1yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.3/MYS_gid2_1yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.3/THA_gid2_1yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.3/VNM_gid2_1yGr.geojson']

In [36]:
dfs = [gpd.read_file(i) for i in files]

In [37]:
df_concat = pd.concat([i for i in dfs])

In [38]:
df_concat.to_file(os.path.join(os.getcwd(), 'all_'+gid+'_1yGr.geojson'), driver='GeoJSON')

#### Convert to csv

In [39]:
files = glob(os.path.join(os.getcwd(), '*all*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.3/all_gid2_1yGr.geojson']

In [40]:
gdf = gpd.read_file(files[0])

In [41]:
colnames = [i for i in gdf.columns if '1y' in i]

In [42]:
ls = []
for i in range(len(colnames)):
  df_new = gdf[['GID_0', 'NAME_0', 'GID_1', 'NAME_1', 'GID_2', 'NAME_2']]
  df_new['stats_item'] = [colnames[i]]*len(gdf.index)
  df_new['year'] = [i.split('_')[-1] for i in df_new['stats_item']]
  df_new['stats_value'] = gdf[colnames[i]]
  ls.append(df_new)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: 

In [43]:
df_reshaped = pd.concat([i for i in ls])

In [46]:
df_reshaped.to_csv(os.path.join(os.getcwd(), 'all_'+gid+'_1yGr.csv'), index=False)

### 3.4: Five-year avg. Rate of Increase in Pond Area by Provinces / Districts

$ \frac{A_{T.tail} - A_{T.head}}{A_{T.head}} \times \frac{1}{T} \times 100 $

#### Calculate 5-year avg. annual growth rate

In [5]:
# Specify input path to the directory of output of step 3.1
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/")

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/KHM_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MMR_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MYS_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/THA_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/VNM_gid2_#A.geojson']

In [6]:
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.4/"
# Specify output path
gid = 'gid2'
# For naming the output files

In [7]:
countries = [i.split('/')[-1].split('_')[0] for i in files]

In [10]:
def genFun(n): # n=len(files)

  j = 0
  while j < n:
    df = gpd.read_file(files[j])

    x = range(1985, 2020)
    y = [x[n:n+6] for n in range(0, len(x), 5)]
    category_yrs = [['pondArea_ha_'+str(i) for i in j] for j in y]

    colnames = [col for col in df.columns if 'pondArea_ha' in col]
    new_col = ['5y_avgGr_'+i[-1].split("_")[-1] for i in category_yrs]

    period = [[i for i in j if i in colnames] for j in category_yrs]

    #---------------------------------------------------
    def my_function(row):
      """For one row (admin region in the case), For every time period, 
      calculate the growth rate between the last value and the first non-zero value.
      
      This function is to be used with apply() function for on a DF"""

      new_attr = []

      for i in range(len(period)):
        cols = list(row[period[i]]) 
        # Select columns of a time period
        nzero_index = next((i for i, x in enumerate(cols) if x != 0), None) 
        # Find the index of the first non-zero element
      
        if nzero_index != None and len(cols[nzero_index:]) > 1:
          avgIn = (cols[-1] - cols[nzero_index]) / cols[nzero_index] / (len(cols[nzero_index:])-1) *100
          avgIn = round(avgIn, 1)
        else: 
          avgIn = None

        new_attr = new_attr + [avgIn]
      return new_attr
    #---------------------------------------------------

    new_attrs_byrow = df.apply(my_function, axis=1)

    new_attrs_bycol = [[i[j] for i in new_attrs_byrow] for j in range(len(new_attrs_byrow[0]))]

    df[new_col] = pd.Series(new_attrs_bycol)

    yield df.to_file(os.path.join(outpath, countries[j]+'_'+gid+'_5yGr.geojson'), driver='GeoJSON')
    j += 1

gen = genFun(len(files))

In [11]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

#### Merge Geodataframes

In [12]:
os.chdir(outpath)

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.4/KHM_gid2_5yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.4/MMR_gid2_5yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.4/MYS_gid2_5yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.4/THA_gid2_5yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.4/VNM_gid2_5yGr.geojson']

In [13]:
dfs = [gpd.read_file(i) for i in files]

In [14]:
df_concat = pd.concat([i for i in dfs])

In [15]:
df_concat.to_file(os.path.join(os.getcwd(), 'all_'+gid+'_5yGr.geojson'), driver='GeoJSON')

#### Convert to csv

In [16]:
files = glob(os.path.join(os.getcwd(), '*all*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.4/all_gid2_5yGr.geojson']

In [17]:
gdf = gpd.read_file(files[0])

In [18]:
colnames = [i for i in gdf.columns if '5y' in i]

In [19]:
ls = []
for i in range(len(colnames)):
  df_new = gdf[['GID_0', 'NAME_0', 'GID_1', 'NAME_1', 'GID_2', 'NAME_2']]
  df_new['stats_item'] = [colnames[i]]*len(gdf.index)
  df_new['year'] = [i.split('_')[-1] for i in df_new['stats_item']]
  df_new['stats_value'] = gdf[colnames[i]]
  ls.append(df_new)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: 

In [20]:
df_reshaped = pd.concat([i for i in ls])

In [21]:
df_reshaped.to_csv(os.path.join(os.getcwd(), 'all_'+gid+'_5yGr.csv'), index=False)

### 3.5: Ten-year avg. Rate of Increase in Pond Area by Provinces / Districts

$ \frac{A_{T.tail} - A_{T.head}}{A_{T.head}} \times \frac{1}{T} \times 100 $

In [22]:
# Specify input path to the directory of output of step 3.1
os.chdir("/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1")

files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/KHM_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MMR_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/MYS_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/THA_gid2_#A.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.1/VNM_gid2_#A.geojson']

In [23]:
outpath = "/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.5/"
# Specify output path
gid = 'gid2'
# For naming the output files

In [24]:
countries = [i.split('/')[-1].split('_')[0] for i in files]

In [25]:
def genFun(n): # n=len(files)

  j = 0
  while j < n:
    df = gpd.read_file(files[j])

    x = range(1985, 2020)
    x = [str(i) for i in x]

    from more_itertools import split_after
    y = list(split_after(x, lambda x: x.endswith('0')))
    for i in range(1, len(y)):
      y[i] = [y[i-1][-1]] + y[i]

    category_yrs = [['pondArea_ha_'+str(i) for i in j] for j in y]

    colnames = [col for col in df.columns if 'pondArea_ha' in col]
    new_col = ['10y_avgGr_'+i[-1].split("_")[-1] for i in category_yrs]

    period = [[i for i in j if i in colnames] for j in category_yrs]

    #---------------------------------------------------
    def my_function(row):
      """For one row (admin region in the case), For every time period, 
      calculate the growth rate between the last value and the first non-zero value.
      
      This function is to be used with apply() function for on a DF"""

      new_attr = []

      for i in range(len(period)):
        cols = list(row[period[i]]) 
        # Select columns of a time period
        nzero_index = next((i for i, x in enumerate(cols) if x != 0), None) 
        # Find the index of the first non-zero element
      
        if nzero_index != None and len(cols[nzero_index:]) > 1:
          avgIn = (cols[-1] - cols[nzero_index]) / cols[nzero_index] / (len(cols[nzero_index:])-1) *100
          avgIn = round(avgIn, 1)
        else: 
          avgIn = None

        new_attr = new_attr + [avgIn]
      return new_attr
    #---------------------------------------------------

    new_attrs_byrow = df.apply(my_function, axis=1)

    new_attrs_bycol = [[i[j] for i in new_attrs_byrow] for j in range(len(new_attrs_byrow[0]))]

    df[new_col] = pd.Series(new_attrs_bycol)

    yield df.to_file(os.path.join(outpath, countries[j]+'_'+gid+'_10yGr.geojson'), driver='GeoJSON')
    j += 1

gen = genFun(len(files))

In [26]:
# Exhaust Generator
# Reference: https://stackoverflow.com/questions/47456631/simpler-way-to-run-a-generator-function-without-caring-about-items
from collections import deque

def exhaust(generator):
    deque(generator, maxlen=0)

exhaust(gen)

In [27]:
os.chdir(outpath)
files = glob(os.path.join(os.getcwd(), '*'))
files

['/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.5/KHM_gid2_10yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.5/MMR_gid2_10yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.5/MYS_gid2_10yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.5/THA_gid2_10yGr.geojson',
 '/content/drive/MyDrive/THESIS_AQUAPONDS/Aquaculture_SharedFolder_Kemeng_Marco/template_data/3.5/VNM_gid2_10yGr.geojson']

In [28]:
dfs = [gpd.read_file(i) for i in files]

In [29]:
df_concat = pd.concat([i for i in dfs])

In [30]:
df_concat.to_file(os.path.join(os.getcwd(), 'all_'+gid+'_10yGr.geojson'), driver='GeoJSON')