In [3]:
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import geopandas as gpd
import rioxarray
import json
import rasterio
from rioxarray.merge import merge_arrays
from shapely.geometry import Polygon

pd.set_option("max_colwidth", None)

In [4]:
# define a function to extract the landcover data to a geopandas dataframe
def extract_landcover_data(RCP="RCP26", model="GFDL", year= '2050'):
    # get the landcover data
    data_path = "D:\hotspot mapping\Land_Use_Harmonization_V2_1721\data"

    file_path = data_path + f"\{RCP}_{model}_states.nc4"
    # Load .nc4 file
    dataset = xr.open_dataset(file_path)

    datetime = f'{year}-01-01 00:00:00'
    
    # traverse all the variables in the dataset
    gdf_list = []
    final_gdf = gpd.GeoDataFrame()
    for land in dataset.data_vars:
        if land=="crs":
            continue
        # print(land)
        df = dataset[land].sel(time=datetime).to_dataframe().reset_index()
        gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat)).drop(["lat","lon","time"], axis=1)
        # print(gdf.head())
        # print(final_gdf.head())
        if final_gdf.empty:
            final_gdf = gdf
        else:
            final_gdf = gpd.sjoin(final_gdf, gdf, how="left")
            # remove the duplicate columns
            final_gdf = final_gdf.drop(["index_right"], axis=1)
    
    # df = dataset.sel(time=datetime).to_dataframe()
    return final_gdf

In [8]:
def process_land_cover_gdf(area_gdf, area_polygon=None):
    # change the geometry from point to polygon
    area_gdf['geometry'] = area_gdf['geometry'].apply(lambda x: Polygon([[x.x-0.125, x.y-0.125], [x.x-0.125, x.y+0.125], [x.x+0.125, x.y+0.125], [x.x+0.125, x.y-0.125]]))
    
    # get polygon of costa rica
    if(area_polygon==None):
        url = 'https://raw.githubusercontent.com/HotspotStoplight/HotspotStoplight/main/CropBoxes/CR_Crop3.geojson'
        gdf2 = gpd.read_file(url)

        area_polygon = gdf2.geometry[0]
    
    # get the intersection of the landcover data and the area polygon
    area_gdf['intersection'] = area_gdf.intersection(area_polygon)
    area_gdf['area'] = area_gdf['intersection'].area
    # 12 land cover types
    land_cover_list = ['primf', 'primn', 'secdf', 'secdn', 'urban', 'c3ann', 'c4ann', 'c3per', 'c4per', 'c3nfx', 'pastr', 'range']
    
    # impute the null values with 0
    area_gdf['area'] = area_gdf['area'].fillna(0)
    # get the total fraction for each row
    area_gdf['total_fraction']=area_gdf.apply(lambda x: sum(x[land_cover_list]), axis=1)
    # remove rows with 0 total fraction
    area_gdf=area_gdf[area_gdf['total_fraction']>0]

    return area_gdf

In [16]:
def get_landcover_demand_from_gdf(area_gdf):
    # calculate the demand for each landcover type
    land_cover_list = ['primf', 'primn', 'secdf', 'secdn', 'urban', 'c3ann', 'c4ann', 'c3per', 'c4per', 'c3nfx', 'pastr', 'range']
    land_cover_demand = []
    for land in land_cover_list:
        # print(land)
        area_gdf['adjusted_{}'.format(land)] = area_gdf.apply(lambda x: x[land] / x['total_fraction'], axis=1)
        land_cover_demand.append((area_gdf['adjusted_{}'.format(land)] * area_gdf['area']).sum())
    total_area = area_gdf['area'].sum()
    land_cover_demand = [x/total_area for x in land_cover_demand]
    return land_cover_demand

In [11]:
gdf = extract_landcover_data()

primf
primn
secdf
secdn
urban
c3ann
c4ann
c3per
c4per
c3nfx
pastr
range
secmb
secma


In [12]:
area_gdf = process_land_cover_gdf(gdf)

In [20]:
area_gdf.head()

Unnamed: 0,primf,geometry,primn,secdf,secdn,urban,c3ann,c4ann,c3per,c4per,...,adjusted_secdf,adjusted_secdn,adjusted_urban,adjusted_c3ann,adjusted_c4ann,adjusted_c3per,adjusted_c4per,adjusted_c3nfx,adjusted_pastr,adjusted_range
197722,0.0,"POLYGON ((-69.50000 -55.75000, -69.50000 -55.50000, -69.25000 -55.50000, -69.25000 -55.75000, -69.50000 -55.75000))",0.023673,0.0,0.000167,0.0,1e-06,1.956157e-07,4.078771e-07,0.0,...,0.0,0.006955,0.0,6.1e-05,8e-06,1.7e-05,0.0,8e-06,0.001375,0.003097
197726,0.0,"POLYGON ((-68.50000 -55.75000, -68.50000 -55.50000, -68.25000 -55.50000, -68.25000 -55.75000, -68.50000 -55.75000))",0.022749,0.0,1.2e-05,0.0,1e-06,1.867666e-07,3.894261e-07,0.0,...,0.0,0.000492,0.0,5.8e-05,8e-06,1.6e-05,0.0,8e-06,0.0,0.057333
197727,0.0,"POLYGON ((-68.25000 -55.75000, -68.25000 -55.50000, -68.00000 -55.50000, -68.00000 -55.75000, -68.25000 -55.75000))",0.362445,0.0,0.000841,0.0,2.2e-05,2.980998e-06,6.215662e-06,0.0,...,0.0,0.001539,0.0,4.1e-05,5e-06,1.1e-05,0.0,5e-06,0.0,0.335221
197728,0.0,"POLYGON ((-68.00000 -55.75000, -68.00000 -55.50000, -67.75000 -55.50000, -67.75000 -55.75000, -68.00000 -55.75000))",0.017771,0.0,4.2e-05,0.0,0.0,0.0,0.0,0.0,...,0.0,0.001345,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.425741
199160,0.05684,"POLYGON ((-70.00000 -55.50000, -70.00000 -55.25000, -69.75000 -55.25000, -69.75000 -55.50000, -70.00000 -55.50000))",0.0,0.013847,0.0,0.0,4e-06,5.800343e-07,1.209426e-06,0.0,...,0.194996,0.0,0.0,6.1e-05,8e-06,1.7e-05,0.0,8e-06,0.001375,0.003097


In [17]:
land_cover_demand = get_landcover_demand_from_gdf(area_gdf)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

In [18]:
land_cover_demand

[0.1369010939230731,
 0.028145393314353166,
 0.4662762312749234,
 0.006193776426847757,
 0.023083220712762278,
 0.008832297743923214,
 0.0013106345597802959,
 0.0721982549113322,
 0.029430420688512603,
 0.003368608298398744,
 0.21543268791209944,
 0.008827380233993857]

In [21]:
land_cover_demand_dict = dict(zip(['primf', 'primn', 'secdf', 'secdn', 'urban', 'c3ann', 'c4ann', 'c3per', 'c4per', 'c3nfx', 'pastr', 'range'], land_cover_demand))

In [25]:
# convert the dictionary to a csv file
land_cover_demand_df = pd.DataFrame(land_cover_demand_dict, index=[0])
# add the row key
land_cover_demand_df.to_csv('land_cover_demand.csv', index=False, header=True)

In [None]:
def get_landcover_demand(year= '2050', area_polygon=None , RCP="RCP26", model="GFDL"):
    gdf = extract_landcover_data(year=year)

    area_gdf = process_land_cover_gdf(gdf)

    land_cover_demand = get_landcover_demand_from_gdf(area_gdf)

    return land_cover_demand