In [None]:
import os
import gc
from tqdm import tqdm
import pandas as pd
import geopandas as gpd

In [None]:
def green_area_by_FUA(read_folder, geo_source, nuts,
                      save_root='/data/xiang/3-case studies/1-urban atlas'):
    """Compute green and total urban-atlas area per NUTS region, one workbook per FUA.

    Every entry of ``read_folder`` is treated as one FUA folder. The text
    before the first underscore of the folder name supplies the two-character
    country id (characters 0-1) and the FUA number (characters 2-4). The first
    file listed in ``<folder>/Data/`` is read with geopandas and reprojected
    to EPSG:3035 before areas are measured.

    For each NUTS region of the same country, the polygons that spatially join
    to that region (``gpd.sjoin`` with its default predicate) are summed, once
    for rows whose ``class_2018`` is ``'Green urban areas'`` and once for all
    rows. Polygons are counted with their full area; they are not clipped to
    the region boundary.

    Parameters
    ----------
    read_folder : str
        Folder that contains one sub-folder per FUA.
    geo_source : str
        Label of the NUTS version; used as the name of the output sub-folder.
    nuts : geopandas.GeoDataFrame
        NUTS regions with a ``NUTS_ID`` column.
        NOTE(review): assumed to already be in EPSG:3035 -- confirm with caller.
    save_root : str, optional
        Root folder of the outputs. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    str
        ``save_root + '/' + geo_source``; the workbooks are written to its
        ``FUAs`` sub-folder as ``<country id>_<FUA number>.xlsx``.
    """
    # Resolve and create the output location up front, so the return value is
    # defined (and the folder exists for later steps) even if read_folder is empty.
    save_folder = save_root + r'/' + geo_source
    fua_folder = save_folder + r'/' + 'FUAs'
    os.makedirs(fua_folder, exist_ok=True)

    for folder in tqdm(os.listdir(read_folder)):
        # get the country id and ua id of each shp
        folder_tag = folder.split('_')[0]
        country_id = folder_tag[:2]
        ua_num = folder_tag[2:5]

        # read the shp and separate green area from the total area
        sub_folder = read_folder + r'/' + folder + r'/' + 'Data' + r'/'
        ua = gpd.read_file(sub_folder + os.listdir(sub_folder)[0])
        ua = ua.to_crs('EPSG:3035')
        green = ua[ua['class_2018'] == 'Green urban areas']

        # Keep only the NUTS regions of this country. NUTS ids begin with the
        # country code, so a prefix match is required: a substring match would
        # also select foreign regions such as 'DEE...' for country 'EE'.
        country_shp = nuts[nuts['NUTS_ID'].str.startswith(country_id)]

        # green area and total area of the polygons joined to each region
        areas_by_region = {}
        for subregion_id in country_shp['NUTS_ID']:
            subregion = country_shp[country_shp['NUTS_ID'] == subregion_id]
            green_area = gpd.sjoin(green, subregion, how='inner').area.sum()
            total_area = gpd.sjoin(ua, subregion, how='inner').area.sum()
            areas_by_region[subregion_id] = [green_area, total_area]

        # record the green and total area and export the dataframe
        df_temp = pd.DataFrame.from_dict(
            areas_by_region,
            orient='index',
            columns=['green urban area', 'total functional urban area'],
        )
        df_temp.to_excel(fua_folder + r'/' + country_id + '_' + ua_num + '.xlsx')

        # release the large layers before loading the next FUA
        del ua, green
        gc.collect()

    return save_folder

In [None]:
def green_area_by_country(geo_ref_folder):
    """Add up the per-FUA workbooks of each country into one workbook per country.

    Reads every ``.xlsx`` file in ``<geo_ref_folder>/FUAs``. Files are grouped
    by the part of their name before the first underscore (the country id).
    The tables of one group are added together with ``+``, which aligns them
    on the index read from the first column, and the result is written to
    ``<geo_ref_folder>/country merge/<country id>.xlsx``.

    Parameters
    ----------
    geo_ref_folder : str
        Folder that holds the ``FUAs`` sub-folder; ``country merge`` is
        created inside it when missing.
    """
    fua_dir = geo_ref_folder + r'/' + 'FUAs'
    merge_dir = geo_ref_folder + r'/' + 'country merge'

    # Bucket the workbook names per country, keeping the directory listing order.
    workbooks_by_country = {}
    for name in os.listdir(fua_dir):
        if name.endswith('.xlsx'):
            workbooks_by_country.setdefault(name.split('_')[0], []).append(name)

    if not os.path.exists(merge_dir):
        os.makedirs(merge_dir)

    for country, names in workbooks_by_country.items():
        # for FUAs in the same country, sum the green and total areas per NUTS id
        tables = (pd.read_excel(fua_dir + r'/' + name, index_col=0) for name in names)
        merged = next(tables)
        for table in tables:
            merged = merged + table
        merged.to_excel(merge_dir + r'/' + country + '.xlsx')

In [None]:
def compile_table(geo_ref_folder, geo_source):
    """Stack the per-country workbooks into one long-format CSV of indicators.

    Reads every ``.xlsx`` file in ``<geo_ref_folder>/country merge`` (in sorted
    name order, so the row order of the output is reproducible), concatenates
    them, and derives ``fraction green urban area`` as
    ``100 * green urban area / total functional urban area``. The table is
    then melted to one row per (geo, indicator) with the constant columns
    ``obsTime = '2018'`` and ``freq = '6 years'`` and a ``unit`` column
    (``'percentage'`` for the fraction, ``'square meters'`` otherwise).

    The result is written to
    ``<geo_ref_folder>/Urban green area<geo_source>.csv``.

    Parameters
    ----------
    geo_ref_folder : str
        Folder that holds the ``country merge`` sub-folder and receives the CSV.
    geo_source : str
        Label of the NUTS version, appended to the CSV file name.

    Raises
    ------
    ValueError
        If ``country merge`` contains no ``.xlsx`` file.
    """
    merge_folder = geo_ref_folder + r'/' + 'country merge'

    # Read everything first and concatenate once; only workbooks are read so
    # that stray files in the folder are not handed to the Excel reader.
    frames = [
        pd.read_excel(merge_folder + r'/' + file)
        for file in sorted(os.listdir(merge_folder))
        if file.endswith('.xlsx')
    ]
    if not frames:
        raise ValueError('no .xlsx country tables found in ' + merge_folder)
    df_concat = pd.concat(frames)

    # The first column read from each workbook is the saved index (NUTS id).
    df_concat.columns = ['geo', 'green urban area', 'total functional urban area']
    df_concat['fraction green urban area'] = 100*df_concat['green urban area']/df_concat['total functional urban area']
    df_concat['obsTime'] = '2018'
    df_concat['freq'] = '6 years'

    reshap_df = pd.melt(
        df_concat,
        id_vars=['geo', 'freq', 'obsTime'],
        value_vars=['green urban area', 'total functional urban area', 'fraction green urban area'],
        var_name='indicator',
        value_name='obsValue',
    )
    reshap_df['unit'] = reshap_df['indicator'].apply(lambda x: 'percentage' if x == 'fraction green urban area' else 'square meters')
    reshap_df.to_csv(geo_ref_folder + r'/' + 'Urban green area' + geo_source + '.csv')

In [None]:
if __name__ == '__main__':
    # Driver: for each NUTS boundary release, compute the per-FUA tables,
    # merge them per country, and compile the long-format indicator CSV.
    read_folder = '/data/xiang/1-Data/Urban atlas'
    nuts_folder = '/data/xiang/1-Data/NUTS/nuts version'
    # Explicit list of boundary files to process (the alternative would be
    # to iterate over os.listdir(nuts_folder)).
    nuts_list = [
        'NUTS_RG_01M_2021_3035.shp',
        'NUTS_RG_01M_2010_3035.shp',
        'NUTS_RG_01M_2006_3035.shp',
        'NUTS_RG_01M_2013_3035.shp',
        'NUTS_RG_01M_2016_3035.shp',
        'NUTS_RG_01M_2003_3035.shp',
    ]
    for nuts_file in nuts_list:
        if not nuts_file.endswith('.shp'):
            continue
        # The second-to-last underscore-separated token of the file name
        # labels the release, e.g. 'NUTS2021'.
        geo_source = 'NUTS' + nuts_file.split('_')[-2]
        nuts_file = nuts_folder + r'/' + nuts_file
        nuts = gpd.read_file(nuts_file)
        geo_ref_folder = green_area_by_FUA(read_folder, geo_source, nuts)
        green_area_by_country(geo_ref_folder)
        compile_table(geo_ref_folder, geo_source)