# Extract data for urban calculations

Test input for Tanzania

0. Select focal ADM, buffer by 1km, rasterize as [0, 1]
1. Download DEM data from ASTER, mosaick
2. Calculate slope of DEM
3. Extract water layer from Globcover
4. Rasterize building footprints
5. Select population layer
6. Standardize all rasters to population layer  
   a. Set area outside of focal admin to NoData  
   b. Set everything to 16bit  
   
   


In [1]:
import sys, os, importlib, shutil, pathlib, datetime
import requests
import rasterio, elevation, richdem
import rasterio.warp
from rasterio import features

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import MultiPolygon, Polygon, box, Point

#Import raster helpers
sys.path.append("../../../gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint

#Import GOST urban functions
sys.path.append("../../src")
import GOST_Urban.UrbanRaster as urban
import GOST_Urban.urban_helper as helper

#Import local functions
import novelUrbanization as nu
from novelUrbanization import *

%load_ext autoreload
%autoreload 2

In [2]:
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"

inG = gpd.read_file(global_bounds)
inG2 = gpd.read_file(global_bounds_adm2)

runSmall = True
runLarge = True

# Convert EA csv files to geometry

In [None]:
in_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/"
ea_files = []
for root, dirs, files in os.walk(in_folder):
    for x in files:
        if ((x.endswith(".csv")) and (not "URBAN" in x)):
            ea_files.append(os.path.join(root, x))
            
ea_files

In [None]:
pd.read_csv(ea_files[-1]).head()

In [None]:
def try_float(x):
    try:
        return(float(x))
    except:
        return(None)

def read_geog(file, lat_column, lon_column, crs='epsg:4326', write_out=True):
    print(os.path.basename(file))
    out_file = file.replace(".csv", ".geojson")
    inD = pd.read_csv(file)
    
    print(inD.shape)
    inD[lat_column] = inD[lat_column].apply(try_float)
    inD[lon_column] = inD[lon_column].apply(try_float)    
    inD = inD.loc[~(inD[lat_column].isna() | inD[lon_column].isna())]
    print(inD.shape)
    
    inD_geom = inD.apply(lambda x: Point(float(x[lon_column]), float(x[lat_column])), axis=1)
    inD = gpd.GeoDataFrame(inD, geometry=inD_geom, crs=crs)
    
    if write_out:
        inD.to_file(out_file, driver="GeoJSON")        
    return(inD)

#res = read_geog(ea_files[0], "latdd_corrected", "londd_corrected")
#res = read_geog(ea_files[1], "lat", "lon")
#res = read_geog(ea_files[2], "latitude", "longitude")
#res = read_geog(ea_files[3], "latitude", "longitude")
#res = read_geog(ea_files[4], "lat_mean", "long_mean")
#res = read_geog(ea_files[5], "latdd_corrected", "londd_corrected")
#res = read_geog(ea_files[6], "latdd_corrected", "londd_corrected")
#res = read_geog(ea_files[7], "lat_modified","lon_modified")
#res = read_geog(ea_files[8], "lat_corrected", "lon_corrected")
#res = read_geog(ea_files[9], "lat_corrected", "lon_corrected")
res = read_geog(ea_files[-1], "latDD_corrected", "lonDD_corrected")

# Run individual counties

In [None]:
importlib.reload(helper)
# Process GHA
iso3 = "GHA"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2017_UNadj.tif" % iso3.lower())
pop_2015_con = os.path.join(local_path, "ppp_prj_2015_%s_UNadj.tif" % iso3)
pop_2018_con = os.path.join(local_path, "ppp_prj_2017_%s_UNadj.tif" % iso3)

pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo17.tif" % iso3.lower()], 
             [pop_2015_con, "%s_cpo15.tif" % iso3.lower()], 
             [pop_2018_con, "%s_cpo17.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')

#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
#nu.check_no_data(output_folder)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "ghana"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation.csv"))

In [None]:
nu.calculate_urban?

In [None]:
importlib.reload(nu)
# Process COD
iso3 = "COD"
local_path = "/home/public/Data/COUNTRY/{country}/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = ''
#ea_file = os.path.join(output_folder, "admin3", "Ethiopia_pti_admin3.shp")
#ea_file = os.path.join(output_folder, "Kebeles", "all_kebeles.shp")
#ea_file = os.path.join(output_folder, "gadm36_ETH_2.shp")

nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge,include_ghsl_h20=False)
#pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "ethiopia"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
#pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation_admin3.csv"))

In [None]:
# Process ETH
iso3 = "ETH"
local_path = "/home/public/Data/COUNTRY/{country}/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2016_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo16.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, "admin3", "Ethiopia_pti_admin3.shp")
#ea_file = os.path.join(output_folder, "Kebeles", "all_kebeles.shp")
#ea_file = os.path.join(output_folder, "gadm36_ETH_2.shp")

#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "ethiopia"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation_admin3.csv"))

In [None]:
input_file = os.path.join(output_folder, "HBS_GPS.csv")
pop_tiffs = ["eth_gpo.tif", "eth_upo15.tif", 'eth_upo16.tif']
all_tiffs = []
base_folder = os.path.join(output_folder, "FINAL_STANDARD")
base_folder_1km = os.path.join(output_folder, "FINAL_STANDARD_1KM")
for pFile in pop_tiffs:
    all_tiffs.append(os.path.join(base_folder, pFile))
    all_tiffs.append(os.path.join(base_folder_1km, pFile.replace("eth", "eth1k")))    

# Read in ETH HH locations, clean
inD = pd.read_csv(input_file)
inD = inD.loc[~inD['latDD_corrected'].isnull()]
inD = inD.loc[~inD['lonDD_corrected'].isnull()]
geoms = [Point(row['lonDD_corrected'], row['latDD_corrected']) for idx, row in inD.iterrows()]
inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={'init':'epsg:4326'})
# Calculate point urbanization for degree of urbanization
out_file = os.path.join(output_folder, f"{iso3}_DoU_Urban.csv")
nu.point_urban_summaries(inD, all_tiffs, out_file)
# Calculate point urbanization for PP urban
out_file = os.path.join(output_folder, f"{iso3}_DB_Urban.csv")
in_folder = os.path.join(output_folder, "ethiopia")
pop_tiffs = [os.path.join(in_folder, x) for x in os.listdir(in_folder)]
nu.pp_point_urban_summaries(inD, pop_tiffs, out_file)

In [None]:
importlib.reload(helper)
# Process COL
iso3 = "COL"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
#ea_file = os.path.join(output_folder, 'MGN2020_RUR_SECCION', 'MGN_RUR_SECCION.shp')
ea_file = os.path.join(output_folder, 'MGN2020_URB_SECCION', 'MGN_URB_SECCION.shp')
#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "colombia"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation.csv"))

In [None]:
importlib.reload(helper)
# Process EGY
iso3 = "EGY"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2013_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo16.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3

ea_file = os.path.join(output_folder, "EGY_adm3.shp")

#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "egypt"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation.csv"))

In [None]:
importlib.reload(helper)
# Process AGO
iso3 = "AGO"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'admin', 'bairros.shp')

#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "angola"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation.csv"))

In [None]:
importlib.reload(novelUrbanization)
# Process BGD
iso3 = "BGD"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'mauza11_reprojected.shp')

#nu.calculate_urban(iso3, inG, inG2, pop_files, '', small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "bangladesh"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation.csv"))

In [None]:
importlib.reload(helper)
# Process VNM
iso3 = "VNM"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()]]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'VN_communes2008.shp')

#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "vietnam"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation.csv"))

In [None]:
importlib.reload(helper)
# Process TZA
iso3 = "TZA"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_2015_con = os.path.join(local_path, "ppp_prj_2015_%s_UNadj.tif" % iso3)
pop_2018_con = os.path.join(local_path, "ppp_prj_2018_%s_UNadj.tif" % iso3)

pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()], 
             [pop_2015_con, "%s_cpo15.tif" % iso3.lower()], 
             [pop_2018_con, "%s_cpo18.tif" % iso3.lower()]]

output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, "villages", "tzvillage.shp")

nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(os.path.join(output_folder, "tanzania"), "%s_gpo.tif" % iso3.lower(), ea_file, output_folder)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation_village.csv"))

In [None]:
# Process point location analysis
input_file = os.path.join(output_folder, "sample_imp.csv")
inD = pd.read_csv(input_file)
geoms = [Point(row['gps_imp_lo'], row['gps_imp_la']) for idx, row in inD.iterrows()]
inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={'init':'epsg:4326'})

In [None]:
# Summarize Urban definitions
base_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/"
base_folder_1km = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/"
pop_tiffs = ['tza_upo15.tif','tza_upo18.tif','tza_cpo15.tif','tza_cpo18.tif','tza_gpo.tif']
final_tiffs = []
for p in pop_tiffs:
    final_tiffs.append(os.path.join(base_folder, p))
    final_tiffs.append(os.path.join(base_folder_1km, p.replace("tza", "tza1k")))
    
out_file = input_file.replace(".csv", "_urban.csv")
nu.point_urban_summaries(inD, final_tiffs, out_file)


In [None]:
# Calculate point urbanization for PP urban
out_file = input_file.replace(".csv", "_urban_PP.csv")
in_folder = os.path.join(output_folder, "tanzania")
pop_tiffs = [os.path.join(in_folder, x) for x in os.listdir(in_folder)]
nu.pp_point_urban_summaries(inD, pop_tiffs, out_file)

# Compile and copy mapping data

In [None]:
countries = {'AGO':'angola','BGD':'bangladesh','EGY':'egypt','ETH':'ethiopia',
             'GHA':'ghana','TZA':'tanzania','VNM':'vietnam'}
for iso3 in countries.keys():
    out_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Mapping/URBAN_Data"
    data_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming/" % iso3
    dou_folder = os.path.join(data_folder, "FINAL_STANDARD")
    db_folder  = os.path.join(data_folder, countries[iso3])
    
    dou_urban = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban.tif')
    dou_urban_hd = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban_hd.tif')
    
    db_urban_cc = os.path.join(db_folder, f"{iso3.lower()}_upo15d20b2000_cc.tif")
    db_urban_co = os.path.join(db_folder, f"{iso3.lower()}_upo15d20b2000_co.tif")
    db_urban_ur = os.path.join(db_folder, f"{iso3.lower()}_upo15d20b2000_ur.tif")
    
    for uFile in [dou_urban, dou_urban_hd, db_urban_cc, db_urban_co, db_urban_ur]:
        print(f'{iso3}: {os.path.exists(uFile)}')
        out_file = os.path.join(out_folder, os.path.basename(uFile))
        shutil.copy(uFile, out_file)
        

In [None]:
shutil.copy?

# Compile zonal results

In [14]:
in_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/"
out_folder = os.path.join(in_folder, "URBAN_ZONAL_RESULTS")
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
    
for root, dirs, files in os.walk(in_folder):
    if "URBAN_DATA_new_naming" in root:        
        country = os.path.basename(root).split("_")[0]        
        if country in nu.EA_DEFS.keys():
            for f in files:
                if ("EA_PP_URBAN" in f) | ("EA_WB_URBAN" in f) | ("HH_GPS" in f):
                    fName = pathlib.Path(os.path.join(root, f))   
                    date = datetime.datetime.fromtimestamp(fName.stat().st_mtime)
                    if datetime.datetime(2021,6,1) < date:
                        print(f'{country}: {f} - {date}') 
                    else:
                        print(f'***OLD: {country}: {f} - {date}') 
                    shutil.copy(os.path.join(root, f), os.path.join(out_folder, f))

AGO: AGO_HH_GPS_WB_URBAN.csv - 2022-06-07 11:41:21.951099
AGO: AGO_HH_GPS_PP_URBAN.csv - 2022-06-07 11:41:32.765155
AGO: AGO_EA_PP_URBAN.csv - 2022-06-07 14:18:35.263645
AGO: AGO_EA_WB_URBAN.csv - 2022-06-07 21:06:52.371177
ETH: ETH_HH_GPS_WB_URBAN.csv - 2022-06-06 14:21:00.484850
ETH: ETH_HH_GPS_PP_URBAN.csv - 2022-06-06 14:24:16.564839
ETH: ETH_EA_WB_URBAN.csv - 2022-06-07 21:12:17.944850
ETH: ETH_EA_PP_URBAN.csv - 2022-06-07 21:14:26.158509
BFA: BFA_HH_GPS_WB_URBAN.csv - 2022-06-06 14:20:44.531770
BFA: BFA_HH_GPS_PP_URBAN.csv - 2022-06-06 14:20:51.765806
BFA: BFA_EA_WB_URBAN.csv - 2022-06-07 21:12:18.841855
BFA: BFA_EA_PP_URBAN.csv - 2022-06-07 21:15:21.464793
CIV: CIV_HH_GPS_WB_URBAN.csv - 2022-06-06 14:17:55.492918
CIV: CIV_HH_GPS_PP_URBAN.csv - 2022-06-06 14:18:07.917980
CIV: CIV_EA_WB_URBAN.csv - 2022-06-07 21:11:58.558751
CIV: CIV_EA_PP_URBAN.csv - 2022-06-07 21:12:30.087913
GAB: GAB_HH_GPS_WB_URBAN.csv - 2022-06-06 14:19:16.228325
GAB: GAB_HH_GPS_PP_URBAN.csv - 2022-06-06 14:2

In [12]:
# Delete all zonal stats
for root, dirs, files in os.walk(in_folder):
    if "URBAN_DATA_new_naming" in root:        
        country = os.path.basename(root).split("_")[0]        
        if country in nu.EA_DEFS.keys():
            for f in files:
                if ("URBAN_COMMUNE_STATS" in f) | ("URBAN_ADMIN2" in f):
                    print(f'{country}: {f}')
                    os.remove(os.path.join(root, f))

AGO: URBAN_ADMIN2_STATS_COMPILED.csv
AGO: URBAN_ADMIN2_STATS_COMPILED_1k.csv
AGO: AGO_URBAN_ADMIN2_STATS_COMPILED_1k.csv
AGO: AGO_URBAN_ADMIN2_STATS_COMPILED.csv
ETH: URBAN_ADMIN2_STATS_COMPILED_1k.csv
ETH: ETH_URBAN_ADMIN2_STATS_COMPILED.csv
ETH: URBAN_ADMIN2_STATS_COMPILED.csv
BFA: BFA_URBAN_ADMIN2_STATS_COMPILED_1k.csv
BFA: BFA_URBAN_ADMIN2_STATS_COMPILED.csv
CIV: URBAN_ADMIN2_STATS_COMPILED_1k.csv
CIV: URBAN_ADMIN2_STATS_COMPILED.csv
CIV: CIV_URBAN_ADMIN2_STATS_COMPILED_1k.csv
CIV: CIV_URBAN_ADMIN2_STATS_COMPILED.csv
GAB: URBAN_ADMIN2_STATS_COMPILED_1k.csv
GAB: URBAN_ADMIN2_STATS_COMPILED.csv
GAB: GAB_URBAN_ADMIN2_STATS_COMPILED_1k.csv
GAB: GAB_URBAN_ADMIN2_STATS_COMPILED.csv
GIN: GIN_URBAN_ADMIN2_STATS_COMPILED_1k.csv
GIN: GIN_URBAN_ADMIN2_STATS_COMPILED.csv
GNB: URBAN_ADMIN2_STATS_COMPILED_1k.csv
GNB: URBAN_ADMIN2_STATS_COMPILED.csv
GNB: GNB_URBAN_ADMIN2_STATS_COMPILED_1k.csv
GNB: GNB_URBAN_ADMIN2_STATS_COMPILED.csv
LSO: URBAN_ADMIN2_STATS_COMPILED_1k.csv
LSO: URBAN_ADMIN2_STATS_

# Generating zip commands

In [None]:
# Delete existing files
in_folder = "/home/wb411133/temp"
for root, dirs, files in os.walk(in_folder):
    for d in dirs:
        if (d == "FINAL_STANDARD") or (d == "FINAL_STANDARD_1KM"):
            cur_dir = os.path.join(root, d)
            print("zip -r {out_file} {infolder}".format(
                out_file = "%s_%s.zip" % (cur_dir.split("/")[-2].split("_")[0], cur_dir.split("_")[-1]),
                infolder = os.path.join(os.path.basename(os.path.dirname(cur_dir)), os.path.basename(cur_dir))))

# Debugging

In [8]:
file1 = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_lso_cpo20.tif.csv"
file2 = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_lso_gpo.tif.csv"

inD1 = pd.read_csv(file1, index_col=0)
inD2 = pd.read_csv(file2, index_col=0)

In [9]:
inD1.head()

Unnamed: 0,TOTALPOP_lso1k_upo15.tif_SUM,TOTALPOP_lso1k_upo15.tif_MIN,TOTALPOP_lso1k_upo15.tif_MAX,TOTALPOP_lso1k_upo15.tif_MEAN,_lso1k_upo15_urban.tif_SUM,_lso1k_upo15_urban.tif_MIN,_lso1k_upo15_urban.tif_MAX,_lso1k_upo15_urban.tif_MEAN,_lso1k_upo15_urban_hd.tif_SUM,_lso1k_upo15_urban_hd.tif_MIN,...,TOTALPOP_lso1k_cpo20.tif_MAX,TOTALPOP_lso1k_cpo20.tif_MEAN,_lso1k_cpo20_urban.tif_SUM,_lso1k_cpo20_urban.tif_MIN,_lso1k_cpo20_urban.tif_MAX,_lso1k_cpo20_urban.tif_MEAN,_lso1k_cpo20_urban_hd.tif_SUM,_lso1k_cpo20_urban_hd.tif_MIN,_lso1k_cpo20_urban_hd.tif_MAX,_lso1k_cpo20_urban_hd.tif_MEAN
0,13202.013361,6.617668,223.397564,42.314145,0.0,-0.0,-0.0,0.0,0.0,-0.0,...,510.095188,32.118404,0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0
1,8109.838146,15.672872,110.434791,47.987208,0.0,0.0,0.0,0.0,0.0,0.0,...,499.482139,36.601916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,24401.314121,4.628932,81.781152,20.050381,0.0,-0.0,-0.0,0.0,0.0,-0.0,...,377.777648,18.590265,0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0
3,22974.25348,6.252507,242.562074,74.350335,0.0,0.0,0.0,0.0,0.0,0.0,...,379.54394,68.948419,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,19419.332527,13.07432,111.64587,43.935141,0.0,0.0,0.0,0.0,0.0,0.0,...,316.083675,42.469249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
inD2.head()

Unnamed: 0,TOTALPOP_lso1k_gpo.tif_SUM,TOTALPOP_lso1k_gpo.tif_MIN,TOTALPOP_lso1k_gpo.tif_MAX,TOTALPOP_lso1k_gpo.tif_MEAN,_lso1k_gpo_urban.tif_SUM,_lso1k_gpo_urban.tif_MIN,_lso1k_gpo_urban.tif_MAX,_lso1k_gpo_urban.tif_MEAN,_lso1k_gpo_urban_hd.tif_SUM,_lso1k_gpo_urban_hd.tif_MIN,_lso1k_gpo_urban_hd.tif_MAX,_lso1k_gpo_urban_hd.tif_MEAN
0,1389.217625,0.0,676.388306,4.452621,0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0
1,742.856262,0.0,742.856262,4.395599,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,27027.941792,0.0,7224.137207,22.208662,14351.857422,-0.0,7224.137207,11.630354,0.0,-0.0,-0.0,0.0
3,24098.434595,0.0,11767.498047,77.988461,20004.946777,0.0,11767.498047,60.990691,0.0,0.0,0.0,0.0
4,25709.078125,0.0,23150.865234,58.165335,23150.865234,0.0,23150.865234,52.377523,0.0,0.0,0.0,0.0


In [11]:
inD1.join(inD2)

ValueError: columns overlap but no suffix specified: Index(['TOTALPOP_lso1k_gpo.tif_SUM', 'TOTALPOP_lso1k_gpo.tif_MIN',
       'TOTALPOP_lso1k_gpo.tif_MAX', 'TOTALPOP_lso1k_gpo.tif_MEAN',
       '_lso1k_gpo_urban.tif_SUM', '_lso1k_gpo_urban.tif_MIN',
       '_lso1k_gpo_urban.tif_MAX', '_lso1k_gpo_urban.tif_MEAN',
       '_lso1k_gpo_urban_hd.tif_SUM', '_lso1k_gpo_urban_hd.tif_MIN',
       '_lso1k_gpo_urban_hd.tif_MAX', '_lso1k_gpo_urban_hd.tif_MEAN'],
      dtype='object')

In [15]:
inD2.columns in inD1.columns

False

In [16]:
inD1.columns

Index(['TOTALPOP_lso1k_upo15.tif_SUM', 'TOTALPOP_lso1k_upo15.tif_MIN',
       'TOTALPOP_lso1k_upo15.tif_MAX', 'TOTALPOP_lso1k_upo15.tif_MEAN',
       '_lso1k_upo15_urban.tif_SUM', '_lso1k_upo15_urban.tif_MIN',
       '_lso1k_upo15_urban.tif_MAX', '_lso1k_upo15_urban.tif_MEAN',
       '_lso1k_upo15_urban_hd.tif_SUM', '_lso1k_upo15_urban_hd.tif_MIN',
       '_lso1k_upo15_urban_hd.tif_MAX', '_lso1k_upo15_urban_hd.tif_MEAN',
       'TOTALPOP_lso1k_gpo.tif_SUM', 'TOTALPOP_lso1k_gpo.tif_MIN',
       'TOTALPOP_lso1k_gpo.tif_MAX', 'TOTALPOP_lso1k_gpo.tif_MEAN',
       '_lso1k_gpo_urban.tif_SUM', '_lso1k_gpo_urban.tif_MIN',
       '_lso1k_gpo_urban.tif_MAX', '_lso1k_gpo_urban.tif_MEAN',
       '_lso1k_gpo_urban_hd.tif_SUM', '_lso1k_gpo_urban_hd.tif_MIN',
       '_lso1k_gpo_urban_hd.tif_MAX', '_lso1k_gpo_urban_hd.tif_MEAN',
       'TOTALPOP_lso1k_cpo15.tif_SUM', 'TOTALPOP_lso1k_cpo15.tif_MIN',
       'TOTALPOP_lso1k_cpo15.tif_MAX', 'TOTALPOP_lso1k_cpo15.tif_MEAN',
       '_lso1k_cpo15_urban.tif_

In [17]:
inD2.columns

Index(['TOTALPOP_lso1k_gpo.tif_SUM', 'TOTALPOP_lso1k_gpo.tif_MIN',
       'TOTALPOP_lso1k_gpo.tif_MAX', 'TOTALPOP_lso1k_gpo.tif_MEAN',
       '_lso1k_gpo_urban.tif_SUM', '_lso1k_gpo_urban.tif_MIN',
       '_lso1k_gpo_urban.tif_MAX', '_lso1k_gpo_urban.tif_MEAN',
       '_lso1k_gpo_urban_hd.tif_SUM', '_lso1k_gpo_urban_hd.tif_MIN',
       '_lso1k_gpo_urban_hd.tif_MAX', '_lso1k_gpo_urban_hd.tif_MEAN'],
      dtype='object')

In [18]:
pop_files = ['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo15.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo20.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif']

In [21]:
list(set(pop_files))

['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo20.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo15.tif']