In [2]:
# %load first_cell.py
%reload_ext autoreload
%autoreload 2
from pathlib import Path
home = str(Path.home())

import sys

# Root of the conda environment whose libraries are appended to the import path.
# It is derived from the current user's home directory so that sys.prefix agrees with
# the sys.path entries below instead of pointing at one specific user's home.
env_root = f'{home}/.conda/envs/norm_env'
sys.path = sys.path + [f'{env_root}/lib/python37.zip',
                       f'{env_root}/lib/python3.7',
                       f'{env_root}/lib/python3.7/lib-dynload',
                       f'{env_root}/lib/python3.7/site-packages',
                       '../src']  # '../src' is a relative entry, resolved against the working directory
sys.prefix = env_root

# Project path constants; presumably provided by a module under '../src', which the
# sys.path extension above makes importable — TODO confirm.
from paths import RAW_PATH, TREAT_PATH, OUTPUT_PATH, FIGURES_PATH

from copy import deepcopy
import numpy as np
import pandas as pd
# Show up to 999 columns when a DataFrame is displayed.
pd.options.display.max_columns = 999
import yaml
import matplotlib.pyplot as plt 
import datetime

import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation notices raised by the libraries used below.
warnings.filterwarnings('ignore')

# Plotting
import plotly
import plotly.graph_objs as go
import cufflinks as cf
# Initialise plotly's offline notebook mode before any figure is drawn.
plotly.offline.init_notebook_mode(connected=True)

def iplottitle(title, width=40):
    """Wrap ``title`` to lines of at most ``width`` characters, joined with ``<br>``.

    Args:
        title: The title text to wrap.
        width: Maximum line width handed to ``textwrap.wrap``.

    Returns:
        A single string in which the wrapped lines are separated by ``<br>``.
    """
    wrapped_lines = textwrap.wrap(title, width)
    return '<br>'.join(wrapped_lines)

# Setting cufflinks
import textwrap
import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)
import yaml
from jinja2 import Template

# Register the custom theme. The file is opened in a `with` block so the handle is
# closed, and parsed with `safe_load`: calling `yaml.load` without a Loader is rejected
# by PyYAML 6 and can construct arbitrary objects on older releases.
# NOTE(review): assumes the template uses only plain YAML types — confirm.
with open('cufflinks_template.yaml', 'r') as theme_file:
    cf.themes.THEMES['custom'] = yaml.safe_load(theme_file)

In [3]:
from shapely import wkt
import geopandas as gpd
from rdp import rdp
import unicodedata

In [4]:
# Columns, in order, selected from each regions frame when it is written to CSV.
columns_order = [
    # country
    'country_name', 'country_iso',
    # region identity
    'region_slug', 'region_name', 'region_type',
    # metadata
    'dashboard', 'population', 'timezone',
    # geometry as text
    'region_shapefile_wkt',
]

In [5]:
def valid_wkt(x):
    """Report whether ``x`` can be parsed by ``wkt.loads``.

    Any exception raised while parsing is printed and treated as "not valid".

    Returns:
        True if ``wkt.loads(x)`` succeeds, False otherwise.
    """
    try:
        wkt.loads(x)
    except Exception as parse_error:
        print(parse_error)
        return False
    else:
        return True

In [7]:
def check_length(df, threshold=50000):
    """Return True when every geometry's WKT text is shorter than ``threshold``.

    Args:
        df: Frame with a 'region_shapefile_wkt' column holding geometry objects
            that expose a ``.wkt`` text property (e.g. shapely geometries).
        threshold: Exclusive upper bound on the WKT length, in characters.

    Returns:
        bool: True if all rows are under the threshold (also True for an empty frame).
    """
    # Use the `.wkt` property: the `to_wkt()` method was deprecated in Shapely 1.8 and
    # is not available on Shapely 2.x geometries. The generator lets `all` stop at the
    # first oversized geometry instead of serialising every row.
    return all(len(geom.wkt) < threshold for geom in df['region_shapefile_wkt'])
        

In [225]:
def simplify(df, delta=0.001, threshold=None, step=0.05):
    """Simplify the geometries in place until all of them pass ``check_length``.

    Each pass re-simplifies every geometry in 'region_shapefile_wkt' (including
    those already short enough) with the current tolerance, then raises the
    tolerance by ``step``. Simplification is cumulative: each pass works on the
    output of the previous one.

    Args:
        df: Frame with a 'region_shapefile_wkt' column of geometry objects.
        delta: Tolerance used for the first simplification pass.
        threshold: Maximum WKT length forwarded to ``check_length``; ``None``
            keeps ``check_length``'s own default.
        step: Amount added to the tolerance after every pass.

    Returns:
        The same ``df`` object, with its geometry column overwritten.
    """
    def _all_short_enough():
        # Forward the threshold only when the caller supplied one.
        if threshold is None:
            return check_length(df)
        return check_length(df, threshold)

    while not _all_short_enough():
        df['region_shapefile_wkt'] = df['region_shapefile_wkt'].apply(
            lambda geom: geom.simplify(delta, preserve_topology=False))
        delta = delta + step

    return df

In [9]:
def make_region_slug(df, slug_preffix):
    """Add a 'region_slug' column built from 'region_name', modifying ``df`` in place.

    The slug is ``slug_preffix`` followed by the lower-cased region name, with
    every space in the combined string replaced by an underscore.

    Returns:
        The same ``df`` object.
    """
    def _slug_for(region_name):
        combined = slug_preffix + region_name.lower()
        return combined.replace(' ', '_')

    df['region_slug'] = df['region_name'].map(_slug_for)
    return df

In [26]:
def add_other_columns(df, columns_values):
    """Assign each ``name -> value`` pair of ``columns_values`` as a column of ``df``.

    The frame is modified in place; each value is assigned to the whole column.

    Returns:
        The same ``df`` object.
    """
    for column in columns_values:
        df[column] = columns_values[column]
    return df

In [72]:
def remove_accents(input_str):
    """Return ``input_str`` reduced to ASCII.

    The text is NFKD-normalised, then encoded to ASCII with errors ignored, so any
    character without an ASCII form after decomposition is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    ascii_bytes = decomposed.encode('ASCII', 'ignore')
    return ascii_bytes.decode('ASCII')

def remove_all_accents(df, columns_accents=['region_slug', 'region_name']):
    """Strip accents from the string values of the requested columns, in place.

    Only the columns named in ``columns_accents`` are processed (previously the
    argument was ignored and every column was attempted, with all errors
    silently swallowed). Columns missing from ``df`` are skipped, and non-string
    values (numbers, None, geometry objects) are left untouched.

    Args:
        df: Frame to clean.
        columns_accents: Names of the columns to clean. The default list is
            only read, never mutated.

    Returns:
        The same ``df`` object.
    """
    for c in columns_accents:
        if c not in df.columns:
            continue
        # Guard per value instead of using a bare `except`, so one non-string
        # entry no longer leaves the whole column uncleaned.
        df[c] = df[c].apply(lambda value: remove_accents(value) if isinstance(value, str) else value)
    return df

In [77]:
def save_df(df, columns_order, name):
    """Write the ``columns_order`` columns of ``df`` to ``OUTPUT_PATH/regions/<name>.csv``.

    The CSV is written without the index.
    """
    destination = OUTPUT_PATH / 'regions' / f'{name}.csv'
    selected = df[columns_order]
    selected.to_csv(destination, index=False)

# Comunas Chile

## Read and Filter file

In [226]:
# Read the comunas geometries from the raw 'comunas_shape' dataset.
df = gpd.read_file(RAW_PATH / 'regions' / 'comunas_shape')

In [227]:
# Reproject the geometries to EPSG:4326.
df = df.to_crs("epsg:4326")

In [228]:
# df = pd.read_excel(RAW_PATH / 'regions' / 'Comunas_Santiago_Chile_WKT.xlsx')
# Population table; its 'NOMBRE COMUNA' and 'TOTAL' columns are used by the merge further down.
pop = pd.read_excel(RAW_PATH / 'regions' / 'Poblacion_comuna_censo201.xlsx')

In [229]:
# Upper-case the comuna names before they are used as the join key.
df['NOM_COM'] = df['NOM_COM'].map(str.upper)

In [230]:
# Spot-check a single comuna in the population table.
pop[pop['NOMBRE COMUNA'] == 'ISLA DE MAIPO']

Unnamed: 0,NOMBRE COMUNA,Codigo_Comuna,Hombres,Mujeres,TOTAL
131,ISLA DE MAIPO,13603,18051,18168,36219


In [231]:
# Attach the population totals by comuna name. This is an inner join, so comunas whose
# names do not match exactly between the two sources are dropped.
population_by_comuna = pop[['NOMBRE COMUNA', 'TOTAL']]
df = df.merge(population_by_comuna, how='inner', left_on='NOM_COM', right_on='NOMBRE COMUNA')

In [232]:
# List the columns available after the merge.
df.columns

Index(['NOM_REG', 'NOM_PROV', 'NOM_COM', 'SHAPE_LENG', 'DIS_ELEC', 'CIR_SENA',
       'COD_COMUNA', 'SHAPE_Le_1', 'SHAPE_Area', 'geometry', 'NOMBRE COMUNA',
       'TOTAL'],
      dtype='object')

In [233]:
# Keep only the columns needed downstream and rename them to the output schema.
renamed_columns = {
    'geometry': 'region_shapefile_wkt',
    'NOM_COM': 'region_name',
    'TOTAL': 'population',
}
df = df[list(renamed_columns)].rename(columns=renamed_columns)

## Check for invalid WKTs



In [234]:
# not_valid = df[~df['region_shapefile_wkt'].apply(valid_wkt)]
# df = df[df['region_shapefile_wkt'].apply(valid_wkt)]
# not_valid.to_csv(TREAT_PATH / 'comunas_invalid_wkt.csv')

## Simplify WKTs

In [235]:
# Simplify the geometries until every WKT passes the length check.
df = simplify(df)

## Make region_slug

In [236]:
# Build 'region_slug' from 'region_name' with the 'ch_comunas_' prefix.
df = make_region_slug(df, 'ch_comunas_')

## Add remaining columns

In [237]:
# Constant values assigned to every Chilean comuna row.
columns_values = dict(
    country_name='Chile',
    country_iso='CL',
    region_type='submetro',
    dashboard='FALSE',
    timezone='America/Santiago',
)
df = add_other_columns(df, columns_values)

## Make sure there are no accents

In [238]:
# Text columns whose values are reduced to plain ASCII.
columns_accents = ['region_slug', 'region_name']
for c in columns_accents:
    df[c] = df[c].map(remove_accents)

In [245]:
# Convert each geometry to its string form so the CSV stores plain text
# (the preview below shows values such as 'POLYGON ((...').
df['region_shapefile_wkt'] = df['region_shapefile_wkt'].apply(str)

In [246]:
# Write the ordered columns to OUTPUT_PATH/regions/chile_comunas.csv via the shared helper.
save_df(df, columns_order, name='chile_comunas')

In [247]:
# Preview the first rows of the exported frame.
df.head()

Unnamed: 0,region_shapefile_wkt,region_name,population,region_slug,country_name,country_iso,region_type,dashboard,timezone
0,POLYGON ((-71.26653727615358 -33.4100898105313...,MARIA PINTO,13590,ch_comunas_maria_pinto,Chile,CL,submetro,False,America/Santiago
1,POLYGON ((-71.5386268365253 -33.76415338666177...,SAN PEDRO,9726,ch_comunas_san_pedro,Chile,CL,submetro,False,America/Santiago
2,POLYGON ((-70.6573023154117 -33.40478555612682...,INDEPENDENCIA,100281,ch_comunas_independencia,Chile,CL,submetro,False,America/Santiago
3,POLYGON ((-70.43946260195776 -33.4920620816655...,LA FLORIDA,366916,ch_comunas_la_florida,Chile,CL,submetro,False,America/Santiago
4,POLYGON ((-70.47723513261619 -33.3695027096460...,LO BARNECHEA,105833,ch_comunas_lo_barnechea,Chile,CL,submetro,False,America/Santiago


# Brazilian States

In [175]:
# Constant values assigned to every Brazilian state row; timezone and population are left empty.
columns_values = dict(
    country_name='Brasil',
    country_iso='BR',
    region_type='state',
    dashboard='FALSE',
    timezone=None,
    population=None,
)

## Load data

In [176]:
# Read the raw states table; its 'name_state' and 'geometry' columns are used below.
df = pd.read_csv(RAW_PATH / 'regions' / 'states_brasil.csv')

In [177]:
# Keep the state name and geometry, renamed to the output schema.
df = df[['name_state', 'geometry']].rename(
    columns={'name_state': 'region_name', 'geometry': 'region_shapefile_wkt'})

In [178]:
# Parse the WKT text into geometry objects, which simplify() needs for its .simplify() calls.
df['region_shapefile_wkt'] = df['region_shapefile_wkt'].apply(wkt.loads)

In [179]:
# Simplify the geometries until every WKT passes the length check.
df = simplify(df)

In [180]:
# Build 'region_slug' from 'region_name' with the 'br_states_' prefix.
df = make_region_slug(df, 'br_states_')

In [181]:
# Add the constant country/region columns defined in columns_values.
df = add_other_columns(df, columns_values)

In [182]:
# Strip accents from the region text columns.
df = remove_all_accents(df)

In [183]:
# Write the ordered columns to OUTPUT_PATH/regions/br_states.csv.
save_df(df, columns_order, name='br_states')

In [184]:
# Preview the first rows of the exported frame.
df.head()

Unnamed: 0,region_name,region_shapefile_wkt,region_slug,country_name,country_iso,region_type,dashboard,timezone,population
0,Rondonia,POLYGON ((-63.32720817710296 -7.97672029882056...,br_states_rondonia,Brasil,BR,state,False,,
1,Acre,POLYGON ((-73.18252539408962 -7.33549646429577...,br_states_acre,Brasil,BR,state,False,,
2,Amazonas,POLYGON ((-67.32608847052386 2.029713905720171...,br_states_amazonas,Brasil,BR,state,False,,
3,Roraima,POLYGON ((-60.20050657607426 5.264343437750694...,br_states_roraima,Brasil,BR,state,False,,
4,Para,POLYGON ((-54.95430669626876 2.583692423482197...,br_states_para,Brasil,BR,state,False,,
