# Refactor fields for a censo_gdf

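This notebook takes a GeoDataFrame (gdf) of census ("censo") data as input and casts every data column to float, leaving the identifier, name and geometry columns unchanged. The result is saved to a new GeoPackage file.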

## Import libraries

In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Add the directory two levels above the working directory to the import
# path, so that modules located there can be imported from this notebook.
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally. Nesting this import under the `if` above skipped it
# whenever the path was already registered (e.g. through PYTHONPATH),
# leaving `aup` undefined for the rest of the notebook.
import aup



In [21]:
# NOTE: despite the `_dir` suffix, both names hold full paths to .gpkg files
# (they are handed directly to the read and write calls further down).

# Source layer to read.
input_dir = (
    '../../data/external/temporal_todocker/arroyo_vivo/'
    'censo_mza_2020_cuenca.gpkg'
)

# Destination for the layer with refactored dtypes.
output_dir = (
    '../../data/external/temporal_fromjupyter/arroyo_vivo/'
    'censo_mza_2020_cuenca_refactored.gpkg'
)

### Load base data

In [18]:
# Read the census layer from the file path stored in `input_dir`
censo_gdf = gpd.read_file(input_dir)

# Show the (rows, columns) shape, then preview the first record
print(censo_gdf.shape)
censo_gdf.head(1)

(1188, 239)


Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,ENTIDAD,NOM_ENT,...,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC,geometry
0,1903900013659019,19,39,1,3659,19,Urbana,Típica,19,Nuevo León,...,,,,,,,,,,"POLYGON ((368847.538 2836039.654, 368856.076 2..."


### Transform data

In [19]:
# Inspect the column dtypes as loaded, before any casting is applied
censo_gdf.dtypes

CVEGEO           object
CVE_ENT          object
CVE_MUN          object
CVE_LOC          object
CVE_AGEB         object
                 ...   
VPH_SINRTV       object
VPH_SINLTC       object
VPH_SINCINT      object
VPH_SINTIC       object
geometry       geometry
Length: 239, dtype: object

In [20]:
# All columns
columns_list = list(censo_gdf.columns)
# Columns which we are not interested in changing (identifiers, names and
# the geometry). Names listed here that are absent from the gdf are harmless:
# the list is only used to exclude columns, never to select them.
loc_str_cols = ['CVEGEO','CVE_ENT','CVE_MUN','CVE_LOC','CVE_AGEB','CVE_MZA',
                'AMBITO','TIPOMZA','ENTIDAD','NOM_ENT','MUN','NOM_MUN','LOC','NOM_LOC','AGEB','MZA','geometry']

# Every remaining column is cast to float, keeping the original column order
data_cols = [col for col in columns_list if col not in loc_str_cols]

for col in data_cols:
    try:
        # Transform to numeric column
        censo_gdf[col] = censo_gdf[col].astype(float)
    except ValueError as err:
        # The bare astype error only reports the offending value; with 200+
        # columns being cast, name the column so the failure can be traced.
        raise ValueError(f"Column {col!r} could not be cast to float: {err}") from err

# Show
censo_gdf.dtypes

CVEGEO           object
CVE_ENT          object
CVE_MUN          object
CVE_LOC          object
CVE_AGEB         object
                 ...   
VPH_SINRTV      float64
VPH_SINLTC      float64
VPH_SINCINT     float64
VPH_SINTIC      float64
geometry       geometry
Length: 239, dtype: object

### Save data

In [22]:
# Make sure the destination folder exists before writing; a missing parent
# folder would otherwise make the write fail.
output_folder = os.path.dirname(output_dir)
if output_folder:
    os.makedirs(output_folder, exist_ok=True)

# State the driver explicitly so that producing a GeoPackage does not depend
# on the library inferring the format from the file extension.
censo_gdf.to_file(output_dir, driver='GPKG')