In [1]:
#import csv
#from dbfread import DBF 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
from shapely.geometry import LineString, mapping, shape, Point
import fiona

In [2]:
#dbf_file = DBF('OD 2017/OD_2017_v1.dbf')

In [3]:
'''with open('OD_2017_original.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(dbf_file.field_names)
    for record in dbf_file:
        writer.writerow(list(record.values()))'''

"with open('OD_2017_original.csv', 'w', newline='') as csvfile:\n    writer = csv.writer(csvfile)\n    writer.writerow(dbf_file.field_names)\n    for record in dbf_file:\n        writer.writerow(list(record.values()))"

In [4]:
def transforma_em_string(dataframe,tipo_original):
    for col in dataframe.select_dtypes(include=[tipo_original]):
        dataframe[col] = dataframe[col].astype('string')
    return dataframe

def cria_emissoes_kg_co2_km(x):
    if x <= 3:
        return 3.16
    elif 3 < x <= 8:
        return 1.28
    elif 8 < x <= 12:
        return 0.19
    elif 12 < x <= 14:
        return 0.07
    else:
        return 0
    
def cria_emissoes_kg_co2_km_pass(x):
    if x <= 3:
        return 0.0035
    elif 3 < x <= 8:
        return 0.0160
    elif 8 < x <= 12:
        return 0.1268
    elif 12 < x <= 14:
        return 0.0711
    else:
        return 0

def categoriza_modo_principal(x):
    if x <= 3:
        return "metrô, trem ou monotrilho"
    elif 3 < x <= 8:
        return "ônibus ou van"
    elif 8 < x <= 12:
        return "automovel"
    elif 12 < x <= 14:
        return "moto"
    else:
        return "bicileta ou a pé"

def cria_emissao_co2_total(dataframe, referencia, fator_expansao='FE_VIA', distancia='DISTANCIA'):
    return dataframe[referencia]*dataframe[fator_expansao]*dataframe[distancia]

def remove_percentil(dataframe, column, percentile=0.95):
    percentil = dataframe[column].quantile(percentile)
    return dataframe[dataframe[column] <= percentil]

def identifica_corrige_geometrias(geo_dataframe):
    invalid_geom = [not shape(g).is_valid for g in geo_dataframe.geometry] #Identifica geometrias invalidas

    for i, geom in enumerate(geo_dataframe.geometry):
        if not shape(geom).is_valid:
            geo_dataframe.geometry[i] = geom.buffer(0) #Coloca buffer a geometria:
    return geo_dataframe

def extrair_centroide(path):
    centroides = []

    with fiona.open(path) as shp:
        for feature in shp:
            feature_id = feature['id']
            geometry = shape(feature['geometry'])
            # Get the centroid and add it to the list with the ID
            centroid = geometry.centroid
            centroides.append([int(feature_id), centroid.x, centroid.y])
        dict_centroides = {item[0]: (item[1], item[2]) for item in centroides}
    return dict_centroides

def cria_linhas_geometria(dataframe, x1='CO_O_X', y1='CO_O_Y', x2='CO_D_X', y2='CO_D_Y'):
    geometria = [LineString([(row[x1], row[y1]), (row[x2], row[y2])]) for idx, row in dataframe.iterrows()]
    return geometria


In [5]:
dict_centroides = extrair_centroide('GIS/Zonas_2017_region.shp')
#df_zonas = pd.DataFrame(centroides, columns=['ZONA', 'ZONA_X', 'ZONA_Y'])

In [6]:
rel_distritos = pd.read_csv('zonas e distritos.csv')
dict_distritos = rel_distritos[['NumeroZona', 'NomeDistri', 'CO_DIST_X', 'CO_DIST_Y']].to_dict(orient='records')           

In [7]:
dict_distritos

[{'NumeroZona': 1,
  'NomeDistri': 'Sé',
  'CO_DIST_X': 333497.39826272253,
  'CO_DIST_Y': 7394926.36723766},
 {'NumeroZona': 2,
  'NomeDistri': 'Sé',
  'CO_DIST_X': 333497.39826272253,
  'CO_DIST_Y': 7394926.36723766},
 {'NumeroZona': 3,
  'NomeDistri': 'Sé',
  'CO_DIST_X': 333497.39826272253,
  'CO_DIST_Y': 7394926.36723766},
 {'NumeroZona': 4,
  'NomeDistri': 'República',
  'CO_DIST_X': 332517.6745955143,
  'CO_DIST_Y': 7395218.86175466},
 {'NumeroZona': 5,
  'NomeDistri': 'República',
  'CO_DIST_X': 332517.6745955143,
  'CO_DIST_Y': 7395218.86175466},
 {'NumeroZona': 6,
  'NomeDistri': 'República',
  'CO_DIST_X': 332517.6745955143,
  'CO_DIST_Y': 7395218.86175466},
 {'NumeroZona': 7,
  'NomeDistri': 'Bom Retiro',
  'CO_DIST_X': 332921.7539951412,
  'CO_DIST_Y': 7397306.936744695},
 {'NumeroZona': 8,
  'NomeDistri': 'Bom Retiro',
  'CO_DIST_X': 332921.7539951412,
  'CO_DIST_Y': 7397306.936744695},
 {'NumeroZona': 9,
  'NomeDistri': 'Bom Retiro',
  'CO_DIST_X': 332921.7539951412,
  '

In [8]:
base = pd.read_csv('OD_2017_original.csv')

In [9]:
#A base original possui apenas variáveis numéricas dos tipos int e float. Algumas dessas variáveis numéricas são códigos descritos na documentação
base.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183092 entries, 0 to 183091
Data columns (total 128 columns):
 #    Column      Dtype  
---   ------      -----  
 0    ZONA        int64  
 1    MUNI_DOM    int64  
 2    CO_DOM_X    int64  
 3    CO_DOM_Y    int64  
 4    ID_DOM      int64  
 5    F_DOM       int64  
 6    FE_DOM      float64
 7    DOM         int64  
 8    CD_ENTRE    int64  
 9    DATA        int64  
 10   TIPO_DOM    int64  
 11   AGUA        int64  
 12   RUA_PAVI    int64  
 13   NO_MORAD    int64  
 14   TOT_FAM     int64  
 15   ID_FAM      int64  
 16   F_FAM       int64  
 17   FE_FAM      float64
 18   FAMILIA     int64  
 19   NO_MORAF    int64  
 20   CONDMORA    int64  
 21   QT_BANHO    float64
 22   QT_EMPRE    float64
 23   QT_AUTO     float64
 24   QT_MICRO    float64
 25   QT_LAVALOU  float64
 26   QT_GEL1     float64
 27   QT_GEL2     float64
 28   QT_FREEZ    float64
 29   QT_MLAVA    float64
 30   QT_DVD      float64
 31   QT_MICROON  float64
 32 

<>Removendo colunas desinteressantes para o trabalho<>
<>Os detalhes sobre cada coluna estão disponíveis no arquivo LAYOUT OD2017_v1.xlsx<>

In [10]:
colunas_removidas = ['ID_DOM', 'F_DOM', 'DOM', 'CD_ENTRE', 'DATA', 'AGUA', 'RUA_PAVI', 'ID_FAM', 'F_FAM', 'QT_BANHO', 'QT_EMPRE',
                     'QT_MICRO', 'QT_LAVALOU', 'QT_GEL1', 'QT_GEL2', 'QT_FREEZ', 'QT_MLAVA', 'QT_DVD', 'QT_MICROON', 'QT_SECAROU',
                     'NAO_DCL_IT', 'PONTO_BR', 'CD_RENFA', 'ID_PESS', 'F_PESS', 'PESSOA', 'MUNIESC', 'VINC1', 'VINC2',
                     'DIA_SEM', 'PAG_VIAG', 'TP_ESAUTO', 'VL_EST', 'VIA_BICI', 'TP_ESBICI']


colunas_categoricas = ['ZONA', 'MUNI_DOM', 'TIPO_DOM', 'CONDMORA', 'CRITERIOBR', 'SIT_FAM', 'SEXO', 'ESTUDA', 'GRAU_INS', 'CD_ATIVI', 'CO_REN_I', 'TRAB1_RE', 'TRABEXT1', 'OCUP1',
                       'SETOR1', 'MUNITRA1', 'MUNITRA2', 'TRAB2_RE', 'TRABEXT2', 'OCUP2', 'SETOR2', 'ZONA_O', 'MUNI_O', 'ZONA_D', 'MUNI_D', 'ZONA_T1', 'MUNI_T1', 'ZONA_T2', 'MUNI_T2', 'ZONA_T3',
                       'MUNI_T3', 'MOTIVO_O', 'MOTIVO_D', 'MOT_SRES', 'SERVIR_O', 'SERVIR_D', 'MODO1', 'MODO2', 'MODO3', 'MODO4', 'MODOPRIN', 'TIPVG', 'PE_BICI']

df = base.drop(colunas_removidas, axis=1)
df[colunas_categoricas] = df[colunas_categoricas].astype('category'); 

In [11]:
#Removendo colunas com muitos valores nulos
col_nulos = ['ANO_AUTO2', 'ANO_AUTO3', 'ZONA_ESC', 'CO_ESC_X', 'CO_ESC_Y', 'TIPO_ESC', 'ZONATRA2', 'MUNITRA2', 'CO_TR2_X', 'CO_TR2_Y', 'TRAB2_RE', 'TRABEXT2', 'OCUP2', 'SETOR2',
             'ZONA_T1', 'MUNI_T1', 'CO_T1_X', 'CO_T1_Y', 'ZONA_T2', 'MUNI_T2', 'CO_T2_X', 'CO_T2_Y', 'ZONA_T3', 'MUNI_T3', 'CO_T3_X', 'CO_T3_Y', 'MODO2', 'MODO3', 'MODO4', 'PE_BICI']


df = df.drop(col_nulos, axis=1)

In [86]:
dict_distritos

[{'NumeroZona': 1,
  'NomeDistri': 'Sé',
  'CO_DIST_X': 333497.39826272253,
  'CO_DIST_Y': 7394926.36723766},
 {'NumeroZona': 2,
  'NomeDistri': 'Sé',
  'CO_DIST_X': 333497.39826272253,
  'CO_DIST_Y': 7394926.36723766},
 {'NumeroZona': 3,
  'NomeDistri': 'Sé',
  'CO_DIST_X': 333497.39826272253,
  'CO_DIST_Y': 7394926.36723766},
 {'NumeroZona': 4,
  'NomeDistri': 'República',
  'CO_DIST_X': 332517.6745955143,
  'CO_DIST_Y': 7395218.86175466},
 {'NumeroZona': 5,
  'NomeDistri': 'República',
  'CO_DIST_X': 332517.6745955143,
  'CO_DIST_Y': 7395218.86175466},
 {'NumeroZona': 6,
  'NomeDistri': 'República',
  'CO_DIST_X': 332517.6745955143,
  'CO_DIST_Y': 7395218.86175466},
 {'NumeroZona': 7,
  'NomeDistri': 'Bom Retiro',
  'CO_DIST_X': 332921.7539951412,
  'CO_DIST_Y': 7397306.936744695},
 {'NumeroZona': 8,
  'NomeDistri': 'Bom Retiro',
  'CO_DIST_X': 332921.7539951412,
  'CO_DIST_Y': 7397306.936744695},
 {'NumeroZona': 9,
  'NomeDistri': 'Bom Retiro',
  'CO_DIST_X': 332921.7539951412,
  '

In [87]:
for index, row in df.iterrows():
    for dic in dict_distritos:
        if row['ZONA_O'] == dic['NumeroZona']:
            df.at[index, 'DIST_O_X'] = dic['CO_DIST_X']
            df.at[index, 'DIST_O_Y'] = dic['CO_DIST_Y']
        if row['ZONA_D'] == dic['NumeroZona']:
            df.at[index, 'DIST_D_X'] = dic['CO_DIST_X']
            df.at[index, 'DIST_D_Y'] = dic['CO_DIST_Y']

In [12]:
for index, row in df.iterrows():
    for key, value in dict_centroides.items():
        if row['ZONA_O'] == key:
            df.at[index, 'ZONA_O_X'] = value[0]
            df.at[index, 'ZONA_O_Y'] = value[1]
        if row['ZONA_D'] == key:
            df.at[index, 'ZONA_D_X'] = value[0]
            df.at[index, 'ZONA_D_Y'] = value[1]

In [91]:
df

Unnamed: 0,ZONA,MUNI_DOM,CO_DOM_X,CO_DOM_Y,FE_DOM,TIPO_DOM,NO_MORAD,TOT_FAM,FE_FAM,FAMILIA,...,ZONA_D_X,ZONA_D_Y,DIST_O_X,DIST_O_Y,DIST_dicX,DIST_dicY,ref_emissoes_kg_co2_km,ref_emissoes_kg_co2_km_pass,total_emissoes_kg_co2,total_emissoes_kg_co2_pass
0,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,332449.199127,7.394473e+06,333497.398263,7.394926e+06,333497.398263,7.394926e+06,0.00,0.0000,0.000000,0.000000
1,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,333667.400883,7.395306e+06,333497.398263,7.394926e+06,333497.398263,7.394926e+06,0.00,0.0000,0.000000,0.000000
2,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,327650.555958,7.394134e+06,333497.398263,7.394926e+06,327683.307983,7.392771e+06,3.16,0.0035,396902.195344,439.606862
3,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,329812.015567,7.395625e+06,327683.307983,7.392771e+06,328442.533940,7.395850e+06,3.16,0.0035,253192.020395,280.434200
4,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,333667.400883,7.395306e+06,328442.533940,7.395850e+06,333497.398263,7.394926e+06,0.19,0.1268,16351.248076,10912.306611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183087,517,25,294618,7410518,85.531250,1,4,1,85.531250,1,...,,,297152.258548,7.413209e+06,297152.258548,7.413209e+06,0.00,0.0000,0.000000,0.000000
183088,517,25,295243,7411456,52.500000,1,2,1,52.500000,1,...,,,297152.258548,7.413209e+06,297152.258548,7.413209e+06,0.00,0.0000,0.000000,0.000000
183089,517,25,295243,7411456,52.500000,1,2,1,52.500000,1,...,,,297152.258548,7.413209e+06,297152.258548,7.413209e+06,0.00,0.0000,0.000000,0.000000
183090,517,25,295243,7411456,52.500000,1,2,1,52.500000,1,...,,,297152.258548,7.413209e+06,297152.258548,7.413209e+06,0.00,0.0000,0.000000,0.000000


In [13]:
df['ref_emissoes_kg_co2_km'] = df['MODOPRIN'].apply(cria_emissoes_kg_co2_km)
df['ref_emissoes_kg_co2_km_pass'] = df['MODOPRIN'].apply(cria_emissoes_kg_co2_km_pass)
df['total_emissoes_kg_co2'] = cria_emissao_co2_total(dataframe=df, referencia='ref_emissoes_kg_co2_km')
df['total_emissoes_kg_co2_pass'] = cria_emissao_co2_total(dataframe=df, referencia='ref_emissoes_kg_co2_km_pass')
df['MODOPRIN'] = df['MODOPRIN'].apply(categoriza_modo_principal)

In [90]:
df_trens = df[df['MODOPRIN'] == 'metrô, trem ou monotrilho']
df_onibus = df[df['MODOPRIN'] == 'ônibus ou van']
df_automovel = df[df['MODOPRIN'] == 'automovel']
df_moto = df[df['MODOPRIN'] == 'moto']
df_bike_a_pe = df[df['MODOPRIN'] == 'bicileta ou a pé ']

In [58]:
#df_ajustada = remove_percentil(dataframe=df, column='total_emissoes_kg_co2_pass', percentile=0.95)

In [15]:
#geometria_total = cria_linhas_geometria(df)
geometria_zonas = cria_linhas_geometria(df, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')



'''geometria_total_trens = cria_linhas_geometria(df_trens)
geometria_total_onibus = cria_linhas_geometria(df_onibus)
geometria_total_automovel = cria_linhas_geometria(df_automovel)
geometria_total_moto = cria_linhas_geometria(df_moto)
geometria_total_bike_pe = cria_linhas_geometria(df_bike_a_pe)

geometria_zonas_trens = cria_linhas_geometria(df_trens, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')
geometria_zonas_onibus = cria_linhas_geometria(df_onibus, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')
geometria_zonas_automovel = cria_linhas_geometria(df_automovel, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')
geometria_zonas_moto = cria_linhas_geometria(df_moto, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')
geometria_zonas_bike_pe = cria_linhas_geometria(df_bike_a_pe, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')'''


"geometria_total_trens = cria_linhas_geometria(df_trens)\ngeometria_total_onibus = cria_linhas_geometria(df_onibus)\ngeometria_total_automovel = cria_linhas_geometria(df_automovel)\ngeometria_total_moto = cria_linhas_geometria(df_moto)\ngeometria_total_bike_pe = cria_linhas_geometria(df_bike_a_pe)\n\ngeometria_zonas_trens = cria_linhas_geometria(df_trens, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')\ngeometria_zonas_onibus = cria_linhas_geometria(df_onibus, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')\ngeometria_zonas_automovel = cria_linhas_geometria(df_automovel, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')\ngeometria_zonas_moto = cria_linhas_geometria(df_moto, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')\ngeometria_zonas_bike_pe = cria_linhas_geometria(df_bike_a_pe, x1='ZONA_O_X', y1='ZONA_O_Y', x2='ZONA_D_X', y2='ZONA_D_Y')"

In [16]:
'''gdf_base = gdf = gpd.GeoDataFrame(df, geometry=geometria_total)
gdf_base = transforma_em_string(gdf_base, 'category')'''
gdf_zonas = gpd.GeoDataFrame(df, geometry=geometria_zonas)
gdf_zonas = transforma_em_string(gdf_zonas, 'category')

'''gdf_trens = gpd.GeoDataFrame(df_trens, geometry=geometria_zonas_trens)
gdf_onibus = gpd.GeoDataFrame(df_onibus, geometry=geometria_zonas_onibus)
gdf_automovel = gpd.GeoDataFrame(df_automovel, geometry=geometria_zonas_automovel)
gdf_moto = gpd.GeoDataFrame(df_moto, geometry=geometria_zonas_moto)
gdf_bike_pe = gpd.GeoDataFrame(df_bike_a_pe, geometry=geometria_zonas_bike_pe)'''

'gdf_trens = gpd.GeoDataFrame(df_trens, geometry=geometria_zonas_trens)\ngdf_onibus = gpd.GeoDataFrame(df_onibus, geometry=geometria_zonas_onibus)\ngdf_automovel = gpd.GeoDataFrame(df_automovel, geometry=geometria_zonas_automovel)\ngdf_moto = gpd.GeoDataFrame(df_moto, geometry=geometria_zonas_moto)\ngdf_bike_pe = gpd.GeoDataFrame(df_bike_a_pe, geometry=geometria_zonas_bike_pe)'

In [None]:
gdf_zonas.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 183092 entries, 0 to 183091
Data columns (total 72 columns):
 #   Column                       Non-Null Count   Dtype   
---  ------                       --------------   -----   
 0   ZONA                         183092 non-null  string  
 1   MUNI_DOM                     183092 non-null  string  
 2   CO_DOM_X                     183092 non-null  int64   
 3   CO_DOM_Y                     183092 non-null  int64   
 4   FE_DOM                       183092 non-null  float64 
 5   TIPO_DOM                     183092 non-null  string  
 6   NO_MORAD                     183092 non-null  int64   
 7   TOT_FAM                      183092 non-null  int64   
 8   FE_FAM                       183092 non-null  float64 
 9   FAMILIA                      183092 non-null  int64   
 10  NO_MORAF                     183092 non-null  int64   
 11  CONDMORA                     183092 non-null  string  
 12  QT_AUTO                      178930 

In [17]:
#for dataframe in [gdf_trens, gdf_onibus, gdf_automovel, gdf_moto, gdf_bike_pe]:
#    dataframe = transforma_em_string(dataframe, 'category')
#    dataframe = identifica_corrige_geometrias(dataframe)

# Exportando o shapefile
gdf_zonas.crs = {'init': 'epsg:4326'}
gdf_zonas = transforma_em_string(gdf_zonas, 'category')
gdf_zonas = identifica_corrige_geometrias(gdf_zonas)

gdf_zonas.to_file('OD_Zonas_Zonas_Lines.shp', driver='ESRI Shapefile')
#gdf_trens.to_file('OD_Trens_Zonas_Lines.shp', driver='ESRI Shapefile')
#gdf_onibus.to_file('OD_Onibus_Zonas_Lines.shp', driver='ESRI Shapefile')
#gdf_automovel.to_file('OD_Automovel_Zonas_Lines.shp', driver='ESRI Shapefile')
#gdf_moto.to_file('OD_Moto_Zonas_Lines.shp', driver='ESRI Shapefile')
#gdf_bike_pe.to_file('OD_Bike_Zonas_Lines.shp', driver='ESRI Shapefile')

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  gdf_zonas.to_file('OD_Zonas_Zonas_Lines.shp', driver='ESRI Shapefile')


In [18]:
fig, axs = plt.subplots(figsize=(10,10), ncols=1, nrows=2)
gdf_zonas.plot(column='total_emissoes_kg_co2_pass', ax=axs[0], linewidth=0.002)
#total_emissoes = sns.histplot(data=df_ajustada, x='total_emissoes_kg_co2_pass', hue='MODOPRIN')

ValueError: 'box_aspect' and 'fig_aspect' must be positive

Error in callback <function _draw_all_if_interactive at 0x0000028BD6218940> (for post_execute):


ValueError: 'box_aspect' and 'fig_aspect' must be positive

ValueError: 'box_aspect' and 'fig_aspect' must be positive

<Figure size 1000x1000 with 2 Axes>

In [None]:
df.head()

Unnamed: 0,ZONA,MUNI_DOM,CO_DOM_X,CO_DOM_Y,FE_DOM,TIPO_DOM,NO_MORAD,TOT_FAM,FE_FAM,FAMILIA,...,H_CHEG,MIN_CHEG,ANDA_D,DURACAO,MODOPRIN,TIPVG,DISTANCIA,ID_ORDEM,emissoes_kg_co2_km,emissoes_kg_co2_km_pass
0,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,5.0,55.0,,10.0,16.0,3.0,639.13,1,0.0,0.0
1,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,15.0,55.0,,10.0,16.0,3.0,639.13,2,0.0,0.0
2,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,9.0,50.0,20.0,50.0,1.0,1.0,6651.77,3,3.16,0.0035
3,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,18.0,0.0,1.0,60.0,1.0,1.0,4243.3,4,3.16,0.0035
4,1,36,333743,7394463,15.416667,1,2,1,15.416667,1,...,23.0,30.0,1.0,40.0,12.0,2.0,4557.62,5,0.19,0.1268


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183092 entries, 0 to 183091
Data columns (total 63 columns):
 #   Column      Non-Null Count   Dtype   
---  ------      --------------   -----   
 0   ZONA        183092 non-null  category
 1   MUNI_DOM    183092 non-null  category
 2   CO_DOM_X    183092 non-null  int64   
 3   CO_DOM_Y    183092 non-null  int64   
 4   FE_DOM      183092 non-null  float64 
 5   TIPO_DOM    183092 non-null  category
 6   NO_MORAD    183092 non-null  int64   
 7   TOT_FAM     183092 non-null  int64   
 8   FE_FAM      183092 non-null  float64 
 9   FAMILIA     183092 non-null  int64   
 10  NO_MORAF    183092 non-null  int64   
 11  CONDMORA    183092 non-null  category
 12  QT_AUTO     178930 non-null  float64 
 13  QT_MOTO     178898 non-null  float64 
 14  QT_BICICLE  178886 non-null  float64 
 15  CRITERIOBR  178886 non-null  category
 16  ANO_AUTO1   115248 non-null  float64 
 17  RENDA_FA    183092 non-null  float64 
 18  FE_PESS     183092 non-n