# Pré-processamento dos dados

In [1]:
! pip install geopandas
! pip install fiona==1.9.6
! pip install dash dash-leaflet geopandas pandas

# ! pip install --upgrade geopandas
# ! pip show geopandas
# ! pip show fiona

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting fiona==1.9.6
  Using cached fiona-1.9.6.tar.gz (411 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[3 lines of output][0m
  [31m   [0m CRITICAL:root:A GDAL API version must be specified. Provide a path to gdal-config using a GDAL_CONFIG environment variable or use a GDAL_VERSION environment variable.
  [31m   [0m [31m[end of output][0m
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
[?25h[1;31merror[0m: [1msubprocess-exited-with-error[0m

[31m×[0m [32mGetting requirements to build wheel[0m did 

In [None]:
# imports
import re
import math
import pandas as pd
import numpy as np
from pyproj import Transformer

In [9]:
# Reproject the neighbourhood polygons

import geopandas as gpd

# Load the official neighbourhood layer (the source file is expected to be in
# EPSG:31983) and convert it to EPSG:4326 (decimal degrees) in a single chain.
gdf = gpd.read_file("BAIRRO_OFICIAL_bh.geojson").to_crs("EPSG:4326")

# Write the reprojected layer to a new GeoJSON file.
gdf.to_file("BAIRRO_OFICIAL_bh_reprojetado.geojson", driver="GeoJSON")


## Fazendo o dataframe dos bares

In [None]:
# Defining the columns - COORD_GEO column.
# Load the bar table that the rest of this notebook cleans and enriches.
df =  pd.read_csv("dados_com_coordenadas.csv") # Generated by script_coordenadas

# Transformer from UTM zone 23S (EPSG:31983) to WGS 84 (EPSG:4326).
# always_xy=True makes transform() take (x, y) and return (lon, lat), in that order.
transformer = Transformer.from_crs("EPSG:31983", "EPSG:4326", always_xy=True)

def converter_utm_para_latlon(geom_str):
    """Convert a WKT-like ``POINT (x y)`` string in EPSG:31983 to ``(lat, lon)``.

    Parameters
    ----------
    geom_str : Any
        Value of the GEOMETRIA column. Only strings containing a ``POINT``
        with two numeric ordinates are converted; anything else (placeholder
        text, NaN, None, ...) is treated as "no geometry".

    Returns
    -------
    tuple[float, float] | None
        ``(lat, lon)`` in EPSG:4326, or ``None`` when no point could be read.
    """
    # Missing cells reach this function as NaN/None rather than text, and
    # re.search would raise TypeError on them.
    if not isinstance(geom_str, str):
        return None

    # Tolerate optional whitespace so both "POINT (x y)" and "POINT(x y)" match.
    match = re.search(r"POINT\s*\(\s*(-?[\d\.]+)\s+(-?[\d\.]+)\s*\)", geom_str)
    if match is None:
        return None

    x = float(match.group(1))
    y = float(match.group(2))
    # `transformer` is the module-level pyproj Transformer; it was created with
    # always_xy=True, so it returns (lon, lat). The result is swapped to (lat, lon).
    lon, lat = transformer.transform(x, y)
    return (lat, lon)

# Build COORD_GEO by converting every GEOMETRIA value to a (lat, lon) tuple
# (None when no POINT could be read from the value).
df["COORD_GEO"] = df["GEOMETRIA"].apply(converter_utm_para_latlon)

# print(df.dtypes)
# df.head()

In [None]:

# Columns in which the "indisponivel" placeholder is normalised to "NA".
placeholder_cols = [
    "ID_ATIV_ECON_ESTABELECIMENTO",
    "CNAE_PRINCIPAL",
    "DATA_INICIO_ATIVIDADE",
    "GEOMETRIA",
]
df[placeholder_cols] = df[placeholder_cols].replace("indisponivel", "NA")

# Fall back to the registered name (NOME) when the trade name is unavailable.
df["NOME_FANTASIA"] = df["NOME_FANTASIA"].mask(
    df["NOME_FANTASIA"] == "indisponivel", df["NOME"]
)

df["ID_ATIV_ECON_ESTABELECIMENTO"] = df["ID_ATIV_ECON_ESTABELECIMENTO"].astype(str)

# ID_CDB is the CDB number as a zero-padded, three-character string; it
# replaces the original CDB column.
df["ID_CDB"] = df["CDB"].astype(int).map("{:03}".format)
del df["CDB"]
df["CNAE_PRINCIPAL"] = df["CNAE_PRINCIPAL"].astype(str)


def _to_date_or_na(valor):
    """Parse a ``dd-mm-YYYY`` string into a ``date``; keep the "NA" marker as is."""
    if valor == "NA":
        return "NA"
    return pd.to_datetime(valor, format="%d-%m-%Y").date()


df["DATA_INICIO_ATIVIDADE"] = df["DATA_INICIO_ATIVIDADE"].apply(_to_date_or_na)

# Remaining descriptive columns are stored as plain strings.
text_cols = [
    "IND_POSSUI_ALVARA",
    "ENDERECO_COMPLETO",
    "NOME",
    "NOME_FANTASIA",
    "GEOMETRIA",
]
df[text_cols] = df[text_cols].astype(str)

def parse_coord_str(x):
    """Normalise a coordinate value to a tuple of floats rounded to 7 decimals.

    Parameters
    ----------
    x : str | tuple | list | float | int | Any
        * ``str``: text such as ``"(-19.9, -43.9)"``. Text that does not hold
          exactly two numbers (empty string, placeholder text, wrong arity)
          yields ``(nan, nan)`` instead of raising.
        * ``tuple`` / ``list``: used as is, each element rounded.
        * ``float`` / ``int``: duplicated into both positions.
          NOTE(review): in practice this branch presumably only sees the NaN
          that marks a missing cell - confirm before relying on it for real numbers.
        * anything else (e.g. ``None``): ``(nan, nan)``.

    Returns
    -------
    tuple
        The coordinate pair, each component rounded to 7 decimal places.
    """
    missing = (float("nan"), float("nan"))

    if isinstance(x, str):
        parts = x.strip().replace("(", "").replace(")", "").split(",")
        if len(parts) != 2:
            return missing
        try:
            coords = (float(parts[0]), float(parts[1]))
        except ValueError:
            # Unparseable text is treated as a missing coordinate.
            return missing
    elif isinstance(x, (tuple, list)):
        coords = x
    elif isinstance(x, (float, int)):
        coords = (float(x), float(x))
    else:
        return missing

    return tuple(round(c, 7) for c in coords)

# Normalise both coordinate columns to rounded float tuples.
for coord_col in ("COORDS", "COORD_GEO"):
    df[coord_col] = df[coord_col].apply(parse_coord_str)

# Reorder the columns so that ID_CDB becomes the second one.
cols = [c for c in df.columns if c != "ID_CDB"]
cols.insert(1, "ID_CDB")
df = df[cols]

# print(df.dtypes)
# df.head()

ID_ATIV_ECON_ESTABELECIMENTO    object
ID_CDB                          object
CNAE_PRINCIPAL                  object
DATA_INICIO_ATIVIDADE           object
IND_POSSUI_ALVARA               object
ENDERECO_COMPLETO               object
NOME                            object
NOME_FANTASIA                   object
GEOMETRIA                       object
COORDS                          object
COORD_GEO                       object
dtype: object


In [73]:
# Persist the cleaned bar table; index=False leaves the row index out of the CSV.
df.to_csv("complete_bar_data.csv", index=False, encoding="utf-8")

### Buscando os bares do Comida di Buteco cujas coordenadas a API não encontrou

In [None]:
def encontrar_bares_sem_coords(df):
    """List the Comida di Buteco bars that have no usable coordinate at all.

    A bar is selected when its ``ID_CDB`` differs from ``"000"`` and both
    ``COORDS`` and ``COORD_GEO`` are missing (both components NaN, or a value
    that cannot be read as a pair of numbers).

    ``ID_CDB`` is normalised to a zero-padded, three-character string before
    the comparison, so IDs that lost their padding in a CSV round trip
    (``"000"`` read back as the integer ``0``) are still recognised.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ID_CDB``, ``COORDS``, ``COORD_GEO``, ``NOME`` and
        ``ENDERECO_COMPLETO``. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``NOME`` and ``ENDERECO_COMPLETO`` of the selected bars, with
        a fresh 0..n-1 index.
    """
    sem_coordenada = (np.nan, np.nan)

    def normalizar_id_cdb(valor):
        # 0, 0.0, "0" and "000" all become "000"; non-numeric values are kept as text.
        try:
            return f"{int(float(valor)):03}"
        except (TypeError, ValueError, OverflowError):
            return str(valor)

    def parse_coord(x):
        # Accept "(lat, lon)" text as well as 2-item tuples/lists; anything
        # else counts as a missing coordinate.
        if isinstance(x, str):
            partes = x.strip().replace("(", "").replace(")", "").split(",")
        elif isinstance(x, (tuple, list)):
            partes = x
        else:
            return sem_coordenada
        if len(partes) != 2:
            return sem_coordenada
        try:
            return (float(partes[0]), float(partes[1]))
        except (TypeError, ValueError):
            return sem_coordenada

    def is_nan_tuple(valor):
        lat, lon = parse_coord(valor)
        return math.isnan(lat) and math.isnan(lon)

    # Filters, computed on temporary Series so the caller's frame stays untouched.
    filtro_cdb = df["ID_CDB"].map(normalizar_id_cdb) != "000"
    filtro_coords_nan = df["COORDS"].map(is_nan_tuple).astype(bool)
    filtro_coordgeo_nan = df["COORD_GEO"].map(is_nan_tuple).astype(bool)

    resultado = df.loc[
        filtro_cdb & filtro_coords_nan & filtro_coordgeo_nan,
        ["NOME", "ENDERECO_COMPLETO"]
    ].reset_index(drop=True)

    return resultado


In [5]:
# Read ID_CDB as text: with the default type inference, zero-padded IDs such as
# "000" are parsed as integers and lose their padding, so they would stop
# matching the "000" sentinel compared inside encontrar_bares_sem_coords.
df = pd.read_csv("complete_bar_data.csv", dtype={"ID_CDB": str})
resultado = encontrar_bares_sem_coords(df)
print(resultado)

                     NOME                                  ENDERECO_COMPLETO
0              MAMUTE BAR  RUA BRAS CUBAS, 116, CRUZEIRO, BELO HORIZONTE,...
1          CONECTADOS BAR  AVENIDA FRANCISCO SA, 280, PRADO, BELO HORIZON...
2               S.O.S PUB  AVENIDA PADRE JOSE MAURICIO, 1126, VISTA ALEGR...
3      KÖBES EMPORIUM BAR  RUA PROFESSOR RAIMUNDO NONATO, 31, SANTA TEREZ...
4            BUTECO'S BAR  RUA ERNESTO BRAGA, 2, JARDIM ATLANTICO, BELO H...
5       DECK BOI NA BRASA  RUA DESEMBARGADOR JOSÉ SATYRO, 302, MANACAS, B...
6            BAR DO ROMEU  RUA ANIRY, 8, GUARANI, BELO HORIZONTE, MG, BRASIL
7       COMPANHIA DO DINO  RUA JOAQUIM CLEMENTE, 682, FLORAMAR, BELO HORI...
8          ANDRADE'S BEER  RUA DONA GENI, 32, VENDA NOVA, BELO HORIZONTE,...
9      TROPEIRO DO LISBOA  AVENIDA LEONTINO FRANCISCO ALVES, 506, SERRA V...
10   ESPETINHOS DO PAULÃO  RUA ALVARO MATA, 466, NOVA CACHOEIRINHA, BELO ...
11                  MULÃO  RUA MARAMBAIA, 291, CAICARAS, BELO HORIZONTE, ...

### Ajuste manual
Pesquisei os endereços no Google Maps e adicionei as coordenadas manualmente ao `complete_bar_data.csv`. Assim garantimos que pelo menos uma das colunas, COORDS ou COORD_GEO, tenha alguma coordenada e, portanto, sempre conseguimos exibir marcadores para os bares.

BUTECO'S BAR: Rua Ernesto Braga 02 Jardim Atlântico, Belo Horizonte -> -19.8478398,-43.9870588  
DECK BOI NA BRASA: Rua Desembargador José Satyro, 302 - Castelo, Belo Horizonte -> -19.8876361,-43.9964471  
COMPANHIA DO DINO: Avenida Joaquim Clemente, 682 - Floramar, Belo Horizonte -> -19.8372406,-43.9332652  

In [None]:
# The bars are indexed by their establishment ID from here on.

df = (
    pd.read_csv("complete_bar_data.csv")
    .assign(
        # Rows without an establishment ID fall back to ID_CDB; the
        # float -> int -> str chain writes the ID without a decimal part.
        ID_ATIV_ECON_ESTABELECIMENTO=lambda dados: (
            dados["ID_ATIV_ECON_ESTABELECIMENTO"]
            .fillna(dados["ID_CDB"])
            .astype(float)
            .astype(int)
            .astype(str)
        )
    )
    .set_index("ID_ATIV_ECON_ESTABELECIMENTO")
)

# Save keeping the index (it matters here!).
df.to_csv("complete_bar_data1.csv", index=True)
