In [73]:
#Carregamento das bibliotecas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from unidecode import unidecode

In [74]:
# Load the listings dataset: a semicolon-separated, UTF-8 encoded CSV.
# low_memory=False asks pandas to parse the file in one pass instead of in
# internal chunks.
df = pd.read_csv(
    './Dataset/imovelbr.csv',
    sep=';',
    encoding='utf-8',
    low_memory=False,
)

In [75]:
# Pre-processing: normalise every text (object-dtype) column by stripping
# accents and upper-casing, while keeping missing values missing.
for col in df.select_dtypes(include='object').columns:
    # Remember which cells are genuinely missing: astype(str) would otherwise
    # turn NaN into the literal text "nan" (then "NAN"), hiding it from
    # isna()/dropna() and inflating the non-null counts.
    is_present = df[col].notna()
    normalized = (
        df[col]
        .astype(str)      # non-string objects are stringified, as before
        .map(unidecode)   # transliterate accented characters to ASCII
        .str.upper()
    )
    # where() keeps the normalised text for present cells and puts NaN back
    # everywhere else.
    df[col] = normalized.where(is_present)

In [76]:
# First look at the data: column dtypes and non-null counts, followed by a
# preview of the first five rows (rendered as the cell output).
df.info()
df.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35772 entries, 0 to 35771
Data columns (total 78 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   account.licenseNumber                 35772 non-null  object 
 1   account.name                          35772 non-null  object 
 2   imvl_type                             35772 non-null  object 
 3   listing.acceptExchange                35772 non-null  bool   
 4   listing.address.city                  35772 non-null  object 
 5   listing.address.confidence            35772 non-null  object 
 6   listing.address.country               35772 non-null  object 
 7   listing.address.level                 35772 non-null  object 
 8   listing.address.neighborhood          35772 non-null  object 
 9   listing.address.point.lat             35772 non-null  object 
 10  listing.address.point.lon             35772 non-null  object 
 11  listing.address

Unnamed: 0,account.licenseNumber,account.name,imvl_type,listing.acceptExchange,listing.address.city,listing.address.confidence,listing.address.country,listing.address.level,listing.address.neighborhood,listing.address.point.lat,...,listing.title,listing.totalAreas,listing.unitFloor,listing.unitSubTypes,listing.unitTypes,listing.unitsOnTheFloor,listing.updatedAt,listing.usableAreas,listing.usageTypes,type
0,04268-J-SP,ADI ASSESSORIA E IMOVEIS LTDA,APARTAMENTOS,False,SAO PAULO,VALID_STREET,BR,STREET,PARADA INGLESA,-23.493796,...,SAO PAULO - APARTAMENTO PADRAO - PARADA INGLESA,45,0,NORMAL,APARTMENT,0,2020-07-24T05:06:08.843Z,45,RESIDENTIAL,SUPERPREMIUM
1,00254-J-SC,SAN REMOS IMOVEIS,APARTAMENTOS,False,FLORIANOPOLIS,VALID_STREET,BR,STREET,AGRONOMICA,-27.576887,...,FLORIANOPOLIS - APARTAMENTO PADRAO - AGRONOMICA,140,0,NORMAL,APARTMENT,0,2020-07-12T11:43:29.976Z,140,RESIDENTIAL,SUPERPREMIUM
2,48861-F-RJ,FAMILIA BACELLAR IMOBILIARIA,APARTAMENTOS,False,RIO DE JANEIRO,VALID_STREET,BR,STREET,RECREIO DOS BANDEIRANTES,-23.018798,...,RIO DE JANEIRO - APARTAMENTO PADRAO - RECREIO ...,100,0,NORMAL,APARTMENT,0,2020-07-30T03:11:32.521Z,100,RESIDENTIAL,SUPERPREMIUM
3,17452-J-SP,NEXT SOLUCOES IMOBILIARIAS,APARTAMENTOS,False,CAMPINAS,VALID_STREET,BR,STREET,CAMBUI,-22.899193,...,CAMPINAS - APARTAMENTO PADRAO - CAMBUI,80,0,NORMAL,APARTMENT,0,2020-07-16T17:15:06.708Z,80,RESIDENTIAL,PREMIUM
4,24344-J-SP,QUINTO ANDAR SERVICOS IMOBILIARIOS LTDA,APARTAMENTOS,False,SAO PAULO,VALID_STREET,BR,STREET,IPIRANGA,-23.598672,...,SAO PAULO - APARTAMENTO PADRAO - IPIRANGA,69,0,NORMAL,APARTMENT,0,2020-07-29T04:32:26.854Z,69,RESIDENTIAL,PREMIUM


In [77]:
# Print every column name of the DataFrame as a plain Python list.
print(list(df.columns))

['account.licenseNumber', 'account.name', 'imvl_type', 'listing.acceptExchange', 'listing.address.city', 'listing.address.confidence', 'listing.address.country', 'listing.address.level', 'listing.address.neighborhood', 'listing.address.point.lat', 'listing.address.point.lon', 'listing.address.point.source', 'listing.address.precision', 'listing.address.state', 'listing.address.street', 'listing.address.streetNumber', 'listing.address.zipCode', 'listing.address.zone', 'listing.advertiserId', 'listing.amenities', 'listing.backyard', 'listing.barbgrill', 'listing.bathrooms', 'listing.bathtub', 'listing.bedrooms', 'listing.businessTypeContext', 'listing.createdAt', 'listing.description', 'listing.displayAddressType', 'listing.externalId', 'listing.fireplace', 'listing.floors', 'listing.furnished', 'listing.garden', 'listing.guestpark', 'listing.gym', 'listing.hottub', 'listing.id', 'listing.isInactive', 'listing.legacyId', 'listing.link', 'listing.listingType', 'listing.mountainview', 'lis

In [78]:
# Give the coordinate columns short names so they are easier to reference.
df = df.rename(
    {
        'listing.address.point.lat': 'Latitude',
        'listing.address.point.lon': 'Longitude',
    },
    axis='columns',
)


In [79]:
# Drop rows where either coordinate is missing. Only true missing values
# (NaN/None) are detected here; non-numeric text is handled by the numeric
# conversion that follows.
df = df.dropna(how='any', subset=['Latitude', 'Longitude'])

In [80]:
# Convert the coordinate columns to a numeric dtype; any value that cannot be
# parsed as a number becomes NaN (errors='coerce').
df = df.assign(
    Latitude=pd.to_numeric(df['Latitude'], errors='coerce'),
    Longitude=pd.to_numeric(df['Longitude'], errors='coerce'),
)

In [81]:
# Drop the rows whose coordinates turned into NaN during the numeric
# conversion (i.e. values that were not parseable as numbers).
df = df.dropna(
    axis=0,
    how='any',
    subset=['Latitude', 'Longitude'],
)

In [82]:
# Check dtypes and non-null counts of the columns used by the map below.
df.loc[:, ['Latitude', 'Longitude', 'listing.totalAreas']].info()

<class 'pandas.core.frame.DataFrame'>
Index: 35634 entries, 0 to 35771
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Latitude            35634 non-null  float64
 1   Longitude           35634 non-null  float64
 2   listing.totalAreas  35634 non-null  object 
dtypes: float64(2), object(1)
memory usage: 1.1+ MB


In [83]:
# Interactive map of the listings with Plotly Express: one marker per row,
# coloured by city, with the neighbourhood as the hover title.
fig = px.scatter_map(
    df,
    lat='Latitude',
    lon='Longitude',
    color='listing.address.city',
    hover_name='listing.address.neighborhood',
    # True shows the column in the tooltip, False hides it; the raw
    # coordinates are hidden because the marker position already conveys them.
    hover_data={
        'listing.address.city': True,
        'listing.address.state': True,
        'listing.totalAreas': True,
        'Latitude': False,
        'Longitude': False,
    },
    zoom=4,
    height=600,
    color_discrete_sequence=px.colors.qualitative.Set2,
)

# Layout: legend hidden, centred title, and margins removed except for the
# space reserved at the top for the title. Marker size/opacity are applied to
# every trace.
fig.update_layout(
    showlegend=False,
    title=dict(
        text="Distribuição de Imóveis nas Principais Cidades Brasileiras",
        x=0.5,
        xanchor='center',
    ),
    margin=dict(r=0, t=50, l=0, b=0),
).update_traces(
    marker={'size': 8, 'opacity': 0.7},
)

fig.show()