##### Import libs

In [48]:
import pandas as pd
from decouple import config
import numpy as np
import ast
from geopy.geocoders import Nominatim
# Load enviroment variables.
ROOT_PATH       = config('ROOT_PATH')
DATA_RAW_PATH   = config('DATA_RAW_PATH')
pd.options.display.max_columns = 100

##### Methods

In [49]:
def get_geolocation (address:str)-> tuple[float,float] :
    '''
        This method receives an address and return your geolocation.

        Parameters : 
            address = A address to take geolocation
        Ex :
            return_geolocation('Rua José Gáspar')
        Returns :
            (-23.6624262, -46.6441204)
    '''
    geolocator = Nominatim(user_agent='foursquare_agent')
    location = geolocator.geocode(address)
    lat = location.latitude
    long = location.longitude
    return lat, long

In [None]:
def mount_filtered_dataframe (dataframe:pd.DataFrame, city_to_search:str,list_neighborhood:list)-> pd.DataFrame :
    '''
        This method receives a dataframe, city and list of neighborhood to looking for.

        Parameters : 
            dataframe : A dataframe with data to filter
            city_to_search : A name of city to search properties
            list_neighborhood : A list that contains the neighborhoods to looking for.
        Ex :
            neighborhoods = [
                'Bela Vista','Bom Retiro','Cambuci','Consolação',
                'Sé','Higienópolis','Liberdade','Santa Cecília'
                ]
            mount_filtered_dataframe(data_tmp, 'São Paulo', neighborhoods)
        Returns :
            dataframe with filtered data.
    '''
    dataframe_ = pd.DataFrame()
    for neighborhood in list_neighborhood:
        dataframe_ = pd.concat([dataframe_,dataframe.loc[(dataframe['city'] == city_to_search) & (dataframe['neighborhood'] == neighborhood)]],axis=0)
    return dataframe_.reset_index(drop=True)

##### Load data

In [50]:
data_tmp = pd.read_csv(ROOT_PATH+DATA_RAW_PATH+'dataframe_merged.csv')
data_raw = pd.read_csv(ROOT_PATH+DATA_RAW_PATH+'dataframe_raw_properties.csv')

  data_raw = pd.read_csv(ROOT_PATH+DATA_RAW_PATH+'dataframe_raw_properties.csv')


##### Processing data

In [51]:
# Transforming string in dict
data_raw['address'] = data_raw['address'].apply(lambda x: ast.literal_eval(x))
# Creating columns
data_raw['street'] = data_raw['address'].apply(lambda x: x['street'])
data_raw['neighborhood'] = data_raw['address'].apply(lambda x: x.get('neighborhood',np.nan))

##### Organizing data

In [52]:
# Reorganizing data
data_tmp.insert(2,'lat',data_tmp.pop('lat'))
data_tmp

Unnamed: 0,type,id,lat,lon,BANHEIRA_DE_HIDROMASSAGEM,BOX,VARANDA,PISCINA_PRIVATIVA,ARMARIOS_EMBUTIDOS_NO_QUARTO,ARMARIOS_NOS_BANHEIROS,ARMARIOS_NA_COZINHA,AR_CONDICIONADO,CHUVEIRO_A_GAS,QUARTO_DE_SERVICO,BANHEIRO_DE_SERVICO,VARANDA_GOURMET,APARTAMENTO_COBERTURA,QUARTO_EXTRA_REVERSIVEL,FOGAO_INCLUSO,GELADEIRA_INCLUSO,BANHEIRO_ADAPTADO,CLOSET,COZINHA_AMERICANA,MESAS_E_CADEIRAS_DE_ESCRITORIO,JARDIM,QUARTOS_E_CORREDORES_COM_PORTAS_AMPLAS,QUINTAL,SOMENTE_UMA_CASA_NO_TERRENO,AREA_DE_SERVICO
0,house,892986759,-23.663303,-46.642494,NAO,NAO,NAO_INFORMADO,NAO,NAO,NAO,SIM,NAO,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,SIM
1,house,893513303,-23.671671,-46.691886,NAO,SIM,NAO,NAO,NAO,SIM,SIM,NAO,NAO,SIM,SIM,NAO,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM,SIM,SIM
2,house,893567381,-23.462255,-46.555363,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM
3,house,893426340,-23.554803,-46.596351,SIM,SIM,NAO,NAO,NAO,SIM,SIM,NAO,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO
4,house,893475416,-23.675248,-46.609885,NAO,NAO,SIM,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9974,house,893703984,-23.540389,-46.652610,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM
9975,house,893697876,-23.494918,-46.740010,NAO,SIM,NAO,SIM,NAO,NAO,SIM,NAO,NAO,SIM,SIM,SIM,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO
9976,house,893669474,-23.587723,-46.582399,NAO,NAO,SIM,NAO,SIM,SIM,SIM,NAO,NAO,SIM,SIM,NAO,NAO,SIM,NAO,NAO,SIM,NAO,NAO,NAO,SIM,SIM,SIM,SIM,SIM
9977,house,893688435,-23.625045,-46.688631,NAO,SIM,NAO_INFORMADO,NAO,SIM,SIM,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO


In [53]:
# Creating a unique dataset with relevant data
mask = [
    'id','bedrooms', 'city','bathrooms',
    'iptu','area','rentPrice',
    'type','iptuType','totalCost',
    'parkingSpaces','floor','suites',
    'street','neighborhood']
data_tmp = data_tmp.merge(data_raw[mask],how='left', right_on='id',left_on='id').drop(['type_x'],axis=1).rename(columns={'type_y':'type'})

##### Creating dataset with properties of the São Paulo center

In [54]:
# Creating a list with neighborhoods of São Paulo center.
neighborhoods = [
    'Bela Vista','Bom Retiro','Cambuci','Consolação',
    'Sé','Higienópolis','Liberdade','Santa Cecília']

In [56]:
data_ = mount_filtered_dataframe(data_tmp, 'São Paulo', neighborhoods)

In [57]:
data_

Unnamed: 0,id,lat,lon,BANHEIRA_DE_HIDROMASSAGEM,BOX,VARANDA,PISCINA_PRIVATIVA,ARMARIOS_EMBUTIDOS_NO_QUARTO,ARMARIOS_NOS_BANHEIROS,ARMARIOS_NA_COZINHA,AR_CONDICIONADO,CHUVEIRO_A_GAS,QUARTO_DE_SERVICO,BANHEIRO_DE_SERVICO,VARANDA_GOURMET,APARTAMENTO_COBERTURA,QUARTO_EXTRA_REVERSIVEL,FOGAO_INCLUSO,GELADEIRA_INCLUSO,BANHEIRO_ADAPTADO,CLOSET,COZINHA_AMERICANA,MESAS_E_CADEIRAS_DE_ESCRITORIO,JARDIM,QUARTOS_E_CORREDORES_COM_PORTAS_AMPLAS,QUINTAL,SOMENTE_UMA_CASA_NO_TERRENO,AREA_DE_SERVICO,bedrooms,city,bathrooms,iptu,area,rentPrice,type,iptuType,totalCost,parkingSpaces,floor,suites,street,neighborhood
0,892790963,-23.555161,-46.641136,NAO,SIM,NAO_INFORMADO,NAO,NAO,NAO,SIM,NAO,SIM,NAO,NAO,NAO,NAO,NAO,SIM,SIM,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,2.0,São Paulo,1.0,81.0,70.0,2200.0,Apartamento,Normal,3157.0,0.0,13.0,0.0,Rua Jaceguai,Bela Vista
1,893517945,-23.568121,-46.645794,NAO,SIM,SIM,NAO,SIM,SIM,SIM,SIM,SIM,NAO,NAO,NAO,NAO,NAO,SIM,SIM,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,SIM,1.0,São Paulo,1.0,375.0,76.0,5090.0,StudioOuKitchenette,Normal,7641.0,1.0,14.0,1.0,Rua Cincinato Braga,Bela Vista
2,893262856,-23.563911,-46.643285,NAO,SIM,NAO_INFORMADO,NAO,SIM,SIM,SIM,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,SIM,3.0,São Paulo,4.0,350.0,185.0,6750.0,Apartamento,Normal,9234.0,1.0,5.0,1.0,Rua Artur Prado,Bela Vista
3,893362687,-23.557757,-46.651662,NAO,SIM,NAO_INFORMADO,NAO,SIM,SIM,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,SIM,3.0,São Paulo,2.0,85.0,110.0,3400.0,Apartamento,Normal,4711.0,1.0,7.0,1.0,Rua Doutor Plínio Barreto,Bela Vista
4,893550529,-23.556566,-46.654117,NAO,SIM,NAO,NAO,SIM,SIM,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM,NAO,NAO,SIM,1.0,São Paulo,1.0,0.0,50.0,3750.0,Apartamento,NaoExiste,4750.0,1.0,8.0,1.0,Rua Barata Ribeiro,Bela Vista
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
815,893332194,-23.537155,-46.653981,NAO,SIM,NAO_INFORMADO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,SIM,SIM,SIM,NAO,NAO,NAO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,1.0,São Paulo,1.0,47.0,43.0,1550.0,Apartamento,Normal,2198.0,0.0,1.0,0.0,Alameda Barros,Santa Cecília
816,893650305,-23.536810,-46.655461,SIM,SIM,SIM,SIM,SIM,SIM,SIM,NAO,SIM,SIM,SIM,SIM,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM,3.0,São Paulo,2.0,405.0,162.0,4000.0,Apartamento,Normal,6174.0,1.0,13.0,1.0,Alameda Barros,Santa Cecília
817,893256178,-23.534069,-46.659916,NAO,NAO,NAO_INFORMADO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,NAO_INFORMADO,1.0,São Paulo,1.0,125.0,30.0,2671.0,Apartamento,Normal,3388.0,1.0,11.0,0.0,Rua Conselheiro Brotero,Santa Cecília
818,892843144,-23.533266,-46.658983,NAO,SIM,NAO,NAO,NAO,NAO,SIM,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,NAO,SIM,NAO,NAO,SIM,NAO,NAO,SIM,1.0,São Paulo,1.0,2.0,60.0,2200.0,StudioOuKitchenette,Normal,2715.0,0.0,7.0,0.0,Rua Conselheiro Brotero,Santa Cecília
