In [22]:
# initial setup
try:
    # settings colab:
    import google.colab
        
except ModuleNotFoundError:    
    # settings local:
    %run "../../../common/0_notebooks_base_setup.py"

pandas=1.0.3 already installed
matplotlib=2.2.2 already installed
bokeh=2.0.0 already installed
seaborn=0.10.0 already installed


In [23]:
import pandas as pd
import seaborn as sns
import re
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import chart_studio
import chart_studio.plotly as py
import unidecode as ud

In [24]:
pd.set_option('display.max_rows', 300)
pd.set_option('display.width', 5000)

In [25]:
import chardet
def get_encoding_type(csv_path):
    rawdata = open(csv_path, 'rb').read()
    result = chardet.detect(rawdata)
    return result.get('encoding')

In [26]:
data = pd.read_csv("properati.csv", encoding="iso-8859-1")

In [27]:
#Creo una copia del DF
data_clean = data.copy(deep=True)

In [28]:
#Columnas que son string
columnas_str = ['operation', 'property_type', 'place_name', 'place_with_parent_names', 'country_name', 'state_name', 'description', 'title']

In [29]:
#Le quito los espacios en blanco
for columna in columnas_str:
    data_clean[columna] = data_clean[columna].str.strip()

<span style="font-size:24px">OPERACION<span>

In [30]:
#Buscar nulos
data_clean["property_type"].isnull().sum()

0

In [31]:
#Expresión regular para buscar venta o alquiler
regex = re.compile("(venta)|(alquiler)", flags = re.IGNORECASE)

In [32]:
#Buscar Venta o Alquiler en description
regexOperacionesDescription = data_clean.description.apply(lambda x: regex.search(str(x)))
dfVentasDescription = regexOperacionesDescription.apply(lambda x: np.NaN if x is None else x.group(0))

#Normalizar
dfVentasDescription[dfVentasDescription.notnull()].unique()
dfVentasDescription.replace(["VENTA", "venta"], "Venta", inplace=True)
dfVentasDescription.replace(["alquiler", "ALQUILER"], "Alquiler", inplace=True)
dfVentasDescription[dfVentasDescription.notnull()].unique()

#Crear columna Operacion
data_clean["Operacion_Description"] = dfVentasDescription
data_clean["Operacion_Description"].value_counts()

Venta       4624
Alquiler     101
Name: Operacion_Description, dtype: int64

In [33]:
#Buscar Venta o Alquiler en title
regexOperacionesTitle = data_clean.title.apply(lambda x: regex.search(str(x)))
dfVentasTitle = regexOperacionesTitle.apply(lambda x: np.NaN if x is None else x.group(0))

#Normalizar
dfVentasTitle[dfVentasTitle.notnull()].unique()
dfVentasTitle.replace(["VENTA", "venta"], "Venta", inplace=True)
dfVentasTitle.replace(["alquiler", "ALQUILER"], "Alquiler", inplace=True)
dfVentasTitle[dfVentasTitle.notnull()].unique()

#Crear columna Title
data_clean["Operacion_Title"] = dfVentasTitle
data_clean["Operacion_Title"].value_counts()

Venta       6523
Alquiler      10
Name: Operacion_Title, dtype: int64

In [34]:
#Buscar Venta o Alquiler en properati_url
regexOperacionesURL = data_clean.properati_url.apply(lambda x: regex.search(str(x)))
dfVentasURL = regexOperacionesURL.apply(lambda x: np.NaN if x is None else x.group(0))

#Normalizar
dfVentasURL[dfVentasURL.notnull()].unique()
dfVentasURL.replace(["VENTA", "venta"], "Venta", inplace=True)
dfVentasURL.replace(["alquiler", "ALQUILER"], "Alquiler", inplace=True)
dfVentasURL[dfVentasURL.notnull()].unique()

#Crear columna URL
data_clean["Operacion_URL"] = dfVentasURL
data_clean["Operacion_URL"].value_counts()

Venta       12793
Alquiler       38
Name: Operacion_URL, dtype: int64

In [36]:
#Precios mínimos y máximos para alquiler
data_clean[data_clean.price_aprox_usd > 0].filter(items=["property_type", "price_aprox_usd"]).groupby(by="property_type").agg(['min', 'max'])

Unnamed: 0_level_0,price_aprox_usd,price_aprox_usd
Unnamed: 0_level_1,min,max
property_type,Unnamed: 1_level_2,Unnamed: 2_level_2
PH,14020.91,2750000.0
apartment,5000.0,4000000.0
house,7010.45,11000000.0
store,8412.55,4600000.0


In [37]:
#Revisar los casos con precios menores a 10.000 para ver que no sean alquileres
data_clean[(data_clean.property_type == "house") & (data_clean.price_aprox_usd < 10000)].filter(items=["price_aprox_usd", "description"])

Unnamed: 0,price_aprox_usd,description
7440,7010.45,"EXCELENTE CABAÃA 3 DORM, 2 BAÃOS, PARQUE, A ..."
9665,9534.22,CODIGO: 7 ubicado en: Juana Manuela Gorritti -...
9761,0.0,Casa en Venta de 2 dorm. en Armenia
12372,8412.55,FRACCION DE TERRENO AL FONDOA SUBDIVIDIR. AL F...


In [38]:
#Revisar los casos con precios menores a 10.000 para ver que no sean alquileres
data_clean[(data_clean.property_type == "apartment") & (data_clean.price_aprox_usd < 10000)].filter(items=["price_aprox_usd", "description"])

Unnamed: 0,price_aprox_usd,description
4399,5000.0,Corredor Responsable: Mauro Marvisi - CMCPSI 5...
6720,5608.31,BV Rondeau / Nancen: A metros del Shoping Port...


In [41]:
#Revisar los casos con precios menores a 10.000 para ver que no sean alquileres
data_clean[(data_clean.property_type == "store") & (data_clean.price_aprox_usd < 50000)].filter(items=["price_aprox_usd", "description"])

Unnamed: 0,price_aprox_usd,description
803,38000.0,Venta de FONDO DE COMERCIO - PLAYA DE ESTACION...
836,8412.55,"Venta de Negocio en Tolosa, La Plata116 entre..."
4598,26500.0,LOCAL 26 METROS CUADRADOS 30 CUOTAS DE U$S 735...
5165,21423.96,"EDIFICIO FLORENTINA - SANTIAGO 880, ROSARIO EN..."
5416,33650.2,"Venta de Negocio , CipollettiLIMARTI PROPIEDAD..."
5505,45000.0,CODIGO: 1008-LOC1478 ubicado en: GARCIA 147 - ...
5533,37000.0,Local en Galeria Shopping Victoria a metros de...
6335,35000.0,Corredor Responsable: Uno Bienes Raices SRL - ...
6371,36454.38,Ubicado en una esquina estratÃ©gica del barrio...
6446,35000.0,"LOCALVenta de Local en Centro / Microcentro, C..."


In [47]:
regex = re.compile("venta de negocio|fondo de comercio", flags = re.IGNORECASE)
regexLocal = data_clean[(data_clean.property_type == "store")].description.apply(lambda x: regex.search(str(x)))
locales = regexLocal[regexLocal.notnull()].apply(lambda x: np.NaN if x is None else x.group(0))
data_clean.loc[locales.index, "property_type"] = "Fondo de Comercio"

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,rooms,expenses,properati_url,description,title,image_thumbnail,Operacion_Description,Operacion_Title,Operacion_URL,Operacion
803,803,sell,Fondo de Comercio,Ramos MejÃ­a,|Argentina|Bs.As. G.B.A. Zona Oeste|La Matanza...,Argentina,Bs.As. G.B.A. Zona Oeste,3429617.0,"-34.6394558,-58.5536377",-34.639456,...,,,http://www.properati.com.ar/15g0q_venta_local_...,Venta de FONDO DE COMERCIO - PLAYA DE ESTACION...,NEGOCIO EN VENTA,https://thumbs4.properati.com/7/zwePHS6ujpG-B5...,Venta,Venta,Venta,Venta
836,836,sell,Fondo de Comercio,Tolosa,|Argentina|Bs.As. G.B.A. Zona Sur|La Plata|Tol...,Argentina,Bs.As. G.B.A. Zona Sur,3427714.0,"-34.8866047479,-57.9694639519",-34.886605,...,1.0,,http://www.properati.com.ar/15g6s_venta_local_...,"Venta de Negocio en Tolosa, La Plata116 entre...",NEGOCIO EN VENTA,https://thumbs4.properati.com/9/DHgL8aLPFIi1RC...,Venta,Venta,Venta,Venta
1603,1603,sell,Fondo de Comercio,San Francisco Solano,|Argentina|Bs.As. G.B.A. Zona Sur|Quilmes|San ...,Argentina,Bs.As. G.B.A. Zona Sur,3429053.0,"-34.8014208,-58.3033418",-34.801421,...,,,http://www.properati.com.ar/15iw8_venta_local_...,Corredor Responsable: FRANCISCO D'ATRI - CPMCA...,Venta Local de 1000 m2 en Avenida Donato Alvarez,https://thumbs4.properati.com/4/5G8Zv3hnVs9_dG...,Venta,Venta,Venta,Venta
1897,1897,sell,Fondo de Comercio,Merlo,|Argentina|San Luis|Merlo|,Argentina,San Luis,3844377.0,"-32.33863831,-65.01531982",-32.338638,...,,,http://www.properati.com.ar/15jk1_venta_local_...,Importante fondo de comercio Habilitado para v...,U$D 62.500 - Fondo de Comercio en Venta - Poet...,https://thumbs4.properati.com/1/En_1LrtWoDMvW6...,Venta,Venta,Venta,Venta
2965,2965,sell,Fondo de Comercio,Mendoza,|Argentina|Mendoza|Mendoza|,Argentina,Mendoza,3844421.0,"-32.8894577,-68.84584045",-32.889458,...,2.0,,http://www.properati.com.ar/15kq6_venta_local_...,Vendo fondo de comercio: Farmacia.Contacto: 26...,$ 1.300.000 - Fondo de Comercio en Venta - Far...,https://thumbs4.properati.com/7/Hmg9Qo87TBMeS3...,,Venta,Venta,Venta
2967,2967,sell,Fondo de Comercio,Godoy Cruz,|Argentina|Mendoza|Godoy Cruz|,Argentina,Mendoza,3854963.0,"-32.9417038,-68.82556915",-32.941704,...,2.0,,http://www.properati.com.ar/15kq8_venta_local_...,Vendo fondo de comercio: FarmaciaContacto: 261...,$ 10.000.000 - Fondo de Comercio en Venta - Fa...,https://thumbs4.properati.com/1/uoalKJi0AmsvFN...,,Venta,Venta,Venta
3854,3854,sell,Fondo de Comercio,Mendoza,|Argentina|Mendoza|,Argentina,Mendoza,3844419.0,"-32.92443466,-68.84568787",-32.924435,...,2.0,,http://www.properati.com.ar/15lln_venta_local_...,FARMACIA:VENDO EXCELENTE FONDO DE COMERCIO Inv...,$ 500.000 - Fondo de Comercio en Venta - Farma...,https://thumbs4.properati.com/8/yOAZ79VuV5dXVu...,,Venta,Venta,Venta
3855,3855,sell,Fondo de Comercio,Uspallata,|Argentina|Mendoza|Uspallata|,Argentina,Mendoza,3833358.0,"-32.92443466,-68.84568787",-32.924435,...,2.0,,http://www.properati.com.ar/15llq_venta_local_...,FARMACIA:VENDO EXCELENTE FONDO DE COMERCIO Inv...,$ 1.600.000 - Fondo de Comercio en Venta - Far...,https://thumbs4.properati.com/6/OC2MqgIxXt7x7B...,,Venta,Venta,Venta
3856,3856,sell,Fondo de Comercio,MaipÃº,|Argentina|Mendoza|MaipÃº|,Argentina,Mendoza,3845244.0,"-32.92443466,-68.84568787",-32.924435,...,2.0,,http://www.properati.com.ar/15llr_venta_local_...,FARMACIA:VENDO EXCELENTE FONDO DE COMERCIO Inv...,$ 900.000 - Fondo de Comercio en Venta - Farma...,https://thumbs4.properati.com/7/d5BWTQspKK8a16...,,Venta,Venta,Venta
3861,3861,sell,Fondo de Comercio,Mendoza,|Argentina|Mendoza|Mendoza|,Argentina,Mendoza,3844421.0,"-32.92443466,-68.84568787",-32.924435,...,2.0,,http://www.properati.com.ar/15llw_venta_local_...,FARMACIA:VENDO EXCELENTE FONDO DE COMERCIO Inv...,$ 2.500.000 - Fondo de Comercio en Venta - Far...,https://thumbs4.properati.com/0/1dpOWIokikJTkG...,,Venta,Venta,Venta
