In [2]:
import pandas as pd

In [3]:
# Load the raw property listings CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer="../raw_data/ar_properties.csv")

In [4]:
# Inspect the raw column names before renaming anything.
df.columns

Index(['id', 'ad_type', 'start_date', 'end_date', 'created_on', 'lat', 'lon',
       'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'rooms', 'bedrooms', 'bathrooms',
       'surface_total', 'surface_covered', 'price', 'currency', 'price_period',
       'title', 'description', 'property_type', 'operation_type'],
      dtype='object')

In [5]:
# Give the terse coordinate / location-level columns descriptive names.
# `l6` is deliberately left untouched here.
df = df.rename(
    {
        "lat": "latitude",
        "lon": "longitude",
        "l1": "country",
        "l2": "province",
        "l3": "city",
        "l4": "district",
        "l5": "estate",
    },
    axis="columns",
)

In [6]:
# Confirm the rename took effect.
df.columns

Index(['id', 'ad_type', 'start_date', 'end_date', 'created_on', 'latitude',
       'longitude', 'country', 'province', 'city', 'district', 'estate', 'l6',
       'rooms', 'bedrooms', 'bathrooms', 'surface_total', 'surface_covered',
       'price', 'currency', 'price_period', 'title', 'description',
       'property_type', 'operation_type'],
      dtype='object')

In [7]:
# Check whether `l6` holds any non-null values
# (value_counts skips NaN by default, so an empty result means no data).
df["l6"].value_counts()

Series([], Name: count, dtype: int64)

In [8]:
# Remove the `l6` column from the working frame.
df = df.drop("l6", axis="columns")

In [9]:
# See which ad types occur and how often.
df["ad_type"].value_counts()

ad_type
Propiedad    1000000
Name: count, dtype: int64

In [10]:
# Remove the `ad_type` column from the working frame.
df = df.drop("ad_type", axis="columns")

In [11]:
# Distribution of operation types before translating the labels.
df["operation_type"].value_counts()

operation_type
Venta                782122
Alquiler             183927
Alquiler temporal     33951
Name: count, dtype: int64

In [12]:
# Translate the Spanish operation labels to English.
# Exact-match replacement is used instead of `regex=`: in regex mode the
# patterns are substituted as substrings, so "Alquiler" could rewrite part of
# "Alquiler temporal" (yielding "Rent temporal") if the keys were ever
# reordered. With whole-value matching the key order is irrelevant.
df["operation_type"] = df["operation_type"].replace({
    "Venta": "Sale",
    "Alquiler": "Rent",
    "Alquiler temporal": "Temporary Rent",
})

In [13]:
# Verify the translated operation labels and that the counts are unchanged.
df["operation_type"].value_counts()

operation_type
Sale              782122
Rent              183927
Temporary Rent     33951
Name: count, dtype: int64

In [14]:
# Distribution of property types before translating the labels.
df["property_type"].value_counts()

property_type
Departamento       373376
Otro               239720
Casa               190023
Lote                83792
PH                  35217
Local comercial     34493
Oficina             26365
Cochera              8908
Depósito             6502
Casa de campo        1604
Name: count, dtype: int64

In [15]:
# Translate the Spanish property-type labels to English.
# Exact-match replacement is used instead of `regex=`: in regex mode the
# patterns are substituted as substrings, so "Casa" could rewrite part of
# "Casa de campo" (yielding "House de campo") depending on key order.
# Matching whole values makes the mapping order-independent.
df["property_type"] = df["property_type"].replace({
    "Departamento": "Apartment",
    "Otro": "Other",
    "Casa": "House",
    "Lote": "Terrain",
    "PH": "Horizontal Property",
    "Local comercial": "Commercial Premises",
    "Oficina": "Office",
    "Cochera": "Garage",
    "Depósito": "Warehouse",
    "Casa de campo": "Village House",
})

In [17]:
# Verify the translated property-type labels and that the counts are unchanged.
df["property_type"].value_counts()

property_type
Apartment              373376
Other                  239720
House                  190023
Terrain                 83792
Horizontal Property     35217
Commercial Premises     34493
Office                  26365
Garage                   8908
Warehouse                6502
Village House            1604
Name: count, dtype: int64

In [18]:
# Boolean masks for the subsets being compared.
cond_rent = df["operation_type"].eq("Rent")
cond_sale = df["operation_type"].eq("Sale")
cond_curr = df["currency"].eq("USD")

# Mean USD price as a (sale, rent) pair.
(
    df.loc[cond_sale & cond_curr, "price"].mean(),
    df.loc[cond_rent & cond_curr, "price"].mean(),
)

(231140.34107096842, 7379.891555198346)

In [19]:
# Distribution of price periods before translating the labels.
df["price_period"].value_counts()

price_period
Mensual    362978
Semanal        29
Diario          8
Name: count, dtype: int64

In [20]:
# Translate the Spanish price-period labels to English.
# Whole-value (non-regex) replacement: the labels are fixed strings, so
# substring/regex matching is unnecessary and only adds risk of partial
# rewrites. Missing values are left as they are.
df["price_period"] = df["price_period"].replace({
    "Mensual": "Monthly",
    "Semanal": "Weekly",
    "Diario": "Daily",
})

In [21]:
# Verify the translated price-period labels and that the counts are unchanged.
df["price_period"].value_counts()

price_period
Monthly    362978
Weekly         29
Daily           8
Name: count, dtype: int64

In [22]:
# Eyeball a few random rows after cleaning.
# A fixed seed keeps the displayed rows identical across Restart & Run All.
df.sample(5, random_state=42)

Unnamed: 0,id,start_date,end_date,created_on,latitude,longitude,country,province,city,district,...,bathrooms,surface_total,surface_covered,price,currency,price_period,title,description,property_type,operation_type
829579,TvCgmj/fOYX25dlcpk0U3g==,2020-01-08,2020-02-19,2020-01-08,-32.967382,-60.633078,Argentina,Santa Fe,Rosario,,...,1.0,,,83000.0,USD,,Departamento de 1 dormitorio - Alem al 2400 - ...,"ALEM 2413Edificio en macrocentro, excelente ub...",Apartment,Sale
497720,WwK5O22x2KDlOQEmczM7sg==,2020-07-11,2020-12-03,2020-07-11,,,Argentina,Capital Federal,Belgrano,,...,1.0,29.0,29.0,109000.0,USD,,VENTA-DEPARTAMENTO-2 AMBIENTES- APTO PROFESION...,Corredor Responsable: Power Bienes Raíces SRL ...,Apartment,Sale
708019,9nF9hp+Z2z4H56Ry+beq+Q==,2020-11-14,9999-12-31,2020-11-14,-34.600468,-58.389531,Argentina,Capital Federal,Tribunales,,...,1.0,,94.0,80000.0,ARS,Monthly,Local - Tribunales,"Local comercial en excelente ubicación, muy co...",Commercial Premises,Rent
10714,G3bKyicSusXQb0iOgDtwOQ==,2020-08-02,2020-08-13,2020-08-02,-34.582653,-58.414818,Argentina,Capital Federal,Palermo,Palermo Chico,...,1.0,38.0,32.0,135000.0,USD,Monthly,Venta Monoambiente amplio y luminosos con Pil...,"Lindisimo Departamento Monoambiente de 38m2, l...",Apartment,Sale
342376,gNEP2j+plE51+Xb9SiLrsg==,2020-08-21,2020-08-25,2020-08-21,,,Argentina,Capital Federal,Palermo,Palermo Chico,...,1.0,95.0,95.0,1400.0,USD,,Working Ocampo - Modulo 3 - (K),Oficina en Working Ocampo ubicada en el MODULO...,Office,Rent


In [23]:
# Persist the cleaned frame next to the notebook.
# `index` is left at its default, so the row index is written as the first
# (unnamed) column of the CSV.
df.to_csv(path_or_buf="preprocessed_ar_properties.csv")