# 1) Preparación previa

### Carga de librerías

In [1]:
import pandas as pd
import re
import numpy as np

### Lectura del dataset original de Properati

In [3]:
data = pd.read_csv("properati.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,surface_covered_in_m2,price_usd_per_m2,price_per_m2,floor,rooms,expenses,properati_url,description,title,image_thumbnail
0,0,sell,PH,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6618237,-58.5088387",-34.661824,...,40.0,1127.272727,1550.0,,,,http://www.properati.com.ar/15bo8_venta_ph_mat...,"2 AMBIENTES TIPO CASA PLANTA BAJA POR PASILLO,...",2 AMB TIPO CASA SIN EXPENSAS EN PB,https://thumbs4.properati.com/8/BluUYiHJLhgIIK...
1,1,sell,apartment,La Plata,|Argentina|Bs.As. G.B.A. Zona Sur|La Plata|,Argentina,Bs.As. G.B.A. Zona Sur,3432039.0,"-34.9038831,-57.9643295",-34.903883,...,,,,,,,http://www.properati.com.ar/15bob_venta_depart...,Venta de departamento en décimo piso al frente...,VENTA Depto 2 dorm. a estrenar 7 e/ 36 y 37 ...,https://thumbs4.properati.com/7/ikpVBu2ztHA7jv...
2,2,sell,apartment,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6522615,-58.5229825",-34.652262,...,55.0,1309.090909,1309.090909,,,,http://www.properati.com.ar/15bod_venta_depart...,2 AMBIENTES 3ER PISO LATERAL LIVING COMEDOR AM...,2 AMB 3ER PISO CON ASCENSOR APTO CREDITO,https://thumbs4.properati.com/5/SXKr34F_IwG3W_...
3,3,sell,PH,Liniers,|Argentina|Capital Federal|Liniers|,Argentina,Capital Federal,3431333.0,"-34.6477969,-58.5164244",-34.647797,...,,,,,,,http://www.properati.com.ar/15boh_venta_ph_lin...,PH 3 ambientes con patio. Hay 3 deptos en lote...,PH 3 amb. cfte. reciclado,https://thumbs4.properati.com/3/DgIfX-85Mog5SP...
4,4,sell,apartment,Centro,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,3435548.0,"-38.0026256,-57.5494468",-38.002626,...,35.0,1828.571429,1828.571429,,,,http://www.properati.com.ar/15bok_venta_depart...,DEPARTAMENTO CON FANTÁSTICA ILUMINACIÓN NATURA...,DEPTO 2 AMB AL CONTRAFRENTE ZONA CENTRO/PLAZA ...,https://thumbs4.properati.com/5/xrRqlNcSI_vs-f...


### Separación de columna con muchas ubicaciones

In [4]:
# La columna "place_with_parent_names" tiene información separada con '|'. Se separa para obtener info adicional
separado = data["place_with_parent_names"].str.split('|', expand = True)
separado.head()

Unnamed: 0,0,1,2,3,4,5,6
0,,Argentina,Capital Federal,Mataderos,,,
1,,Argentina,Bs.As. G.B.A. Zona Sur,La Plata,,,
2,,Argentina,Capital Federal,Mataderos,,,
3,,Argentina,Capital Federal,Liniers,,,
4,,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,Centro,,


### Agregado de la nueva información en nuevas columnas

In [5]:
separado.columns = ['??', 'Pais', 'Zona', 'Partido', 'Barrios', 'Country', 'Otra']
data_concat = pd.concat([data, separado], axis=1)

### Cálculo del tipo de cambio para corroborar

In [6]:
data_concat['TC'] = data_concat['price_aprox_local_currency'] / data_concat['price_aprox_usd'] 
# Calculando el promedio, decidimos tomar la columna 'price_aprox_usd' como el $ de las propiedades
data_concat['TC'].mean().round(2)

17.64

### Revisión de registros nulos según columnas

In [9]:
data_concat.isnull().sum()

Unnamed: 0                         0
operation                          0
property_type                      0
place_name                        23
place_with_parent_names            0
country_name                       0
state_name                         0
geonames_id                    18717
lat-lon                        51550
lat                            51550
lon                            51550
price                          20410
currency                       20411
price_aprox_local_currency     20410
price_aprox_usd                20410
surface_total_in_m2            39328
surface_covered_in_m2          19907
price_usd_per_m2               52603
price_per_m2                   33562
floor                         113321
rooms                          73830
expenses                      106958
properati_url                      0
description                        2
title                              0
image_thumbnail                 3112
??                                 0
P

### Creación de función para limpiar superficie

In [10]:
# La siguiente función nos permite limpiar la superficie según las inconsistencias entre la total y la cubierta.
# Se eligií dicha columna con respecto a 'surface_total_in_m2' ya que tenía menor cantidad de registros nulos
def limpieza_superficie(sup_total, sup_cubierta):
    if sup_total is not None and sup_cubierta is not None:
            if sup_total >= sup_cubierta:
                superficie = sup_total
            else: 
                superficie = np.NaN
    elif sup_total is not None:
            superficie = sup_total
    elif sup_cubierta is not None:
            superficie = sup_cubierta
    else: superficie = np.NaN        
                    
    return superficie

### Aplicación de la susodicha función

In [11]:
data_concat["superficie"] = data_concat[['surface_total_in_m2','surface_covered_in_m2']].apply(lambda data_concat: limpieza_superficie(data_concat['surface_total_in_m2'],data_concat['surface_covered_in_m2']),axis=1)
data_concat.head(10)

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,image_thumbnail,??,Pais,Zona,Partido,Barrios,Country,Otra,TC,superficie
0,0,sell,PH,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6618237,-58.5088387",-34.661824,...,https://thumbs4.properati.com/8/BluUYiHJLhgIIK...,,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0
1,1,sell,apartment,La Plata,|Argentina|Bs.As. G.B.A. Zona Sur|La Plata|,Argentina,Bs.As. G.B.A. Zona Sur,3432039.0,"-34.9038831,-57.9643295",-34.903883,...,https://thumbs4.properati.com/7/ikpVBu2ztHA7jv...,,Argentina,Bs.As. G.B.A. Zona Sur,La Plata,,,,17.6445,
2,2,sell,apartment,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6522615,-58.5229825",-34.652262,...,https://thumbs4.properati.com/5/SXKr34F_IwG3W_...,,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0
3,3,sell,PH,Liniers,|Argentina|Capital Federal|Liniers|,Argentina,Capital Federal,3431333.0,"-34.6477969,-58.5164244",-34.647797,...,https://thumbs4.properati.com/3/DgIfX-85Mog5SP...,,Argentina,Capital Federal,Liniers,,,,17.6445,
4,4,sell,apartment,Centro,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,3435548.0,"-38.0026256,-57.5494468",-38.002626,...,https://thumbs4.properati.com/5/xrRqlNcSI_vs-f...,,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,Centro,,,17.6445,35.0
5,5,sell,house,Gualeguaychú,|Argentina|Entre Ríos|Gualeguaychú|,Argentina,Entre Ríos,3433657.0,"-33.0140714,-58.519828",-33.014071,...,https://thumbs4.properati.com/6/q-w68gvaUEQVXI...,,Argentina,Entre Ríos,Gualeguaychú,,,,,
6,6,sell,PH,Munro,|Argentina|Bs.As. G.B.A. Zona Norte|Vicente Ló...,Argentina,Bs.As. G.B.A. Zona Norte,3430511.0,"-34.5329567,-58.5217825",-34.532957,...,https://thumbs4.properati.com/5/6GOXsHCyDu1aGx...,,Argentina,Bs.As. G.B.A. Zona Norte,Vicente López,Munro,,,17.6445,106.0
7,7,sell,apartment,Belgrano,|Argentina|Capital Federal|Belgrano|,Argentina,Capital Federal,3436077.0,"-34.5598729,-58.443362",-34.559873,...,https://thumbs4.properati.com/1/IHxARynlr8sPEW...,,Argentina,Capital Federal,Belgrano,,,,17.6445,45.0
8,8,sell,apartment,Belgrano,|Argentina|Capital Federal|Belgrano|,Argentina,Capital Federal,3436077.0,"-34.5598729,-58.443362",-34.559873,...,https://thumbs4.properati.com/2/J3zOjgaFHrkvnv...,,Argentina,Capital Federal,Belgrano,,,,17.6445,65.0
9,9,sell,house,Rosario,|Argentina|Santa Fe|Rosario|,Argentina,Santa Fe,3838574.0,"-32.942031,-60.7259192",-32.942031,...,https://thumbs4.properati.com/8/RCf1YEWdF4rv98...,,Argentina,Santa Fe,Rosario,,,,17.6445,


### Creación de la columna de precios por metros cuadrados con la nueva superficie

In [12]:
data_concat['Precio_USD_por_M2']=data_concat.price_aprox_usd/data_concat.superficie 
data_concat.head()

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,??,Pais,Zona,Partido,Barrios,Country,Otra,TC,superficie,Precio_USD_por_M2
0,0,sell,PH,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6618237,-58.5088387",-34.661824,...,,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0,1127.272727
1,1,sell,apartment,La Plata,|Argentina|Bs.As. G.B.A. Zona Sur|La Plata|,Argentina,Bs.As. G.B.A. Zona Sur,3432039.0,"-34.9038831,-57.9643295",-34.903883,...,,Argentina,Bs.As. G.B.A. Zona Sur,La Plata,,,,17.6445,,
2,2,sell,apartment,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6522615,-58.5229825",-34.652262,...,,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0,1309.090909
3,3,sell,PH,Liniers,|Argentina|Capital Federal|Liniers|,Argentina,Capital Federal,3431333.0,"-34.6477969,-58.5164244",-34.647797,...,,Argentina,Capital Federal,Liniers,,,,17.6445,,
4,4,sell,apartment,Centro,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,3435548.0,"-38.0026256,-57.5494468",-38.002626,...,,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,Centro,,,17.6445,35.0,1828.571429


### Eliminación de columnas

In [13]:
# Algunas se eliminan por no corresponder con nuestro analisis y otras por estar repetidas
data_eliminacion = data_concat.drop(['Unnamed: 0', 'operation', 'place_with_parent_names', 'place_name', 'country_name', 'state_name',
                                     'description', 'geonames_id', 'lat-lon', 'floor', 'rooms', 'expenses', 'properati_url', 
                                     'image_thumbnail', 'title', '??'], axis = 1)
data_eliminacion.head()

Unnamed: 0,property_type,lat,lon,price,currency,price_aprox_local_currency,price_aprox_usd,surface_total_in_m2,surface_covered_in_m2,price_usd_per_m2,price_per_m2,Pais,Zona,Partido,Barrios,Country,Otra,TC,superficie,Precio_USD_por_M2
0,PH,-34.661824,-58.508839,62000.0,USD,1093959.0,62000.0,55.0,40.0,1127.272727,1550.0,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0,1127.272727
1,apartment,-34.903883,-57.96433,150000.0,USD,2646675.0,150000.0,,,,,Argentina,Bs.As. G.B.A. Zona Sur,La Plata,,,,17.6445,,
2,apartment,-34.652262,-58.522982,72000.0,USD,1270404.0,72000.0,55.0,55.0,1309.090909,1309.090909,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0,1309.090909
3,PH,-34.647797,-58.516424,95000.0,USD,1676227.5,95000.0,,,,,Argentina,Capital Federal,Liniers,,,,17.6445,,
4,apartment,-38.002626,-57.549447,64000.0,USD,1129248.0,64000.0,35.0,35.0,1828.571429,1828.571429,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,Centro,,,17.6445,35.0,1828.571429


In [14]:
# Corroboramos el tamaño del resultado
data_eliminacion.shape

(121220, 20)

### Eliminamos los valores nulos del precio nuevo

In [15]:
df_final = data_eliminacion[data_eliminacion['Precio_USD_por_M2'].notna()]
df_final

Unnamed: 0,property_type,lat,lon,price,currency,price_aprox_local_currency,price_aprox_usd,surface_total_in_m2,surface_covered_in_m2,price_usd_per_m2,price_per_m2,Pais,Zona,Partido,Barrios,Country,Otra,TC,superficie,Precio_USD_por_M2
0,PH,-34.661824,-58.508839,62000.0,USD,1093959.00,62000.0,55.0,40.0,1127.272727,1550.000000,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0,1127.272727
2,apartment,-34.652262,-58.522982,72000.0,USD,1270404.00,72000.0,55.0,55.0,1309.090909,1309.090909,Argentina,Capital Federal,Mataderos,,,,17.6445,55.0,1309.090909
4,apartment,-38.002626,-57.549447,64000.0,USD,1129248.00,64000.0,35.0,35.0,1828.571429,1828.571429,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,Centro,,,17.6445,35.0,1828.571429
6,PH,-34.532957,-58.521782,130000.0,USD,2293785.00,130000.0,106.0,78.0,1226.415094,1666.666667,Argentina,Bs.As. G.B.A. Zona Norte,Vicente López,Munro,,,17.6445,106.0,1226.415094
7,apartment,-34.559873,-58.443362,138000.0,USD,2434941.00,138000.0,45.0,40.0,3066.666667,3450.000000,Argentina,Capital Federal,Belgrano,,,,17.6445,45.0,3066.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121215,apartment,,,870000.0,USD,15350715.00,870000.0,113.0,93.0,7699.115044,9354.838710,Argentina,Capital Federal,Belgrano,,,,17.6445,113.0,7699.115044
121216,house,,,498000.0,USD,8786961.00,498000.0,360.0,360.0,1383.333333,1383.333333,Argentina,Bs.As. G.B.A. Zona Norte,San Isidro,Beccar,,,17.6445,360.0,1383.333333
121217,apartment,-34.570639,-58.475596,131500.0,USD,2320251.75,131500.0,46.0,39.0,2858.695652,3371.794872,Argentina,Capital Federal,Villa Urquiza,,,,17.6445,46.0,2858.695652
121218,apartment,,,95900.0,USD,1692107.55,95900.0,48.0,48.0,1997.916667,1997.916667,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,Plaza Colón,,,17.6445,48.0,1997.916667


# 2) Análisis estadístico breve del resultado

### Cálculo de % de registros según la provincia/región

In [23]:
(df_final.Zona.value_counts() / df_final.Zona.size) * 100

Capital Federal                 35.631263
Bs.As. G.B.A. Zona Norte        26.295792
Bs.As. G.B.A. Zona Sur           8.883367
Buenos Aires Costa Atlántica     8.219639
Bs.As. G.B.A. Zona Oeste         6.613226
Santa Fe                         5.539078
Córdoba                          5.014830
Buenos Aires Interior            1.426854
Corrientes                       0.472946
Mendoza                          0.463327
Neuquén                          0.343086
Misiones                         0.213226
San Luis                         0.189178
Río Negro                        0.160321
Tucumán                          0.153908
Entre Ríos                       0.120240
Salta                            0.078557
Chubut                           0.054509
Tierra Del Fuego                 0.041683
Chaco                            0.032064
La Pampa                         0.016032
Santa Cruz                       0.012826
Catamarca                        0.008016
Jujuy                            0

### Agrupamos por provincia/región

In [16]:
data_agrupada_prov = df_final.groupby('Zona')
data_agrupada_prov

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000026AE0AFFEC8>

### Calculamos las métricas según provincia/región

In [21]:
medidas = data_agrupada_prov[["Precio_USD_por_M2"]].describe().round(2)

In [22]:
# Ordenamos según count
medidas.sort_values(by=[('Precio_USD_por_M2', 'count')], ascending = False)

Unnamed: 0_level_0,Precio_USD_por_M2,Precio_USD_por_M2,Precio_USD_por_M2,Precio_USD_por_M2,Precio_USD_por_M2,Precio_USD_por_M2,Precio_USD_por_M2,Precio_USD_por_M2
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Zona,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Capital Federal,22225.0,3123.37,3720.31,24.21,1969.7,2469.44,3068.18,206333.33
Bs.As. G.B.A. Zona Norte,16402.0,1833.34,1789.96,4.17,1128.73,1666.67,2327.11,48296.22
Bs.As. G.B.A. Zona Sur,5541.0,1521.41,1108.3,19.31,1000.0,1500.0,1937.5,23140.76
Buenos Aires Costa Atlántica,5127.0,1522.44,807.09,3.0,909.09,1579.17,2045.44,6422.22
Bs.As. G.B.A. Zona Oeste,4125.0,1289.72,1582.14,7.0,714.29,1225.0,1666.67,80000.0
Santa Fe,3455.0,2251.97,4445.15,0.6,1266.79,1585.44,1889.57,57382.08
Córdoba,3128.0,1186.38,860.71,1.18,801.96,1192.6,1542.53,30000.0
Buenos Aires Interior,890.0,934.2,688.9,2.07,314.92,833.33,1476.13,4166.67
Corrientes,295.0,1499.2,627.57,83.68,1078.25,1461.54,1865.32,3500.0
Mendoza,289.0,1683.17,2602.58,16.47,992.08,1428.57,1684.21,31578.95


### Métricas generales del data set

In [25]:
medidas_finales = df_final[["Precio_USD_por_M2"]].describe().round(2)
medidas_finales

Unnamed: 0,Precio_USD_por_M2
count,62375.0
mean,2176.12
std,2814.35
min,0.6
25%,1222.22
50%,1818.18
75%,2500.0
max,206333.33


### Exportación del DF final

In [26]:
df_final.to_csv('DF_Final.csv', index = False)