In [68]:
#se debe instalar ipyleaflet, mpu, openpyxl y requests

import requests
from pandas import json_normalize
import ipyleaflet
import mpu
import pandas as pd
import numpy as np
from ipyleaflet import Map, basemaps, Marker, AwesomeIcon, Icon

## Íconos y mapas a usar luego

In [69]:
basic_map = Map(basemap=basemaps.OpenStreetMap.Mapnik, center=(-34.60823238941058, -58.398160858739075), zoom=14)

icono_subte = Icon(icon_url='icono_subte.png', icon_size=[28, 28])

icon0 = AwesomeIcon(
    name='home',
    marker_color='green',
    icon_color='black',
    spin=False
)

icon1 = AwesomeIcon(
    name='home',
    marker_color='beige',
    icon_color='black',
    spin=False
)

icon2 = AwesomeIcon(
    name='home',
    marker_color='orange',
    icon_color='black',
    spin=False
)

icon3 = AwesomeIcon(
    name='home',
    marker_color='red',
    icon_color='black',
    spin=False
)

## Estaciones a usar luego

In [70]:
# direccion web para acceder a los datos del gobierno de la ciudad
url = 'https://cdn.buenosaires.gob.ar/datosabiertos/datasets/subte-estaciones/subte_estaciones.geojson'
response = requests.get(url)
dictr = response.json()
# referencio a las estaciones dentro del conjunto de datos
estaciones = dictr['features']
df = json_normalize(estaciones)
# creo la columna Latitud
df['Latitud'] = df['geometry.coordinates'].apply(lambda x:x[0])
# creo la columna Longitud
df['Longitud'] = df['geometry.coordinates'].apply(lambda x:x[1])
# creo la máscara para filtrar los de Linea A
mascara_linea_a = df['properties.LINEA'] == 'A'
linea_a = df.loc[mascara_linea_a, :]
linea_a_limpio = linea_a.drop(['type', 'properties.ID', 'properties.LINEA', 'geometry.type', 'geometry.coordinates'], axis = 1)
linea_a_limpio

Unnamed: 0,properties.ESTACION,Latitud,Longitud
30,PERU,-58.374268,-34.608559
31,PIEDRAS,-58.379085,-34.608882
32,LIMA,-58.382232,-34.6091
33,SAENZ PEÑA,-58.386777,-34.609413
34,CONGRESO - PDTE. DR. RAÚL R. ALFONSÍN,-58.392669,-34.609226
35,PASCO,-58.398427,-34.609646
36,ALBERTI,-58.401208,-34.609834
37,PLAZA DE MISERERE,-58.406707,-34.609817
38,LORIA,-58.415186,-34.610782
39,CASTRO BARROS,-58.421816,-34.61177


## Funciones

In [71]:
def distancia(lat, long):
    aux = []
    for index, row in linea_a_limpio.iterrows():
        aux.append(
            mpu.haversine_distance(
                (row['Latitud'], row['Longitud']), 
                (lat, long)
            )
        )
    return min(aux)

def marca_propiedad(lat, long, quantile):
    if quantile == 0:
        icono_prop = icon0
    elif quantile == 1:
        icono_prop = icon1
    elif quantile == 2:
        icono_prop = icon2  
    else: 
        icono_prop = icon3
    marker = Marker(location=(lat,long), draggable=False, icon=icono_prop)
    basic_map.add_layer(marker)
    
def limpieza_superficie(sup_total, sup_cubierta):
    if sup_total is not None and sup_cubierta is not None:
            if sup_total >= sup_cubierta:
                superficie = sup_total
            else: 
                superficie = 0
    elif sup_total is not None:
            superficie = sup_total
    elif sup_cubierta is not None:
            superficie = sup_cubierta
    else: superficie = 0        
                    
    return superficie

## Lectura del dataset Original de Properati

In [72]:
df_properati = pd.read_csv('properati.csv')

## Genera df solo para Capital

In [73]:
df_capital = df_properati[df_properati["state_name"] == "Capital Federal"]
df_capital.shape

(32316, 26)

In [74]:
Barrios = df_capital["place_name"].value_counts()

## Corrige ortografia de Barrios y mapea Barrios Inexistentes

In [75]:
%%capture output
df_capital["place_name"].replace(['San Cristobal','Barrio Norte', 'Centro / Microcentro'], ['San Cristóbal','Recoleta', 'Monserrat'], inplace=True)

In [76]:
barrios_conv = df_capital["place_name"].value_counts()

## Lee el archivo de Mapping de Barrios por Comuna

In [77]:
df_barrios_comuna = pd.read_csv("Barrios_x_Comuna_vsc_utf8.csv")

## Merge de df de Barrios de Capital con Comunas

In [78]:
df_capital_comuna = pd.merge(df_capital, df_barrios_comuna, how='left', left_on='place_name', right_on='Barrio')
del df_capital_comuna["description"]
del df_capital_comuna["title"]
del df_capital_comuna["properati_url"]
del df_capital_comuna["image_thumbnail"]


In [80]:
df_capital_comuna["Comuna"].isna().sum()

3612

## Enumera los Barrrios por los que atraviesa la Linea A

In [81]:
barrios_linea_a = pd.Series(['Monserrat', 'Balvanera', 'Almagro', 'Caballito'])
barrios_linea_a

0    Monserrat
1    Balvanera
2      Almagro
3    Caballito
dtype: object

## Filtra el df de Capital por los Barrios definidos

In [82]:
df_capital_barrios_subte = df_capital_comuna[df_capital_comuna['Barrio'].isin(barrios_linea_a)]
df_capital_barrios_subte.shape

(4866, 24)

## Elimina los Registros sin lat-long

In [83]:
df_latlon_notna = df_capital_barrios_subte[df_capital_barrios_subte['lat-lon'].notna()]

In [84]:
df_latlon_notna.shape

(4031, 24)

## Calcula las distancias de cada Propiedad hasta las 17 estaciones

In [86]:
%%capture output
df_latlon_notna['distancia'] = df_latlon_notna.apply(lambda x: distancia(x['lat'], x['lon']), axis=1)
df_latlon_notna

In [87]:
%%capture output
df_latlon_notna.loc["superficie"] = df_latlon_notna[['surface_total_in_m2','surface_covered_in_m2']].apply(lambda df_latlon_notna: limpieza_superficie(df_latlon_notna['surface_total_in_m2'],df_latlon_notna['surface_covered_in_m2']),axis=1)
df_latlon_notna.head(4)

## Exportamos a CSV

In [88]:
df_latlon_notna.to_csv("CSV_notna.csv", index=False)

## Seguimos eliminando NA

In [89]:
df_price_notna = df_latlon_notna[df_latlon_notna['price_usd_per_m2'].notna()]
df_price_notna

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,surface_total_in_m2,surface_covered_in_m2,price_usd_per_m2,price_per_m2,floor,rooms,expenses,Barrio,Comuna,distancia
14,48.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.6096748,-58.3986588",-34.609675,...,141.0,124.0,1702.127660,1935.483871,,5.0,,Balvanera,Comuna-3,3172.512042
16,52.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6222111,-58.4391283",-34.622211,...,50.0,50.0,2200.000000,2200.000000,,3.0,,Caballito,Comuna-6,3172.903899
17,53.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6238943143,-58.4468606529",-34.623894,...,100.0,46.0,1705.000000,3706.521739,,2.0,,Caballito,Comuna-6,3173.050028
18,54.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6209278,-58.4458736",-34.620928,...,81.0,75.0,2721.111111,2938.800000,,3.0,,Caballito,Comuna-6,3173.305581
19,55.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6209278,-58.4458736",-34.620928,...,81.0,75.0,2750.370370,2970.400000,,3.0,,Caballito,Comuna-6,3173.305581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31793,119970.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.6096441,-58.3964932",-34.609644,...,45.0,38.0,2631.111111,3115.789474,,1.0,,Balvanera,Comuna-3,3172.427328
31817,120075.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6221978289,-58.4291563359",-34.622198,...,78.0,70.0,2051.282051,2285.714286,,,2300.0,Caballito,Comuna-6,3172.500548
32198,120984.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6204033093,-58.4410224908",-34.620403,...,77.0,77.0,1909.090909,1909.090909,,,3800.0,Caballito,Comuna-6,3173.160941
32267,121059.0,sell,apartment,Monserrat,|Argentina|Capital Federal|Monserrat|,Argentina,Capital Federal,3430570.0,"-34.613408336,-58.3845426597",-34.613408,...,37.0,34.0,2310.810811,2514.705882,,,1200.0,Monserrat,Comuna-1,3171.567746


In [90]:
df_randomizado = df_price_notna.sample(n=10, random_state = 100)
df_randomizado.head(10)

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,surface_total_in_m2,surface_covered_in_m2,price_usd_per_m2,price_per_m2,floor,rooms,expenses,Barrio,Comuna,distancia
9331,36566.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.6051224,-58.4046096",-34.605122,...,45.0,45.0,1988.888889,1988.888889,,2.0,,Balvanera,Comuna-3,3173.207101
28856,110461.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6110583,-58.4201892",-34.611058,...,90.0,77.0,2111.111111,2467.532468,,4.0,,Almagro,Comuna-5,3173.24708
29180,111497.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6110268,-58.424513",-34.611027,...,60.0,45.0,1566.666667,2088.888889,,3.0,,Almagro,Comuna-5,3173.425595
5851,24942.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6113092,-58.4381058",-34.611309,...,190.0,170.0,1684.210526,1882.352941,,5.0,,Caballito,Comuna-6,3173.948914
14770,56081.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.609032,-58.403378",-34.609032,...,70.0,70.0,1714.285714,1714.285714,4.0,3.0,2000.0,Balvanera,Comuna-3,3172.767417
13120,49742.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.6133124313,-58.4052081146",-34.613312,...,46.0,42.0,2304.347826,2523.809524,5.0,2.0,1700.0,Balvanera,Comuna-3,3172.414895
18778,71765.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6060340291,-58.452885322",-34.606034,...,47.0,40.0,2836.170213,3332.5,,,,Caballito,Comuna-6,3175.07448
4524,19659.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6032835,-58.4328461",-34.603284,...,93.0,85.0,2741.935484,3000.0,,4.0,3100.0,Almagro,Comuna-5,3174.535446
2722,12321.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6051042,-58.4129088",-34.605104,...,77.0,72.0,1753.246753,1875.0,,3.0,2300.0,Almagro,Comuna-5,3173.545373
5269,22759.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.602203,-58.3985049",-34.602203,...,74.0,68.0,1594.594595,1735.294118,,3.0,,Balvanera,Comuna-3,3173.250722


In [91]:
df_randomizado['quantile'] = pd.qcut(df_randomizado['price_usd_per_m2'], 4, labels=False)
df_randomizado

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,surface_covered_in_m2,price_usd_per_m2,price_per_m2,floor,rooms,expenses,Barrio,Comuna,distancia,quantile
9331,36566.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.6051224,-58.4046096",-34.605122,...,45.0,1988.888889,1988.888889,,2.0,,Balvanera,Comuna-3,3173.207101,2
28856,110461.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6110583,-58.4201892",-34.611058,...,77.0,2111.111111,2467.532468,,4.0,,Almagro,Comuna-5,3173.24708,2
29180,111497.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6110268,-58.424513",-34.611027,...,45.0,1566.666667,2088.888889,,3.0,,Almagro,Comuna-5,3173.425595,0
5851,24942.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6113092,-58.4381058",-34.611309,...,170.0,1684.210526,1882.352941,,5.0,,Caballito,Comuna-6,3173.948914,0
14770,56081.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.609032,-58.403378",-34.609032,...,70.0,1714.285714,1714.285714,4.0,3.0,2000.0,Balvanera,Comuna-3,3172.767417,1
13120,49742.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.6133124313,-58.4052081146",-34.613312,...,42.0,2304.347826,2523.809524,5.0,2.0,1700.0,Balvanera,Comuna-3,3172.414895,3
18778,71765.0,sell,apartment,Caballito,|Argentina|Capital Federal|Caballito|,Argentina,Capital Federal,3435874.0,"-34.6060340291,-58.452885322",-34.606034,...,40.0,2836.170213,3332.5,,,,Caballito,Comuna-6,3175.07448,3
4524,19659.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6032835,-58.4328461",-34.603284,...,85.0,2741.935484,3000.0,,4.0,3100.0,Almagro,Comuna-5,3174.535446,3
2722,12321.0,sell,apartment,Almagro,|Argentina|Capital Federal|Almagro|,Argentina,Capital Federal,3436397.0,"-34.6051042,-58.4129088",-34.605104,...,72.0,1753.246753,1875.0,,3.0,2300.0,Almagro,Comuna-5,3173.545373,1
5269,22759.0,sell,apartment,Balvanera,|Argentina|Capital Federal|Balvanera|,Argentina,Capital Federal,6693228.0,"-34.602203,-58.3985049",-34.602203,...,68.0,1594.594595,1735.294118,,3.0,,Balvanera,Comuna-3,3173.250722,0


## Basic Map

In [92]:
## basic_map = Map(basemap=basemaps.Esri.DeLorme, center=(4.6097, -74.08), zoom=6)

for index, row in linea_a_limpio.iterrows():
    marker = Marker(location=(row['Latitud'],row['Longitud']), draggable=False, icon=icono_subte)
    basic_map.add_layer(marker);

display(basic_map)

Map(center=[-34.60823238941058, -58.398160858739075], controls=(ZoomControl(options=['position', 'zoom_in_text…

## Exportamos un .CSV

In [93]:
df_randomizado.to_csv("df_randomizado.csv", index=False)