In [1]:
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
from folium import plugins
import matplotlib.pyplot as plt
from matplotlib import patches
from matplotlib.patches import FancyArrowPatch
import scipy.stats as stats
import pymongo
import seaborn as sns
import humanize
import warnings
import matplotlib.colors as mcolors  # For color interpolation
import folium
import os

In [2]:
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')

# Load environment variables through python-dotenv; the call's boolean
# result is what the cell displays.
load_dotenv()

True

In [3]:
# Font presets for titles, subtitles and body text.
title_font = dict(fontname="Nimbus Sans", size=18, color="midnightblue")
subtitle_font = dict(fontname="Roboto", size=14, color="rebeccapurple")
# Body text uses a neutral gray to stay consistent with the overall style.
text_font = dict(fontname="DejaVu Sans", size=12, color="dimgray")

# Seaborn style overrides applied on top of the 'ticks' preset.
seaborn_overrides = {
    'axes.grid': True,
    'grid.color': '0.8',       # light color for the grid
    'grid.linestyle': '--',
    'grid.linewidth': 0.6,
    'font.sans-serif': [
        'Nimbus Sans',
        'DejaVu Sans',
        'Liberation Sans',
        'Bitstream Vera Sans',
        'sans-serif',
    ],
    'axes.edgecolor': '0.5',   # soft color for the axes border
}
sns.set_style('ticks', seaborn_overrides)

# Figure resolution.
plt.rcParams.update({'figure.dpi': 80})

In [4]:
apartments = pd.read_csv('../data/processed/apartments.csv')
print(apartments.shape)

# Keep rental listings ('ARRIENDO') whose coords_modified flag is False,
# then discard the columns that are no longer needed.
is_rental = apartments['tipo_operacion'].eq('ARRIENDO')
coords_untouched = apartments['coords_modified'].eq(False)
apartments = (
    apartments
    .loc[is_rental & coords_untouched]
    .drop(columns=['precio_venta', 'tipo_operacion', 'coords_modified'])
)
apartments.head()

(19551, 45)


Unnamed: 0,codigo,tipo_propiedad,area,habitaciones,banos,administracion,parqueaderos,sector,estrato,antiguedad,...,terraza,vigilancia,localidad,barrio,estacion_tm_cercana,distancia_estacion_tm_m,is_cerca_estacion_tm,parque_cercano,distancia_parque_m,is_cerca_parque
9724,MC5214856,APARTAMENTO,106.0,3.0,2.0,560000.0,2.0,COLINA Y ALREDEDORES,4.0,ENTRE 5 Y 10 ANOS,...,0,0,SUBA,S.C. CIUDAD JARDIN NORTE,Suba - AV. Boyacá,258.77,1,PARQUE VECINAL AGRUPACIÓN DE VIVIENDA EN BALCO...,1169.15,0
9725,MC5226855,APARTAMENTO,70.0,3.0,2.0,300000.0,1.0,SALITRE MODELIA,4.0,MAS DE 20 ANOS,...,0,0,FONTIBON,SAUZALITO,Av. Rojas – UNISALESIANA,641.89,0,PARQUE VECINAL CARLOS LLERAS (EL FUEGO),205.51,1
9726,MC5224821,APARTAMENTO,44.5,1.0,1.0,391000.0,1.0,SANTA BARBARA,6.0,ENTRE 10 Y 20 ANOS,...,0,0,USAQUEN,SANTA BARBARA CENTRAL I Y II,Calle 106,1586.97,0,PARQUE METROPOLITANO EL COUNTRY,1714.01,0
9727,MC3587458,APARTAMENTO,43.0,1.0,1.0,,1.0,LA SOLEDAD,4.0,ENTRE 0 Y 5 ANOS,...,0,0,TEUSAQUILLO,ARMENIA,Calle 26,141.86,1,PARQUE METROPOLITANO EL RENACIMIENTO - PARQUE ...,445.06,1
9728,15234-M5227729,APARTAMENTO,212.0,4.0,4.0,,2.0,CHICO,6.0,MAS DE 20 ANOS,...,0,1,CHAPINERO,LA CABRERA,Héroes,1480.54,0,PARQUE VECINAL URBANIZACIÓN LA CABRERA,494.38,1


In [5]:
# Inspect the column names of the filtered DataFrame.
apartments.columns

Index(['codigo', 'tipo_propiedad', 'area', 'habitaciones', 'banos',
       'administracion', 'parqueaderos', 'sector', 'estrato', 'antiguedad',
       'latitud', 'longitud', 'direccion', 'descripcion', 'website',
       'last_view', 'datetime', 'url', 'timeline', 'estado', 'compañia',
       'precio_arriendo', 'jacuzzi', 'piso', 'closets', 'chimenea',
       'permite_mascotas', 'gimnasio', 'ascensor', 'conjunto_cerrado',
       'piscina', 'salon_comunal', 'terraza', 'vigilancia', 'localidad',
       'barrio', 'estacion_tm_cercana', 'distancia_estacion_tm_m',
       'is_cerca_estacion_tm', 'parque_cercano', 'distancia_parque_m',
       'is_cerca_parque'],
      dtype='object')

In [6]:
# Convert the 'datetime' column to pandas datetime values.
apartments['datetime'] = pd.to_datetime(apartments['datetime'])

In [7]:
def fotmat_tick_humanize(x, pos):
    """Return ``x`` rendered as a human-readable number word via ``humanize.intword``.

    ``pos`` is accepted but not used.
    NOTE(review): the (value, position) signature looks like a matplotlib
    tick-formatter callback -- confirm at the call site. The misspelled name
    is kept on purpose because other cells may reference it.
    """
    return humanize.intword(x)

# Removing Outliers

In [8]:
# Summary statistics of the rent price, shown as human-readable magnitudes.
apartments['precio_arriendo'].describe().apply(humanize.intword)

count      9.1 thousand
mean       15.3 million
std       267.2 million
min      550.0 thousand
25%         2.2 million
50%         4.5 million
75%         9.0 million
max        16.9 billion
Name: precio_arriendo, dtype: object

In [9]:
# Quartiles and inter-quartile range of the rent price.
rent = apartments['precio_arriendo']
Q1, Q3 = rent.quantile([0.25, 0.75])
IQR = Q3 - Q1

# Fences at 1.8 * IQR from the quartiles. When the lower fence would be
# negative it is recomputed with the tighter 0.4 multiplier.
upper = Q3 + 1.8 * IQR
lower = Q1 - 1.8 * IQR
if lower < 0:
    lower = Q1 - 0.4 * IQR

# Listings strictly outside the fences.
is_outlier = rent.lt(lower) | rent.gt(upper)
outliers = apartments.loc[is_outlier]
outliers.shape

(321, 42)

In [10]:
# Percentage of listings flagged as outliers.
# The value is formatted through the f-string format spec instead of
# `round(ratio, 5) * 100`, which can print binary floating-point noise
# (e.g. 3.5000000000000004) after the multiplication.
outlier_share = outliers.shape[0] / apartments.shape[0]
print(f"Porcentaje de outliers: {outlier_share * 100:.3f} %")

Porcentaje de outliers: 3.517 %


In [11]:
# Keep every listing that was NOT flagged as an outlier. The outlier mask
# uses strict comparisons (< lower, > upper), so prices exactly equal to a
# fence are not outliers and must be kept: `between` is inclusive on both
# ends. Rows with a missing price compare as False and are dropped here.
apartments = apartments[apartments['precio_arriendo'].between(lower, upper)]
apartments.shape

(8805, 42)

In [12]:
# Rent price summary after the outlier filter, displayed as a one-column table.
(
    apartments['precio_arriendo']
    .describe()
    .apply(humanize.intword)
    .to_frame()
)

Unnamed: 0,precio_arriendo
count,8.8 thousand
mean,5.8 million
std,4.7 million
min,550.0 thousand
25%,2.2 million
50%,4.3 million
75%,8.0 million
max,21.2 million


## Create map

In [13]:

# Base map centred on Bogotá (latitude, longitude).
bogota_center = [4.7110, -74.0721]
m = folium.Map(location=bogota_center, zoom_start=12)



In [14]:
# Prepare data for the heatmap: one [lat, lon] pair per listing.
# The two columns are extracted directly instead of iterating row by row,
# and listings lacking either coordinate are dropped because HeatMap
# raises on NaN values.
heat_data = (
    apartments[['latitud', 'longitud']]
    .dropna()
    .values
    .tolist()
)

# Add a heatmap layer to the base map.
HeatMap(heat_data).add_to(m)

m.save('../visualizations/heatmap.html')
m

In [16]:
# Create base map centered on Bogotá
m = folium.Map(location=[4.7110, -74.0721], zoom_start=12)

# Bin the latitude and longitude into grid cells (lattice).
# 60 edges per axis -> 59 bins; adjust the number as needed.
lat_bins = np.linspace(
    apartments['latitud'].min(), apartments['latitud'].max(), 60)
lon_bins = np.linspace(
    apartments['longitud'].min(), apartments['longitud'].max(), 60)

# The first edge equals the column minimum and pd.cut builds right-closed
# intervals, so without include_lowest=True the listings sitting exactly on
# the minimum latitude/longitude get no bin and silently vanish from the
# aggregation. (pandas widens the first interval's left edge slightly to
# make room for them.)
# `assign` returns a new frame instead of setting columns on a filtered one.
apartments = apartments.assign(
    lat_bin=pd.cut(apartments['latitud'], bins=lat_bins, include_lowest=True),
    lon_bin=pd.cut(apartments['longitud'], bins=lon_bins, include_lowest=True),
)

# Aggregate rent price by these bins. observed=True keeps only the cells
# that actually contain listings instead of every lat/lon combination.
apartments_grouped = (
    apartments
    .groupby(['lat_bin', 'lon_bin'], observed=True)
    .agg({'precio_arriendo': 'mean'})
    .reset_index()
)

# Calculate the center of each bin (for latitude and longitude)
apartments_grouped['lat_center'] = apartments_grouped['lat_bin'].apply(
    lambda x: x.mid)
apartments_grouped['lon_center'] = apartments_grouped['lon_bin'].apply(
    lambda x: x.mid)

# Range of the per-cell average rent, used for color scaling
min_price = apartments_grouped['precio_arriendo'].min()
max_price = apartments_grouped['precio_arriendo'].max()

# Create a color map from green to red
colormap = mcolors.LinearSegmentedColormap.from_list(
    "rent_colormap", ['green', 'yellow', 'red'])

# Function to assign colors based on rent price


def get_color(price):
    """Map an average rent price to a hex color on the module-level ``colormap``.

    The price is scaled linearly between the module-level ``min_price`` and
    ``max_price`` and the resulting fraction is looked up in ``colormap``.

    Args:
        price: Average rent price of a grid cell.

    Returns:
        The color as a hex string, produced by ``mcolors.to_hex``.
    """
    price_span = max_price - min_price
    if price_span == 0:
        # All cells share one average price: the division below would be
        # 0/0, so use the middle of the scale instead.
        norm_price = 0.5
    else:
        # Normalize price to range between 0 and 1
        norm_price = (price - min_price) / price_span
    # Get the corresponding color from the colormap
    return mcolors.to_hex(colormap(norm_price))


# Draw one rectangle per grid cell that has an average price.
priced_cells = apartments_grouped[apartments_grouped['precio_arriendo'].notna()]
for _, cell in priced_cells.iterrows():
    lat_interval, lon_interval = cell['lat_bin'], cell['lon_bin']
    avg_price = cell['precio_arriendo']
    # Bottom-left and top-right corners of the rectangle
    bottom_left = [lat_interval.left, lon_interval.left]
    top_right = [lat_interval.right, lon_interval.right]
    rectangle = folium.Rectangle(
        bounds=[bottom_left, top_right],
        color=get_color(avg_price),
        fill=True,
        fill_opacity=0.6,
        popup=f"Avg Price: ${round(avg_price)}",
    )
    rectangle.add_to(m)

# Write the map to an HTML file and display it.
m.save('../visualizations/lattice_map.html')
m

In [21]:
# Create base map centered on Bogotá
m = folium.Map(location=[4.7110, -74.0721], zoom_start=12)

# Bin the latitude and longitude into grid cells (lattice).
# 75 edges per axis -> 74 bins; adjust the number as needed.
lat_bins = np.linspace(
    apartments['latitud'].min(), apartments['latitud'].max(), 75)
lon_bins = np.linspace(
    apartments['longitud'].min(), apartments['longitud'].max(), 75)

# The first edge equals the column minimum and pd.cut builds right-closed
# intervals, so without include_lowest=True the listings sitting exactly on
# the minimum latitude/longitude get no bin and silently vanish from the
# aggregation. (pandas widens the first interval's left edge slightly to
# make room for them.)
# `assign` returns a new frame instead of setting columns on a filtered one.
apartments = apartments.assign(
    lat_bin=pd.cut(apartments['latitud'], bins=lat_bins, include_lowest=True),
    lon_bin=pd.cut(apartments['longitud'], bins=lon_bins, include_lowest=True),
)

# Aggregate rent price by these bins. observed=True keeps only the cells
# that actually contain listings instead of every lat/lon combination.
apartments_grouped = (
    apartments
    .groupby(['lat_bin', 'lon_bin'], observed=True)
    .agg({'precio_arriendo': 'mean'})
    .reset_index()
)

# Calculate the center of each bin (for latitude and longitude)
apartments_grouped['lat_center'] = apartments_grouped['lat_bin'].apply(
    lambda x: x.mid)
apartments_grouped['lon_center'] = apartments_grouped['lon_bin'].apply(
    lambda x: x.mid)

# Range of the per-cell average rent
min_price = apartments_grouped['precio_arriendo'].min()
max_price = apartments_grouped['precio_arriendo'].max()

# Create a color map from green to red
colormap = mcolors.LinearSegmentedColormap.from_list(
    "rent_colormap", ['green', 'yellow', 'red'])

# Function to normalize using quantiles
# Color-scale limits: 10th and 90th percentiles of the per-cell average rent.
# They are computed once here; computing them inside the function repeated
# two full quantile passes for every rectangle drawn.
price_q10, price_q90 = apartments_grouped['precio_arriendo'].quantile(
    [0.10, 0.90])


def get_quantile_color(price):
    """Map an average rent price to a hex color using a quantile-capped scale.

    The price is capped to the range between the 10th and 90th percentiles
    of ``apartments_grouped['precio_arriendo']`` (``price_q10`` and
    ``price_q90``), scaled linearly to that range, and looked up in the
    module-level ``colormap``.

    Args:
        price: Average rent price of a grid cell.

    Returns:
        The color as a hex string, produced by ``mcolors.to_hex``.
    """
    quantile_span = price_q90 - price_q10
    if quantile_span == 0:
        # Both percentiles coincide: the division below would be 0/0, so use
        # the middle of the scale instead.
        norm_price = 0.5
    else:
        # Cap the price between the 10th and 90th percentiles
        capped_price = min(max(price, price_q10), price_q90)
        # Normalize price to range between 0 and 1 based on quantiles
        norm_price = (capped_price - price_q10) / quantile_span

    # Get color from the colormap
    return mcolors.to_hex(colormap(norm_price))


# Draw one borderless rectangle per grid cell that has an average price,
# colored with the quantile-capped scale.
priced_cells = apartments_grouped[apartments_grouped['precio_arriendo'].notna()]
for _, cell in priced_cells.iterrows():
    lat_interval, lon_interval = cell['lat_bin'], cell['lon_bin']
    avg_price = cell['precio_arriendo']
    # Bottom-left and top-right corners of the rectangle
    bottom_left = [lat_interval.left, lon_interval.left]
    top_right = [lat_interval.right, lon_interval.right]
    rectangle = folium.Rectangle(
        bounds=[bottom_left, top_right],
        color=get_quantile_color(avg_price),
        fill=True,
        fill_opacity=0.5,
        stroke=False,
        popup=f"Avg Price: ${round(avg_price)}",
    )
    rectangle.add_to(m)

# Write the map to an HTML file.
m.save('../visualizations/lattice_map_quantile.html')
m

# Marker-cluster layer attached to the current map.
marker_cluster = MarkerCluster().add_to(m)

# One marker per listing; the popup shows its rent price.
listing_points = zip(
    apartments['latitud'],
    apartments['longitud'],
    apartments['precio_arriendo'],
)
for lat, lon, rent_price in listing_points:
    marker = folium.Marker(
        location=[lat, lon],
        popup=f"${rent_price}",
    )
    marker.add_to(marker_cluster)

m.save('../reports/maps/apartments.html')
m