In [2]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

In [3]:
# Robust CSV loading: try common encodings and fall back to chardet
from pathlib import Path
import chardet
filepath = Path('mexico-real-estate-clean.csv')
# Encodings are tried in order; the first one that decodes the whole file wins.
# NOTE: 'latin1' (an alias of 'iso-8859-1') maps every possible byte to a
# character, so it never raises a decode error. As the list stands, the entries
# after it and the chardet fallback below are only reached if this list is edited.
encodings_to_try = ['utf-8', 'utf-8-sig', 'latin1', 'iso-8859-1', 'cp1252']
df = None
for enc in encodings_to_try:
    try:
        df = pd.read_csv(filepath, encoding=enc)
    except UnicodeError as e:
        # Only a decoding failure justifies trying the next encoding. Any other
        # error (missing file, malformed CSV, ...) is not encoding-related and is
        # left to propagate immediately instead of being reported five times.
        print(f'Failed with encoding {enc}: {e}')
    else:
        print(f'Loaded with encoding: {enc}')
        break
if df is None:
    # Fallback: detect encoding with chardet on a sample
    with filepath.open('rb') as fh:
        raw = fh.read(100000)  # sample first 100KB without loading the whole file
    guess = chardet.detect(raw)
    detected = guess.get('encoding')
    confidence = guess.get('confidence')
    print(f'Chardet detected: {detected} (confidence={confidence})')
    if detected:
        df = pd.read_csv(filepath, encoding=detected)
        print(f'Loaded with chardet-detected encoding: {detected}')
    else:
        # UnicodeDecodeError requires (encoding, object, start, end, reason);
        # passing only a message would raise TypeError instead of this error.
        raise UnicodeDecodeError(
            'unknown',
            raw,
            0,
            len(raw),
            'Unable to determine encoding; try opening the file in a text editor and re-saving with UTF-8',
        )
# Quick preview
print(df.head())

Failed with encoding utf-8: 'utf-8' codec can't decode byte 0xe9 in position 11: invalid continuation byte
Failed with encoding utf-8-sig: 'utf-8' codec can't decode byte 0xe9 in position 64: invalid continuation byte
Loaded with encoding: latin1
  property_type             state        lat         lon  area_m2  price_usd
0         house  Estado de México  19.560181  -99.233528      150   67965.56
1         house        Nuevo León  25.688436 -100.198807      186   63223.78
2     apartment          Guerrero  16.767704  -99.764383       82   84298.37
3     apartment          Guerrero  16.829782  -99.911012      150   94308.80
4         house           Yucatán  21.052583  -89.538639      205  105191.37


In [4]:
# Rich (HTML table) preview of the first rows of the loaded DataFrame.
df.head()

Unnamed: 0,property_type,state,lat,lon,area_m2,price_usd
0,house,Estado de México,19.560181,-99.233528,150,67965.56
1,house,Nuevo León,25.688436,-100.198807,186,63223.78
2,apartment,Guerrero,16.767704,-99.764383,82,84298.37
3,apartment,Guerrero,16.829782,-99.911012,150,94308.8
4,house,Yucatán,21.052583,-89.538639,205,105191.37


In [5]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

(1736, 6)

In [6]:
# Per-column summary: non-null counts and dtypes, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1736 entries, 0 to 1735
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   property_type  1736 non-null   object 
 1   state          1736 non-null   object 
 2   lat            1736 non-null   float64
 3   lon            1736 non-null   float64
 4   area_m2        1736 non-null   int64  
 5   price_usd      1736 non-null   float64
dtypes: float64(3), int64(1), object(2)
memory usage: 81.5+ KB


In [None]:
# Scatter every listing on a map. Hovering a marker shows its price_usd as the
# title, with area_m2 and property_type as extra fields (all existing columns).
# NOTE(review): zoom=10 is a fairly close zoom level and the sample rows span
# several states -- confirm this initial view is the intended one.
fig = px.scatter_mapbox(
    df,
    lat="lat",
    lon="lon",
    hover_name="price_usd",
    hover_data=["area_m2", "property_type"],
    color_discrete_sequence=["fuchsia"],
    zoom=10,
    height=600,
)
# Single layout pass: OpenStreetMap base tiles and no margins around the map.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

  fig = px.scatter_mapbox(df, lat="lat", lon="lon", hover_name="price", hover_data=["area", "rooms"], color_discrete_sequence=["fuchsia"], zoom=10, height=600)


ValueError: Value of 'hover_name' is not the name of a column in 'data_frame'. Expected one of ['property_type', 'state', 'lat', 'lon', 'area_m2', 'price_usd'] but received: price