In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [8]:
# Full path to the CSV
file_path = '/home/uwabor/Downloads/brasil-real-estate-1.csv'

# Read the file
df1 = pd.read_csv(file_path)
# View the first few rows
df1.head()

Unnamed: 0,property_type,place_with_parent_names,region,lat-lon,area_m2,price_usd
0,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6443051,-35.7088142",110.0,"$187,230.85"
1,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6430934,-35.70484",65.0,"$81,133.37"
2,house,|Brasil|Alagoas|Maceió|,Northeast,"-9.6227033,-35.7297953",211.0,"$154,465.45"
3,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.622837,-35.719556",99.0,"$146,013.20"
4,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.654955,-35.700227",55.0,"$101,416.71"


In [59]:
# removing all NAN rows from DF1
df1 = df1.dropna()
df1.head()

Unnamed: 0,property_type,place_with_parent_names,region,lat-lon,area_m2,price_usd
0,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6443051,-35.7088142",110.0,"$187,230.85"
1,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6430934,-35.70484",65.0,"$81,133.37"
2,house,|Brasil|Alagoas|Maceió|,Northeast,"-9.6227033,-35.7297953",211.0,"$154,465.45"
3,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.622837,-35.719556",99.0,"$146,013.20"
4,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.654955,-35.700227",55.0,"$101,416.71"


In [60]:
# Using the Lat-lon to create different columns

df1[["lat", "lon"]] = df1["lat-lon"].str.split(',', expand=True).astype(float)
df1.head()

Unnamed: 0,property_type,place_with_parent_names,region,lat-lon,area_m2,price_usd,lat,lon
0,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6443051,-35.7088142",110.0,"$187,230.85",-9.644305,-35.708814
1,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6430934,-35.70484",65.0,"$81,133.37",-9.643093,-35.70484
2,house,|Brasil|Alagoas|Maceió|,Northeast,"-9.6227033,-35.7297953",211.0,"$154,465.45",-9.622703,-35.729795
3,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.622837,-35.719556",99.0,"$146,013.20",-9.622837,-35.719556
4,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.654955,-35.700227",55.0,"$101,416.71",-9.654955,-35.700227


In [61]:
# create state column from place_with_parent_names
df1["state"] = df1["place_with_parent_names"].str.extract(r'\|Brasil\|([^|]+)')
df1.head()

Unnamed: 0,property_type,place_with_parent_names,region,lat-lon,area_m2,price_usd,lat,lon,state
0,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6443051,-35.7088142",110.0,"$187,230.85",-9.644305,-35.708814,Alagoas
1,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6430934,-35.70484",65.0,"$81,133.37",-9.643093,-35.70484,Alagoas
2,house,|Brasil|Alagoas|Maceió|,Northeast,"-9.6227033,-35.7297953",211.0,"$154,465.45",-9.622703,-35.729795,Alagoas
3,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.622837,-35.719556",99.0,"$146,013.20",-9.622837,-35.719556,Alagoas
4,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.654955,-35.700227",55.0,"$101,416.71",-9.654955,-35.700227,Alagoas


In [62]:
# converting all price_usd to floats by removing all strings and symbols
df1["price_usd"] = df1["price_usd"].astype(str) \
    .str.replace('$', '', regex=False) \
    .str.replace(',', '', regex=False) \
    .astype(float)
df1.head()

Unnamed: 0,property_type,place_with_parent_names,region,lat-lon,area_m2,price_usd,lat,lon,state
0,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6443051,-35.7088142",110.0,187230.85,-9.644305,-35.708814,Alagoas
1,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6430934,-35.70484",65.0,81133.37,-9.643093,-35.70484,Alagoas
2,house,|Brasil|Alagoas|Maceió|,Northeast,"-9.6227033,-35.7297953",211.0,154465.45,-9.622703,-35.729795,Alagoas
3,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.622837,-35.719556",99.0,146013.2,-9.622837,-35.719556,Alagoas
4,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.654955,-35.700227",55.0,101416.71,-9.654955,-35.700227,Alagoas


In [63]:
# removing lat-lon and place-with-parent-names from the column
df1.drop(["lat-lon", "place_with_parent_names"], axis=1, inplace=True)
df1.head()

Unnamed: 0,property_type,region,area_m2,price_usd,lat,lon,state
0,apartment,Northeast,110.0,187230.85,-9.644305,-35.708814,Alagoas
1,apartment,Northeast,65.0,81133.37,-9.643093,-35.70484,Alagoas
2,house,Northeast,211.0,154465.45,-9.622703,-35.729795,Alagoas
3,apartment,Northeast,99.0,146013.2,-9.622837,-35.719556,Alagoas
4,apartment,Northeast,55.0,101416.71,-9.654955,-35.700227,Alagoas


In [9]:
file_path = '/home/uwabor/Downloads/brasil-real-estate-2.csv'

# Read the file
df2 = pd.read_csv(file_path)
# View the first few rows
df2.head()

Unnamed: 0,property_type,state,region,lat,lon,area_m2,price_brl
0,apartment,Pernambuco,Northeast,-8.134204,-34.906326,72.0,414222.98
1,apartment,Pernambuco,Northeast,-8.126664,-34.903924,136.0,848408.53
2,apartment,Pernambuco,Northeast,-8.12555,-34.907601,75.0,299438.28
3,apartment,Pernambuco,Northeast,-8.120249,-34.89592,187.0,848408.53
4,apartment,Pernambuco,Northeast,-8.142666,-34.906906,80.0,464129.36


In [65]:
exchange_rate = 3.19 # 1USD = 3.19 BRL

# create a new column "price_usd"
df2["price_usd"] = (df2["price_brl"] / [exchange_rate]).round(2)
df2.head()

Unnamed: 0,property_type,state,region,lat,lon,area_m2,price_brl,price_usd
0,apartment,Pernambuco,Northeast,-8.134204,-34.906326,72.0,414222.98,129850.46
1,apartment,Pernambuco,Northeast,-8.126664,-34.903924,136.0,848408.53,265958.79
2,apartment,Pernambuco,Northeast,-8.12555,-34.907601,75.0,299438.28,93867.8
3,apartment,Pernambuco,Northeast,-8.120249,-34.89592,187.0,848408.53,265958.79
4,apartment,Pernambuco,Northeast,-8.142666,-34.906906,80.0,464129.36,145495.1


In [10]:
df2.drop("price_brl", axis=1, inplace=True)
df2 = df2.dropna()
df2.head()

Unnamed: 0,property_type,state,region,lat,lon,area_m2
0,apartment,Pernambuco,Northeast,-8.134204,-34.906326,72.0
1,apartment,Pernambuco,Northeast,-8.126664,-34.903924,136.0
2,apartment,Pernambuco,Northeast,-8.12555,-34.907601,75.0
3,apartment,Pernambuco,Northeast,-8.120249,-34.89592,187.0
4,apartment,Pernambuco,Northeast,-8.142666,-34.906906,80.0


In [11]:
df = pd.concat([df1, df2])
print("df shape:", df.shape)
df.head()

df shape: (24127, 9)


Unnamed: 0,property_type,place_with_parent_names,region,lat-lon,area_m2,price_usd,state,lat,lon
0,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6443051,-35.7088142",110.0,"$187,230.85",,,
1,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.6430934,-35.70484",65.0,"$81,133.37",,,
2,house,|Brasil|Alagoas|Maceió|,Northeast,"-9.6227033,-35.7297953",211.0,"$154,465.45",,,
3,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.622837,-35.719556",99.0,"$146,013.20",,,
4,apartment,|Brasil|Alagoas|Maceió|,Northeast,"-9.654955,-35.700227",55.0,"$101,416.71",,,


In [12]:
summary_stats = df[["area_m2", "price_usd"]].describe()
summary_stats.head(10)

Unnamed: 0,area_m2
count,24127.0
mean,115.104903
std,47.807752
min,53.0
25%,76.0
50%,103.0
75%,142.0
max,252.0


In [19]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Sample data - replace with your actual DataFrame
df = pd.DataFrame({
    'lat': [-23.55, -22.90, -12.97, -15.79, -19.92],
    'lon': [-46.63, -43.17, -38.50, -47.88, -43.94],
    'price_usd': [500000, 300000, 200000, 350000, 400000],
    'property_type': ['Apartment', 'House', 'Land', 'Condo', 'Villa']
})

# Create the map
fig = px.scatter_map(
    df,
    lat="lat",
    lon="lon",
    color="price_usd",
    hover_name="property_type",
    zoom=3
)

# Configure map settings
fig.update_layout(
    mapbox=dict(
        style="open-street-map",  # Free style that doesn't require a token
        center=dict(lat=-14.2, lon=-51.9),  # Center on Brazil
        zoom=3
    ),
    height=600,
    width=800,
    title="Property Map"
)

# For PyCharm Professional:
# Option 1: Show in browser (recommended)
pio.renderers.default = "browser"
fig.show()

# Option 2: Show in PyCharm's built-in viewer (might need configuration)
# pio.renderers.default = "png"  # Static image
# fig.show()

[13486:13486:0805/131814.005279:ERROR:content/browser/network_service_instance_impl.cc:597] Network service crashed, restarting service.
[13486:13486:0805/131815.752278:ERROR:extensions/browser/service_worker/service_worker_task_queue.cc:165] DidStartWorkerFail jnbbnacmeggbgdjgaoojpmhdlkkpblgi: 3
[13486:13486:0805/131816.104045:ERROR:extensions/browser/service_worker/service_worker_task_queue.cc:165] DidStartWorkerFail bkdgflcldnnnapblkhphbgpggdiikppg: 3
[13486:13486:0805/131816.108348:ERROR:extensions/browser/service_worker/service_worker_task_queue.cc:165] DidStartWorkerFail nkbihfbeogaeaoehlefnkodbefgpgknn: 3
[13486:13486:0805/131816.113820:ERROR:extensions/browser/service_worker/service_worker_task_queue.cc:165] DidStartWorkerFail ckejmhbmlajgoklhgbapkiccekfoccmk: 3
[13486:13486:0805/131816.295282:ERROR:extensions/browser/service_worker/service_worker_task_queue.cc:165] DidStartWorkerFail eaidebojanpehpceonghnmgdofblnlae: 3
[13585:13585:0805/131822.760156:ERROR:ui/gl/gl_surface_pre