In [None]:
# !pip install folium
# !pip install branca

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

import folium
import matplotlib
import branca

In [None]:
# Load the housing data and parse `date_sold` as datetimes.
# `infer_datetime_format` is deprecated since pandas 2.0 (a strict version of
# it is now the default behaviour), so it is no longer passed.
perth = pd.read_csv('PerthHousing.csv', parse_dates=['date_sold'])

# Split the row labels 80/20 with a fixed seed so the partition is reproducible.
train_indices, test_indices = train_test_split(np.array(perth.index), test_size=0.2, random_state=0)

# Independent copies, so columns added later do not touch `perth`.
train = perth.loc[train_indices].copy()
test = perth.loc[test_indices].copy()

In [None]:
# Natural log of the sale price, stored alongside the raw value.
train["log_price"] = train["price"].pipe(np.log)

In [None]:
# Plotting every training row is too slow, so the maps below only use
# the first 10,000 rows of the training set.
sample_train = train.head(10_000)

In [None]:
# Colour scale spanning the log-price range of the plotted sample.
sample_log_price = sample_train['log_price']
colormap = branca.colormap.linear.YlOrRd_09.scale(min(sample_log_price), max(sample_log_price))

folium_map = folium.Map(location=(-31.96, 115.87), zoom_start=10)

# Walk the three columns in lockstep instead of materialising every row.
for lat, lon, log_price in zip(sample_train["latitude"], sample_train["longitude"], sample_log_price):
    folium.CircleMarker(
        [lat, lon],
        radius=0.01,
        color=colormap(log_price),
        fill=True,
        fill_opacity=0.7,
    ).add_to(folium_map)

# Attach the colour scale as a legend, then display the map.
folium_map.add_child(colormap)
folium_map

In [None]:
# Cut-offs at the 30th and 70th percentiles of the training log-prices.
q1, q2 = (train['log_price'].quantile(level) for level in (0.3, 0.7))

def quantile_map(x):
    """Return the price band (1, 2 or 3) for a single log-price.

    Band 1 is strictly below ``q1``; band 2 runs from ``q1`` up to, but not
    including, ``q2``; band 3 is everything else. A value that compares false
    against both cut-offs (e.g. NaN) therefore lands in band 3.
    """
    if x < q1:
        return 1
    if x < q2:
        return 2
    return 3

# Label every training row with its band.
train['price_quantile'] = train['log_price'].map(quantile_map)

In [None]:
# Share of training rows that fall in each band.
train['price_quantile'].value_counts().div(len(train))

In [None]:
# Report both cut-offs on the original price scale (undoing the log).
for label, log_cutoff in (("Quantile 1 Cutoff:", q1), ("Quantile 2 Cutoff:", q2)):
    print(label, np.exp(log_cutoff))

In [None]:
# Re-take the first 10,000 rows so the sample reflects the current columns of `train`.
sample_train = train.head(10_000)

In [None]:
# Colour scale from 0 up to the highest band present in the sample.
sample_bands = sample_train['price_quantile']
colormap = branca.colormap.linear.YlOrRd_09.scale(0, max(sample_bands))

folium_map = folium.Map(location=(-31.96, 115.87), zoom_start=10)

# One marker per sampled row, coloured by its price band.
for lat, lon, band in zip(sample_train["latitude"], sample_train["longitude"], sample_bands):
    folium.CircleMarker(
        [lat, lon],
        radius=0.01,
        color=colormap(band),
        fill=True,
        fill_opacity=0.7,
    ).add_to(folium_map)

# Attach the colour scale as a legend, then display the map.
folium_map.add_child(colormap)
folium_map

In [None]:
# Random forests are stochastic (bootstrap samples, feature sub-sampling).
# Pin the seed so the fitted model, and every map and plot derived from it,
# is identical on each Restart & Run All. The seed matches the one used for
# the train/test split.
model = RandomForestClassifier(max_leaf_nodes=30, random_state=0)

# Features are the coordinates only; the target is the price band.
x_train = train.loc[:, ['longitude', 'latitude']]
y_train = train.loc[:, 'price_quantile']
model.fit(x_train, y_train)

In [None]:
# Predict on exactly the two columns the model was fitted on. A later cell
# adds extra columns to `x_train`; selecting the features explicitly keeps
# this cell re-runnable after that has happened.
predicted_quantiles = model.predict(x_train[['longitude', 'latitude']])

In [None]:
# Colour scale from 0 up to the highest predicted band.
colormap = branca.colormap.linear.YlOrRd_09.scale(0, max(predicted_quantiles))

folium_map = folium.Map(location=(-31.96, 115.87), zoom_start=9)

# Predictions are paired with the sample purely by position: the i-th
# prediction colours the i-th sampled row.
# NOTE(review): this assumes `predicted_quantiles` is in the same row order
# as `sample_train` - confirm it was produced from the training features.
for lat, lon, band in zip(sample_train["latitude"], sample_train["longitude"], predicted_quantiles):
    folium.CircleMarker(
        [lat, lon],
        radius=0.01,
        color=colormap(band),
        fill=True,
        fill_opacity=0.7,
    ).add_to(folium_map)

# Attach the colour scale as a legend, then display the map.
folium_map.add_child(colormap)
folium_map

In [None]:
# Put the actual log-price and the predicted band next to the coordinates
# so the two can be compared in a plot.
x_train = x_train.assign(
    log_price=np.log(train["price"]),
    predicted_quantiles=predicted_quantiles,
)

In [None]:
# Distribution of actual log-prices within each predicted band.
x_train.boxplot(column="log_price", by="predicted_quantiles")

In [None]:
# Bounding box of the training coordinates.
lon_values = train["longitude"]
lat_values = train["latitude"]

min_lon, max_lon = lon_values.min(), lon_values.max()
min_lat, max_lat = lat_values.min(), lat_values.max()

In [None]:
# Regular 100 x 100 grid of (longitude, latitude) pairs covering the
# bounding box; longitude varies slowest.
lon_steps = np.linspace(min_lon, max_lon, 100)
lat_steps = np.linspace(min_lat, max_lat, 100)

new_locations = [(lon, lat) for lon in lon_steps for lat in lat_steps]

In [None]:
# Grid points as a frame with the same column names and order as the
# features the model was fitted on.
grid_columns = ["longitude", "latitude"]
x_temp = pd.DataFrame(new_locations, columns=grid_columns)

# Note: this rebinds `predicted_quantiles` to the grid predictions.
predicted_quantiles = model.predict(x_temp)

In [None]:
# Colour scale from 0 up to the highest band predicted on the grid.
colormap = branca.colormap.linear.YlOrRd_09.scale(0, max(predicted_quantiles))

folium_map = folium.Map(location=(-31.96, 115.87), zoom_start=9)

# One marker per grid point, coloured by the band predicted for it.
# This map sets `opacity` (not `fill_opacity`) on its markers.
for lat, lon, band in zip(x_temp["latitude"], x_temp["longitude"], predicted_quantiles):
    folium.CircleMarker(
        [lat, lon],
        radius=0.01,
        color=colormap(band),
        fill=True,
        opacity=0.5,
    ).add_to(folium_map)

# Attach the colour scale as a legend, then display the map.
folium_map.add_child(colormap)
folium_map

In [None]:
# Store the model's band prediction for every training row.
coordinate_features = train[['longitude', 'latitude']]
train["predicted_quantile"] = model.predict(coordinate_features)

In [None]:
# Display the column labels currently on `train` (inspection only).
train.columns

In [None]:
# Price against bedroom count, with one regression fit per predicted band.
sns.lmplot(
    data=train,
    x="bedrooms",
    y='price',
    hue='predicted_quantile',
)