#### Import relevant libraries

In [1]:
# pandas loads and reshapes the housing data.
import pandas as pd
# `figure`/`show` are used directly for single charts; the `bk_plot` alias
# gives access to the rest of bokeh.plotting (e.g. `gridplot`) for subplots.
from bokeh.plotting import figure, show
import bokeh.plotting as bk_plot
from bokeh.io import output_notebook
# Configure Bokeh to render plots inline in the notebook output cells.
output_notebook()

#### Load dataset

In [2]:
# Read the raw listings; the path is relative to the notebook's working directory.
houseprices_data = pd.read_csv(filepath_or_buffer="data/HousingPricesData.csv")

#### Subset for relevant columns

In [3]:
# Keep all rows, but only the four columns the analysis below relies on.
houseprices_data = houseprices_data.loc[:, ['Zip', 'Price', 'Area', 'Room']]

#### Inspect the first 5 rows, shape, and data types of the dataset

In [4]:
# Preview the first five rows (the default for head()) as the cell's rich output.
houseprices_data.head(n=5)

Unnamed: 0,Zip,Price,Area,Room
0,1091 CR,685000.0,64,3
1,1059 EL,475000.0,60,3
2,1097 SM,850000.0,109,4
3,1060 TH,580000.0,128,6
4,1036 KN,720000.0,138,5


In [5]:
# (rows, columns) of the subsetted frame.
houseprices_data.shape

(924, 4)

In [6]:
# Column dtypes: confirms Price and Area are numeric before they are divided below.
houseprices_data.dtypes

Zip       object
Price    float64
Area       int64
Room       int64
dtype: object

#### Create a price per sqm variable based on the price and area variables

In [7]:
# Element-wise Price / Area, stored as a new column on the same frame.
houseprices_data['PriceperSqm'] = houseprices_data['Price'].div(houseprices_data['Area'])

#### Sort houses by price, from highest to lowest

In [8]:
# Most expensive houses first; returns a new frame and leaves houseprices_data unsorted.
houseprices_sorted = houseprices_data.sort_values(by='Price', ascending=False)

In [9]:
# Show the five highest-priced houses to sanity-check the sort order.
houseprices_sorted.head(n=5)

Unnamed: 0,Zip,Price,Area,Room,PriceperSqm
195,1017 EL,5950000.0,394,10,15101.522843
837,1075 AH,5850000.0,480,14,12187.5
305,1016 AE,4900000.0,623,13,7865.168539
103,1017 ZP,4550000.0,497,13,9154.929577
179,1012 JS,4495000.0,178,5,25252.808989


#### Plot a bar chart using Bokeh

In [10]:
# Take the ten most expensive houses (houseprices_sorted is ordered by Price, descending).
data = houseprices_sorted.head(10)

# Zip codes form the categorical x-axis; each bar's height is that house's price.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
fig = figure(x_range=data['Zip'], width=700, height=500)
fig.vbar(x=data['Zip'], top=data['Price'], width=0.9)
show(fig)

#### Add other details to make the chart more informative

In [11]:
# Same bar chart as the previous cell, now with a title, axis labels and larger fonts.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
fig = figure(
    x_range=data['Zip'],
    width=700,
    height=500,
    title='Top 10 Areas with the highest house prices',
    x_axis_label='Zip code',
    y_axis_label='House prices in millions',
)

# The y-axis label promises millions, so scale the raw prices to match it.
fig.vbar(x=data['Zip'], top=data['Price'] / 1_000_000, width=0.9)

# Enlarge axis titles, tick labels and the plot title for readability.
fig.xaxis.axis_label_text_font_size = "15pt"
fig.xaxis.major_label_text_font_size = "10pt"
fig.yaxis.axis_label_text_font_size = "15pt"
fig.yaxis.major_label_text_font_size = "10pt"
fig.title.text_font_size = '15pt'

show(fig)


#### Create Subplots

In [12]:

# Left panel: total price of the ten most expensive houses.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p1 = figure(
    x_range=data['Zip'],
    width=480,
    height=400,
    title='Top 10 Areas with the highest house prices',
    x_axis_label='Zip code',
    y_axis_label='House prices in millions',
)

# Scale to millions so the bars agree with the y-axis label.
p1.vbar(x=data['Zip'], top=data['Price'] / 1_000_000, width=0.9)

# Right panel: price per sqm for the SAME ten houses. `data` is ranked by total
# price, not by price per sqm, so the title says exactly that rather than
# claiming a per-sqm ranking.
p2 = figure(
    x_range=data['Zip'],
    width=480,
    height=400,
    title='Price per sqm for the top 10 Areas with the highest house prices',
    x_axis_label='Zip code',
    y_axis_label='House prices per sqm',
)

p2.vbar(x=data['Zip'], top=data['PriceperSqm'], width=0.9)

# One row, two columns: the panels are rendered side by side.
gp = bk_plot.gridplot(children=[[p1, p2]])
bk_plot.show(gp)