In [1]:
# Importing required libraries

import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import ipywidgets as widgets
from IPython.display import display, clear_output

In [2]:
# Load the saved model artefacts.
# LMS: one row per linear model; the prediction helper adds column 0 and
#      multiplies the remaining columns with the feature vector.
# CLUSTERS: one row per cluster; each row is compared with the feature vector
#      to pick the nearest cluster.

LMS_CSV_PATH = 'NB/LMSInfo.csv'
CLUSTERS_CSV_PATH = 'NB/ClusterInfo.csv'

LMS = pd.read_csv(LMS_CSV_PATH)
CLUSTERS = pd.read_csv(CLUSTERS_CSV_PATH)

In [3]:
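# One-time environment setup, kept commented out so it does not run with the notebook:
# enables the ipywidgets notebook extension and the Voila server extension.
#!jupyter nbextension enable --py widgetsnbextension --sys-prefix
#!jupyter serverextension enable voila --sys-prefix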

In [4]:
# Input widgets: one numeric text box per value read by the click handler,
# plus a radio group for the location category.

def _make_numeric_input(widget_class, label, width = '33%'):
    """Create an enabled numeric text box with the given label and width."""
    return widget_class(
        description = label,
        disabled = False,
        layout = widgets.Layout(width = width)
    )


longitude = _make_numeric_input(widgets.FloatText, 'Longitude')
latitude = _make_numeric_input(widgets.FloatText, 'Latitude')
Medianage = _make_numeric_input(widgets.FloatText, 'Med. Age')
Totalrooms = _make_numeric_input(widgets.IntText, 'Rooms')
# The bedrooms box is the only one that is 50% wide instead of 33%.
Totalbrooms = _make_numeric_input(widgets.IntText, 'Bedrooms', width = '50%')
Population = _make_numeric_input(widgets.IntText, 'Population')
Medincome = _make_numeric_input(widgets.FloatText, 'Med. Income')

# The option labels are matched verbatim when the location is one-hot encoded,
# so they must not be reworded here without updating that code.
Location = widgets.RadioButtons(
    options = ['1 Hour distance from Ocean', 'Inland', 'Island', 'Near Bay', 'Near Ocean'],
    description = 'Location',
    disabled = False,
    layout = widgets.Layout(width = '33%')
)


def _centered_row(children):
    """Place the given widgets side by side in a centred row that is 90% wide."""
    row_layout = widgets.Layout(
        justify_content = 'center',
        width = '90%'
    )
    return widgets.HBox(children, layout = row_layout)


# Three form rows, in the order they appear on the page.
inputs1 = _centered_row([latitude, longitude, Medianage])
inputs2 = _centered_row([Totalrooms, Population, Medincome])
inputs3 = _centered_row([Totalbrooms, Location])

In [5]:
# Predicting cluster of unseen data

def return_best_cluster_index(clusters, Longitude, Latitude, MedAge, TotRooms, TotBedrooms, Population, MedIncome, Location):
    """Build the feature vector for one input and find the nearest cluster row.

    The feature vector is the seven numeric arguments, in signature order,
    followed by a five-slot one-hot encoding of ``Location``. Every row of
    ``clusters`` is then compared with that vector using Euclidean distance
    over the row's columns, and the closest row wins (first one on ties).

    Parameters
    ----------
    clusters : pandas.DataFrame
        One row per cluster; every value must be convertible to ``float``.
        Column order is assumed to line up with the feature vector
        (TODO confirm against the CSV this is loaded from).
    Longitude, Latitude, MedAge, TotRooms, TotBedrooms, Population, MedIncome
        Numeric inputs, copied into the feature vector unchanged.
    Location : str
        One of "1 Hour distance from Ocean", "Inland", "Island", "Near Bay";
        any other value is encoded in the fifth (last) one-hot slot.

    Returns
    -------
    tuple(list, int)
        The feature vector and the positional index of the nearest row, or
        ``-1`` when ``clusters`` has no rows.

    Raises
    ------
    IndexError
        If a cluster row has more columns than the feature vector has entries.
    """
    features = [Longitude, Latitude, MedAge, TotRooms, TotBedrooms, Population, MedIncome]

    # One-hot encode the location; unknown labels fall into the last slot,
    # exactly like the final `else` branch of an if/elif chain would.
    known_locations = ["1 Hour distance from Ocean", "Inland", "Island", "Near Bay"]
    hot_slot = known_locations.index(Location) if Location in known_locations else len(known_locations)
    one_hot = [0] * (len(known_locations) + 1)
    one_hot[hot_slot] = 1
    features.extend(one_hot)

    best_index = -1
    # Start from infinity rather than a large magic number so that inputs far
    # away from every cluster still select the genuinely nearest one.
    best_distance = float('inf')

    for row_index in range(len(clusters)):
        # Read from the `clusters` argument, not from a module-level global.
        centre = clusters.iloc[row_index, :].values
        squared_distance = sum(
            (float(centre_value) - float(features[column])) ** 2
            for column, centre_value in enumerate(centre)
        )
        distance = squared_distance ** 0.5

        if distance < best_distance:
            best_distance = distance
            best_index = row_index

    return features, best_index

In [6]:
# Return charges predicted using appropriate cluster's linear model

def return_Cost_pred(lms, L, c):
    """Evaluate the linear model stored in row ``c`` of ``lms`` on vector ``L``.

    Column 0 of the selected row is added as-is (the intercept term); every
    later column ``k`` is a coefficient multiplied with ``L[k - 1]``.

    Parameters
    ----------
    lms : pandas.DataFrame
        One linear model per row.
    L : sequence
        Feature values; must provide at least ``lms.shape[1] - 1`` entries.
    c : int
        Positional index of the row (model) to use.

    Returns
    -------
    The weighted sum of the features plus the value in column 0.
    """
    column_count = lms.shape[1]
    weighted_sum = float(0)

    # Feature position p pairs with coefficient column p + 1.
    for feature_pos, column in enumerate(range(1, column_count)):
        weighted_sum += float(L[feature_pos]) * float(lms.iloc[c, column])

    return weighted_sum + lms.iloc[c, 0]

In [7]:
# On button click function definition

def on_button_clicked(event):
    """Validate the form values, predict the cost and render it in ``output``.

    Everything is displayed inside the ``output`` widget, which is cleared
    first. The inputs are checked one at a time against fixed ranges; the
    first value that is out of range produces its message and ends the
    handler. Otherwise the nearest cluster is looked up, that cluster's
    linear model is evaluated, and the clamped result is shown.

    Parameters
    ----------
    event
        The argument supplied by the button's click callback; not used.
    """
    with output:
        clear_output()

        lat_value = float(latitude.value)
        lon_value = float(longitude.value)
        age_value = float(Medianage.value)
        rooms_value = float(Totalrooms.value)
        population_value = int(Population.value)
        income_value = float(Medincome.value)
        bedrooms_value = int(Totalbrooms.value)
        location_value = Location.value

        # (value, lowest allowed, highest allowed, message when out of range),
        # listed in the order the checks are applied.
        range_checks = [
            (lat_value, 32, 42,
             "<h2><center>Project restricted to California latitude (32 to 42)</center></h2>"),
            (lon_value, -124, -115,
             "<h2><center>Project restricted to California longitude (-124 to -115)</center></h2>"),
            (age_value, 20, 100,
             "<h2><center>Project restricted housing age (20 year to 100 years)</center></h2>"),
            (rooms_value, 2500, 50000,
             "<h2><center>Project restricted total number of rooms (2500 to 50,000)</center></h2>"),
            (population_value, 1400, 50000,
             "<h2><center>Project restricted to population (1400 to 50,000)</center></h2>"),
            (income_value, 300, 20000,
             "<h2><center>Project restricted to median income ($300 to $20,000)</center></h2>"),
            (bedrooms_value, 500, 10000,
             "<h2><center>Project restricted total number of bedrooms (500 to 10,000)</center></h2>"),
        ]

        for value, lowest, highest, message in range_checks:
            if value < lowest or value > highest:
                display(
                    widgets.HTML(value = message)
                )
                return

        feature_vector, cluster_index = return_best_cluster_index(
            CLUSTERS, lon_value, lat_value, age_value, rooms_value,
            bedrooms_value, population_value, income_value, location_value
        )

        predicted_cost = return_Cost_pred(LMS, feature_vector, cluster_index)

        # Clamp the prediction to a displayable range.
        # NOTE(review): anything above 2,000,000 is replaced by 2,100,000, so
        # the threshold and the cap differ - confirm whether that is intended.
        if predicted_cost < 15000:
            predicted_cost = 15000
        elif predicted_cost > 2000000:
            predicted_cost = 2100000

        cost_text = str(round(predicted_cost, 2))
        display(
            widgets.HTML(value = "<h2><center>Cost predicted: $" + cost_text + "</center></h2>")
        )

In [8]:
# Full-width "Calculate" button; a click runs on_button_clicked.

calculate_layout = widgets.Layout(width = '100%')
calculate = widgets.Button(description = 'Calculate', layout = calculate_layout)

calculate.on_click(on_button_clicked)

In [9]:
# Output area that the click handler clears and writes into.

output = widgets.Output()

# Centred wrapper row around the output area.
# NOTE(review): the page assembled at the end of the notebook adds `output`
# directly, so this wrapper is currently not displayed.
centered_layout = widgets.Layout(justify_content = 'center')
OutputHbox = widgets.HBox([output], layout = centered_layout)

In [10]:
# Spacer: an empty <h1> element, reused wherever the page needs a vertical gap.

spacer_layout = widgets.Layout(align_items = 'center')
text_0 = widgets.HTML(value = "<h1></h1>", layout = spacer_layout)

In [11]:
# Page title and subtitle.

title_html = "<h1><b><center>California Housing Prices</center></b></h1>"
subtitle_html = "<h3><center>Median house prices for California districts prediction</center></h3>"

text_1 = widgets.HTML(value = title_html)
text_2 = widgets.HTML(value = subtitle_html)

# Title, subtitle and a spacer stacked vertically.
# NOTE(review): the final page lists text_1/text_2 individually, so this
# grouped box is not what gets displayed.
headings = widgets.VBox([text_1, text_2, text_0])

# "About the dataset" panel: a bordered single-cell table holding two
# descriptive paragraphs and a bullet list of the dataset's columns.
# NOTE(review): this HTML is rendered to users as-is; the copy contains a
# typo ("between being to toyish" - presumably "too toyish"). Fix it in a
# dedicated content change.

text_4 = widgets.HTML(value = """
<table style="width: 100%; border-collapse: collapse; border-style: solid;" border="2" cellpadding="20">
<tbody>
<tr>
<td style="width: 100%;">
<h2 style="text-align: justify;"><strong>About the dataset</strong></h2>
<p style="text-align: justify;">This is the dataset used in the second chapter of Aur&eacute;lien G&eacute;ron's recent book 'Hands-On Machine learning with Scikit-Learn and TensorFlow'. It serves as an excellent introduction to implementing machine learning algorithms because it requires rudimentary data cleaning, has an easily understandable list of variables and sits at an optimal size between being to toyish and too cumbersome.</p>
<p style="text-align: justify;">The data contains information from the 1990 California census. So although it may not help you with predicting current housing prices like the Zillow Zestimate dataset, it does provide an accessible introductory dataset for teaching people about the basics of machine learning. The data pertains to the houses found in a given California district and some summary stats about them based on the 1990 census data.</p>
<p style="text-align: justify;">&nbsp;</p>
<p style="text-align: justify;">The columns are as follows:</p>
<ul>
<li style="text-align: justify;">longitude</li>
<li style="text-align: justify;">latitude</li>
<li style="text-align: justify;">housing median&nbsp; age</li>
<li style="text-align: justify;">total rooms</li>
<li style="text-align: justify;">total bedrooms</li>
<li style="text-align: justify;">population</li>
<li style="text-align: justify;">households</li>
<li style="text-align: justify;">median income</li>
<li style="text-align: justify;">median house value (target)</li>
<li style="text-align: justify;">ocean proximity</li>
</ul>
</td>
</tr>
</tbody>
</table>
""")

# Method summary, placed on the page directly before the first image
# (the "table below" it refers to).
# NOTE(review): rendered copy contains "multivarialte", and "Orthogonal
# Matching Point" is presumably "Orthogonal Matching Pursuit" - confirm and
# correct in a content change.
text_5 = widgets.HTML(value = """
<h2>Hybrid Approach Used: Clustering + Linear Regression</h2>
<p style="text-align: justify;">A variety of different regression models were tested for the given dataset including Linear, Huber, Orthogonal Matching Point, etc. (refer table below) and linear regression (multivarialte) was found to be the best model among all applied models having a R2 score of 0.6473.</p>
<p>&nbsp;</p>
""")

# Footnote and explanation of the hybrid approach, placed on the page directly
# after the second image (the "figure above" it refers to).
# NOTE(review): rendered copy contains "furthur", "diffrent", "varing" and
# "predictiond" - correct in a content change.
text_7 = widgets.HTML(value = """
<h4>* LM: Linear Model</h4>
<p style="text-align: justify;">Combining with clustering, and applying a hybrid approach (refer figure above) the R2 square is furthur increased by 6.16% to 0.6872. The approach consists of splitting the original dataset into diffrent clusters and then using a linear model for each cluster. The optimal number of clusters have been identified by varing cluster numbers from 2 to 50, and analysing the R2 score for predictiond on test data. For new unseen data, firstly the cluster is predicted and then the linear model corresponding to that particular cluster is applied to obtain the final output.</p>
<p>&nbsp;</p>
""")

# Images: each PNG is read fully into memory, wrapped in an Image widget and
# centred in its own row.

def _load_png_widget(path, width, height):
    """Read the PNG file at ``path`` and return it as a ``widgets.Image``.

    The file is opened in a ``with`` block so its handle is closed as soon as
    the bytes have been read, instead of staying open for the life of the
    kernel.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    with open(path, "rb") as png_file:
        png_bytes = png_file.read()

    return widgets.Image(
        value = png_bytes,
        format = 'png',
        width = width,
        height = height,
    )


# Image 1

image1 = _load_png_widget("image1.png", width = 1000, height = 600)


# Image 2

image2 = _load_png_widget("image2.png", width = 800, height = 600)

ImageHbox1 = widgets.HBox([image1],
                        layout = widgets.Layout(
                        justify_content = 'center',
                        )
                    )

ImageHbox2 = widgets.HBox([image2],
                        layout = widgets.Layout(
                        justify_content = 'center',
                        )
                    )

# Heading shown above the interactive input form.
live_demo_html = "<h1><center>Live Demo</center></h1>"
text_6 = widgets.HTML(value = live_demo_html)

In [12]:
# Assemble the page top to bottom and display it.
# text_0 is the blank spacer; it is repeated to control vertical gaps.

page_children = [
    # Title block
    text_0, text_1, text_2, text_0, text_0,
    # Dataset description
    text_4, text_0,
    # Approach summary and first image
    text_5, ImageHbox1, text_0, text_0,
    # Second image and explanation
    ImageHbox2, text_7, text_0,
    # Live demo heading
    text_6, text_0, text_0,
    # Input form rows
    inputs1, text_0, text_0,
    inputs2, text_0, text_0,
    inputs3, text_0,
    # Action button and result area
    calculate, output,
    # Bottom padding
    text_0, text_0, text_0,
]

page = widgets.VBox(page_children)
display(page)

VBox(children=(HTML(value='<h1></h1>', layout=Layout(align_items='center')), HTML(value='<h1><b><center>Califo…