## Mount Google Drive and Change to the Project Folder

In [1]:
# Colab-only setup: mount Google Drive under /content/drive.
# NOTE(review): force_remount = True presumably re-mounts even when the drive
# is already mounted — confirm against the Colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

# Make the project folder the working directory (later cells read
# 'flist.txt' and 'properties.csv' via relative paths), then list it.
%cd /content/drive/MyDrive/MSc.-Dissertations/1/Files
%ls

Mounted at /content/drive
/content/drive/MyDrive/MSc.-Dissertations/1/Files
classification.ipynb  properties.csv      Robin.ipynb
flist.txt             randomsample.ipynb  [0m[01;34mstreet_view[0m/


## Import Libraries

In [2]:
import tensorflow as tf
from tensorflow import keras
from keras import models, layers, utils
from keras.wrappers import scikit_learn
from keras.models import *
from keras.layers import *
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from keras.utils import np_utils

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score, KFold
from sklearn.pipeline import Pipeline

import random
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

## Count the Image Files and Take a Random Sample of Them

In [3]:
# flist.txt was generated once from the shell; the commands are kept here
# (commented out) for reference:
#   !ls street_view
#   !ls street_view -1 | wc -l            # count how many files
#   !ls street_view/*.jpg > flist.txt     # write the filenames into a file
flist = pd.read_csv('flist.txt', header=None)[0].tolist()

# Seed Python's RNG with a fixed integer so the sample is reproducible.
nsamp = 100
random.seed(99)
flist_sub = random.sample(flist, nsamp)

# From here on `flist` refers to the sampled paths only.
flist = flist_sub

## Overview of the `properties` Dataset

In [4]:
# Read the property metadata table and preview its first five rows.
properties = pd.read_csv('properties.csv')
properties.head(5)

Unnamed: 0.1,Unnamed: 0,address,propertyType,bedrooms,detailUrl,location_lat,location_lng,property_id
0,0,"12, Gorsey Brigg, Dronfield Woodhouse, Dronfie...",Terraced,3.0,https://www.rightmove.co.uk/house-prices/detai...,53.29986,-1.49446,60d9dd15-c5a0-4d9c-a341-a1d47add49d5
1,0,"5, Highgate Lane, Dronfield, Derbyshire S18 1UB",Detached,4.0,https://www.rightmove.co.uk/house-prices/detai...,53.29135,-1.45975,4a586e80-181a-4b82-b5c3-2d789436bb14
2,0,"125, Gosforth Lane, Dronfield, Derbyshire S18 1RB",Detached,3.0,https://www.rightmove.co.uk/house-prices/detai...,53.29763,-1.47573,93680b6c-237e-44d3-8f40-959a14b80cad
3,0,"80, Shakespeare Crescent, Dronfield, Derbyshir...",Detached,3.0,https://www.rightmove.co.uk/house-prices/detai...,53.29259,-1.45644,5d49758b-f148-4d06-bbae-3eb23f5c68fb
4,0,"21, Gainsborough Road, Dronfield, Derbyshire S...",Detached,,https://www.rightmove.co.uk/house-prices/detai...,53.2974,-1.48503,4645f5eb-de7c-474f-8d7e-b59fa8c55f19


In [5]:
# Store propertyType as a pandas categorical, then summarise the columns,
# their non-null counts and dtypes.
properties['propertyType'] = properties['propertyType'].astype('category')
properties.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17550 entries, 0 to 17549
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   Unnamed: 0    17550 non-null  int64   
 1   address       17550 non-null  object  
 2   propertyType  17550 non-null  category
 3   bedrooms      11505 non-null  float64 
 4   detailUrl     17550 non-null  object  
 5   location_lat  17550 non-null  float64 
 6   location_lng  17550 non-null  float64 
 7   property_id   17550 non-null  object  
dtypes: category(1), float64(3), int64(1), object(3)
memory usage: 977.2+ KB


In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
properties.describe()

Unnamed: 0.1,Unnamed: 0,bedrooms,location_lat,location_lng
count,17550.0,11505.0,17550.0,17550.0
mean,0.0,2.871186,52.912264,-2.330492
std,0.0,1.010339,1.83383,1.262468
min,0.0,0.0,50.61708,-4.26895
25%,0.0,2.0,51.23283,-3.06729
50%,0.0,3.0,53.095885,-2.658955
75%,0.0,3.0,53.84676,-1.71275
max,0.0,6.0,55.91054,0.71999


In [7]:
# Number of rows per property type (class balance of the label column).
properties['propertyType'].value_counts()

Detached         4134
Semi-Detached    4056
Unknown          3900
Terraced         3666
Flat             1794
Name: propertyType, dtype: int64

## A Subsample of the `properties` Dataset

In [8]:
# Property IDs parsed from the sampled image paths, in the same order as `flist`.
# The slice drops the first 16 and the last 4 characters of each path
# (presumably the directory/filename prefix and the '.jpg' extension — TODO confirm).
flist_id = [path[16:-4] for path in flist]

# Select the sampled properties *in the order of `flist_id`*.
# A boolean `isin` filter would return the rows in `properties` order instead,
# which does not match the order in which the images are loaded from `flist`;
# labels derived from this frame would then be paired with the wrong images.
# An inner merge keeps the order of the left-hand keys, so row i of
# `properties_sub` describes the image at `flist[i]`.
# IDs absent from `properties` are dropped, as with the `isin` filter.
properties_sub = (
    pd.DataFrame({'property_id': flist_id})
    .merge(properties, on='property_id', how='inner')
    .loc[:, list(properties.columns)]   # restore the original column order
)

In [9]:
def _load_scaled_image(path):
    '''
    Load one image file as a NumPy array with every pixel value divided by 255.

    Parameter:
    path (str): Path of the image file to open.

    Returns:
    A NumPy array holding the scaled pixel values.
    '''
    # The context manager closes the image file once its pixels have been copied.
    with Image.open(path) as img:
        return np.array(img) / 255


# One scaled array per sampled path, stacked in `flist` order.
Img_array = np.array([_load_scaled_image(path) for path in flist])
Img_list = list(Img_array)

# Key each image by the ID parsed from its own path (`flist_id` is in `flist`
# order). Zipping against a table whose rows are in a different order would
# attach images to the wrong property IDs.
dic_propid_Img = dict(zip(flist_id, Img_list))

In [10]:
# Despite its name, `onehot_encoder` is a LabelEncoder: it maps each property
# type to an integer code. The one-hot expansion is done by `to_categorical`.
onehot_encoder = LabelEncoder()
encoded_propertyType = onehot_encoder.fit_transform(properties_sub.propertyType)
dummy_propertyType = np_utils.to_categorical(encoded_propertyType)

## Multi-Class Classification Using a Neural Network

In [11]:
def mlp(output_dim):

    '''
    Builds and compiles a softmax classifier with no hidden layers:
    the input is flattened and fed straight into one dense output layer.

    Parameter:
    output_dim (int): The number of output classes (units in the dense layer).

    Returns:
    A Keras Sequential model compiled with categorical cross-entropy loss,
    the Adam optimizer and accuracy as the reported metric.
    '''

    classifier = Sequential([
        Flatten(),
        Dense(output_dim, activation = tf.nn.softmax),
    ])
    classifier.compile(
        optimizer = 'adam',
        loss = 'categorical_crossentropy',
        metrics = ['accuracy'],
    )
    return classifier

In [12]:
# Seed TensorFlow's global RNG (same value as the image sampling seed) so that
# repeated runs start from the same state.
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.random.set_seed(99)

# Keep a reference to the model: building it inline inside the `.fit(...)` call
# would leave only `history` behind, so the trained network could not be
# evaluated or used for prediction afterwards.
model = mlp(dummy_propertyType.shape[1])
history = model.fit(Img_array, dummy_propertyType, epochs = 20, batch_size = 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
