In [None]:
!pip install google-streetview


In [None]:
import json
import os
import random
import re
import time
import uuid
import warnings
from os import path, makedirs

import pandas as pd
import requests

import google_streetview
import google_streetview.api
from google_streetview import helpers
warnings.filterwarnings('ignore')

Function for downloading Google Street View images and saving them under specific file names.

In [None]:

def download_links_with_names(results, dir_path, names, metadata_file='metadata.json', metadata_status='status', status_ok='OK'):
    """Download the available Google Street View images and name them after ``names``.

    Every link whose metadata entry reports ``status_ok`` is saved as
    ``gsv_<name>.jpg`` inside ``dir_path``, and the file name is recorded in that
    metadata entry under the ``'_file'`` key. The (updated) metadata is then
    written as JSON to ``metadata_file`` inside ``dir_path``.

    Args:
      results:
        Object exposing ``links`` (image URLs) and ``metadata`` (one dict per
        link, in the same order), e.g. a :class:`api.results` instance.
        Its ``metadata`` entries are modified in place.
      dir_path (str):
        Path of directory to save downloads of images from :class:`api.results`.links
      names (list): list of image titles; ``names[i]`` is used for ``results.links[i]``
      metadata_file (str):
         Name of the file with extension to save the :class:`api.results`.metadata
      metadata_status (str):
        Key name of the status value from :class:`api.results`.metadata response from the metadata API request.
      status_ok (str):
        Value from the metadata API response status indicating that an image is available.

    Raises:
      IndexError: if an available image has no corresponding entry in ``names``.
        This is checked before anything is created or downloaded.
    """
    metadata = results.metadata

    # Keep only the links whose metadata says an image is available
    available = [
        (position, url)
        for position, url in enumerate(results.links)
        if metadata[position][metadata_status] == status_ok
    ]

    # Fail early: a missing name would otherwise surface halfway through the
    # downloads, after some files were already written and before the metadata is saved
    unnamed = [position for position, _ in available if position >= len(names)]
    if unnamed:
        raise IndexError(
            'names has {} entries but an image is available at position {}'.format(len(names), unnamed[0])
        )

    makedirs(dir_path, exist_ok=True)

    # (download) Download images if status from metadata is ok
    for position, url in available:
        file_path = path.join(dir_path, 'gsv_' + str(names[position]) + '.jpg')
        metadata[position]['_file'] = path.basename(file_path) # add file reference
        helpers.download(url, file_path)

    # (metadata) Save metadata with file reference
    metadata_path = path.join(dir_path, metadata_file)
    with open(metadata_path, 'w') as out_file:
        json.dump(metadata, out_file)

Google Street View API key.
See https://developers.google.com/maps/documentation/streetview/get-api-key to get your own API key.

In [None]:
# Google Street View API key.
# Preferred: set the GOOGLE_STREETVIEW_API_KEY environment variable so the key
# is never stored in the notebook. Alternatively replace the fallback below:
#api_key ='insert your api_key here'
api_key = os.environ.get('GOOGLE_STREETVIEW_API_KEY', ' ')

In [None]:
# Path for the detailed images from the rightmove ad - this is optional
download_path = 'detailed_images/'

# Each property is assigned a unique id; these ids are used as names for the Google Street View images
unique_ids = []

# Location of each property (latitudes, longitudes) from the rightmove ad, used for Google Street View
locc_lat = []
locc_lng = []

# Running number of properties scraped so far
property_counter = 0

# Dataframe to save property results
property_results = pd.DataFrame(
    columns=[
        'address',
        'propertyType',
        'bedrooms',
        'detailUrl',
        'location_lat',
        'location_lng',
        'property_id',
        'index',
    ]
)

# Dataframe to save property transaction results
transaction_results = pd.DataFrame(
    columns=[
        'property_id',
        'displayPrice',
        'dateSold',
        'tenure',
        'newBuild',
    ]
)

In [None]:
# Load the list of UK outcodes (first column of a header-less CSV),
# https://en.wikipedia.org/wiki/Postcodes_in_the_United_Kingdom
uk_outcodes = list(pd.read_csv('outcodes.csv', header=None)[0])

# Select 10 random outcodes. The sample size is capped at the number of loaded
# outcodes because random.sample raises ValueError when asked for more items
# than the population holds. Call random.seed(...) first for a repeatable selection.
N_OUTCODES = 10
selected_outcodes = random.sample(uk_outcodes, min(N_OUTCODES, len(uk_outcodes)))

In [None]:

# Scrape the sold-property listings of every selected outcode from rightmove.
MAX_PAGES = 40  # rightmove shows 40 pages
REQUEST_DELAY_SECONDS = 1  # pause after each page request; rightmove limits the number of requests
REQUEST_TIMEOUT_SECONDS = 30  # do not hang forever on a stalled connection

# define our user headers (identical for every request, so built once)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36"
}

# the sold property information is embedded in the page as JSON between these two markers
start = '<script>window.__PRELOADED_STATE__ = '
end   = '</script>'

# rows are gathered in plain lists and added to the result dataframes once,
# after the loop, instead of re-copying the dataframes for every property
property_records = []
transaction_frames = []

for borough in selected_outcodes:
    # The page number now advances with the loop; previously the page index was
    # never incremented, so the first page was requested over and over.
    # NOTE(review): assumes rightmove numbers result pages from 1 - confirm
    for page_number in range(1, MAX_PAGES + 1):

        # the website changes if you are on page 1 as compared to other pages
        if page_number == 1:
            rightmove = f"https://www.rightmove.co.uk/house-prices/{borough}.html?country=england&searchLocation={borough}"
        else:
            rightmove = f"https://www.rightmove.co.uk/house-prices/{borough}.html?page={page_number}"

        # request our webpage
        res = requests.get(rightmove, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

        # check status
        # NOTE(review): if rightmove answers pages past the last one with an HTTP
        # error rather than an empty result list, this stops the cell - confirm
        res.raise_for_status()
        time.sleep(REQUEST_DELAY_SECONDS)

        # scrapping sold property information
        start_position = res.text.find(start)
        if start_position == -1:
            # without this check the slice below would silently start at a wrong offset
            raise ValueError(f"Could not find the embedded property data in {rightmove}")
        javascript_res = res.text[start_position + len(start):]
        javascript_res = javascript_res[:javascript_res.find(end)]
        javascript_res = json.loads(javascript_res)
        apartments = javascript_res['results']['properties']

        # an empty page means there is nothing more to fetch for this outcode
        if not apartments:
            break

        for i in apartments:
            # saving location for google street view images
            locc_lat.append(i['location']['lat'])
            locc_lng.append(i['location']['lng'])

            # creating unique ids to link property and transactions
            uniques_id = str(uuid.uuid4())
            unique_ids.append(uniques_id)

            # collecting the property row
            data = {k: v for k, v in i.items() if k in ['address', 'propertyType', 'bedrooms', 'detailUrl']}
            data['location_lat'] = i['location']['lat']
            data['location_lng'] = i['location']['lng']
            data['property_id'] = uniques_id
            data['index'] = property_counter
            property_counter = property_counter + 1
            property_records.append(data)

            # collecting the transactions of this property
            transaction_data = pd.DataFrame.from_dict(i['transactions'])
            transaction_data['property_id'] = uniques_id
            transaction_frames.append(transaction_data)

            # THIS PART IS OPTIONAL load additional images from rightmove
            #rightmove has a limits of request
            #res_images = requests.get(data['detailUrl'], headers=headers)
            #res_images.raise_for_status()
            #start1 = 'window.PAGE_MODEL = '
            #end1   = '</script>'
            #javascript_image_res = (res_images.text)[(res_images.text).find(start1) + len(start1):]
            #javascript_image_res = javascript_image_res[:javascript_image_res.find(end1)]
            #javascript_image_res = json.loads(javascript_image_res)
            #if javascript_image_res['soldPropertyData']['property'] != None :
                #additional_images = javascript_image_res['soldPropertyData']['property']['images']


                #dir_path = download_path + data['property_id']+'/'
                #if not path.isdir(dir_path):
                  #makedirs(dir_path)

                #for (indx, element) in enumerate(additional_images):
                    #if (indx<2): #lets download only first two images
                        #img_url = element['url']
                        #file_path = path.join(dir_path, data['property_id']+'_' + str(indx) + '.jpg')
                        #helpers.download(img_url, file_path)

# updating property dataframe (one concat for all scraped rows)
if property_records:
    # dtype=object keeps the values as scraped, e.g. integer bedroom counts are
    # not turned into floats when some listings have no value for the field
    property_results = pd.concat(
        [property_results, pd.DataFrame(property_records, dtype=object)],
        ignore_index=True,
    )

# updating transaction dataframe (one concat for all scraped rows)
if transaction_frames:
    transaction_results = pd.concat([transaction_results, *transaction_frames])

In [None]:
# Loading Google Street View images for every scraped property location.
# All "lat,lng" pairs are joined with ';' into a single 'location' parameter.
street_view_locations = ';'.join(
    f'{lat},{lng}' for lat, lng in zip(locc_lat, locc_lng)
)
params = {
    'size': '640x640',
    'location': street_view_locations,
    'key': api_key,
    'heading': '0',
}
api_list = google_streetview.helpers.api_list(params)
results = google_streetview.api.results(api_list)

In [None]:
# Save the available Street View images into 'street_view/', named after the property ids
download_links_with_names(results, dir_path='street_view/', names=unique_ids)

In [None]:

# Drop the helper 'index' column and renumber the rows before saving.
# The result is assigned back: reset_index returns a new dataframe, so calling
# it without assignment has no effect on what gets written to the CSV.
# errors='ignore' lets this cell be re-run after the column is already gone.
property_results = (
    property_results
    .drop(columns=['index'], errors='ignore')
    .reset_index(drop=True)
)
property_results.to_csv('properties.csv')
transaction_results.reset_index(drop=True).to_csv('property_transactions.csv')