## Introduction

The purpose of this exploration is to scrape data from Rightmove property website and convert it into a format where we can analyse such as into a pandas DataFrame, or a .csv file.

In [40]:
rightmove_url = "https://www.rightmove.co.uk/property-for-sale/find.html?searchType=SALE&locationIdentifier=REGION%5E94022&insId=1&radius=0.0&minPrice=&maxPrice=&minBedrooms=&maxBedrooms=&displayPropertyType=&maxDaysSinceAdded=&_includeSSTC=on&sortByPriceDescending=&primaryDisplayPropertyType=&secondaryDisplayPropertyType=&oldDisplayPropertyType=&oldPrimaryDisplayPropertyType=&newHome=&auction=false"

In [41]:
# imports
from lxml import html, etree
import requests
import pandas as pd
import datetime as dt

In [42]:
response = requests.get(rightmove_url)

In [43]:
response.status_code

200

In [44]:
response.content

b'<!DOCTYPE html>\n<html lang="en-GB" class="is-not-modern property-for-sale channel--buy ">\n<head>\n    <meta charset="utf-8">\n    <title>Properties For Sale in Liverpool City Centre - Flats &amp; Houses For Sale in Liverpool City Centre - Rightmove</title>\n    <meta http-equiv="X-UA-Compatible" content="IE=Edge"/>\n    <meta name="viewport" content="width=device-width, shrink-to-fit=no, initial-scale=1.0, user-scalable=no"/>\n    <meta name="format-detection" content="telephone=no"/>\n    <meta name="HandheldFriendly" content="True"/>\n    <meta name="description" content="Find Properties For Sale in Liverpool City Centre - Flats &amp; Houses For Sale in Liverpool City Centre - Rightmove. Search over 900,000 properties for sale from the top estate agents and developers in the UK - Rightmove."/>\n        <meta name="referrer" content="origin-when-cross-origin">\n    \n    <link rel="preconnect" href="https://media.rightmove.co.uk:443" crossorigin/>\n    <link rel="preconnect" href=

In [45]:
xp_prices = '//div[@class="propertyCard-priceValue"]/text()'
xp_weblinks = '//div[@class="propertyCard-details"]//a[@class="propertyCard-link"]/@href'
xp_titles = '//div[@class="propertyCard-details"]//a[@class="propertyCard-link"]\
//h2[@class="propertyCard-title"]/text()'
xp_addresses = '//address[@class="propertyCard-address"]/span/text()'

In [46]:
tree = html.fromstring(response.content)

In [47]:
price_pcm, titles, addresses, weblinks = [], [], [], []
for val in tree.xpath(xp_prices):
    price_pcm.append(val)
for val in tree.xpath(xp_titles):
    titles.append(val)
for val in tree.xpath(xp_addresses):
    addresses.append(val)
for val in tree.xpath(xp_weblinks):
    weblinks.append('http://www.rightmove.co.uk'+val)

In [48]:
len(price_pcm), len(titles), len(weblinks), len(addresses)

(25, 25, 25, 25)

In [49]:
tree.xpath(xp_addresses)

['The Residence, Water Street, Liverpool, Merseyside, L2, WATER19',
 'Moorfields, Liverpool, L2 2BS',
 'The Strand, Liverpool, Merseyside, L2',
 'Greenland Street, Liverpool, Merseyside, L1',
 'Unity Building Rumford Place, Liverpool',
 'Rumford Place, Liverpool, Merseyside, L3',
 'Old Hall Street, Liverpool, L3',
 'The Strand, Liverpool, Merseyside, L2',
 'William Jessop Way, Liverpool, Merseyside, L3',
 'Old Hall Street, Liverpool, L3',
 'William Jessop Way, Liverpool',
 'Mann Island, Liverpool, Merseyside, L3',
 'The Strand, Liverpool, Merseyside, L2',
 'Tower Building, 22 Water Street, Liverpool, L3',
 'Mann Island, Liverpool, Merseyside, L3',
 'Navigation Wharf, Liverpool, L3',
 'Tower Building, Water Street, Liverpool, L3',
 'Tower Building, Water Street, Liverpool, L3',
 '19 Princes Parade, Liverpool, L3',
 'Tower Building, Water Street, Liverpool, L3',
 'The Colonnades Albert Dock, Liverpool, L3',
 'Quebec Quay, Liverpool, L3',
 'Tower Building, Water Street, Liverpool, L3',
 '

In [52]:
data = [price_pcm, titles, weblinks, addresses]
temp_df = pd.DataFrame(data)
temp_df = temp_df.transpose()
temp_df.columns = ['price','type','url', 'address']

In [53]:
temp_df

Unnamed: 0,price,type,url,address
0,"£169,995",\n 2 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"The Residence, Water Street, Liverpool, Mersey..."
1,"£3,300,000",\n Studio flat for sale,http://www.rightmove.co.uk/property-for-sale/p...,"Moorfields, Liverpool, L2 2BS"
2,"£2,565,000",\n 2 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"The Strand, Liverpool, Merseyside, L2"
3,"£1,550,001",\n 3 bedroom penthouse for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"Greenland Street, Liverpool, Merseyside, L1"
4,"£900,000",\n 4 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"Unity Building Rumford Place, Liverpool"
5,"£899,950",\n 3 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"Rumford Place, Liverpool, Merseyside, L3"
6,"£795,000",\n 3 bedroom flat for sale,http://www.rightmove.co.uk/property-for-sale/p...,"Old Hall Street, Liverpool, L3"
7,"£750,000",\n 2 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"The Strand, Liverpool, Merseyside, L2"
8,"£748,500",\n 1 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"William Jessop Way, Liverpool, Merseyside, L3"
9,"£700,000",\n 3 bedroom flat for sale,http://www.rightmove.co.uk/property-for-sale/p...,"Old Hall Street, Liverpool, L3"


In [187]:
temp_df = temp_df[temp_df.url != 'http://www.rightmove.co.uk']

In [54]:
temp_df['number_bedrooms'] = temp_df.type.str.extract(r'\b([\d][\d]?)\b',expand=True)
temp_df.loc[temp_df['type'].str.contains('studio',case=False),'number_bedrooms']=0

In [55]:
temp_df

Unnamed: 0,price,type,url,address,number_bedrooms
0,"£169,995",\n 2 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"The Residence, Water Street, Liverpool, Mersey...",2.0
1,"£3,300,000",\n Studio flat for sale,http://www.rightmove.co.uk/property-for-sale/p...,"Moorfields, Liverpool, L2 2BS",0.0
2,"£2,565,000",\n 2 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"The Strand, Liverpool, Merseyside, L2",2.0
3,"£1,550,001",\n 3 bedroom penthouse for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"Greenland Street, Liverpool, Merseyside, L1",3.0
4,"£900,000",\n 4 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"Unity Building Rumford Place, Liverpool",4.0
5,"£899,950",\n 3 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"Rumford Place, Liverpool, Merseyside, L3",3.0
6,"£795,000",\n 3 bedroom flat for sale,http://www.rightmove.co.uk/property-for-sale/p...,"Old Hall Street, Liverpool, L3",3.0
7,"£750,000",\n 2 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"The Strand, Liverpool, Merseyside, L2",2.0
8,"£748,500",\n 1 bedroom apartment for sale ...,http://www.rightmove.co.uk/property-for-sale/p...,"William Jessop Way, Liverpool, Merseyside, L3",1.0
9,"£700,000",\n 3 bedroom flat for sale,http://www.rightmove.co.uk/property-for-sale/p...,"Old Hall Street, Liverpool, L3",3.0
