Scotland-Housing-Stock-by-Tenure Stock by Tenure and Stock by tenure by LA

In [1]:
from gssutils import *

# Page the scraper is pointed at; the distribution is taken from it in a later cell.
landing_page = 'https://www2.gov.scot/Topics/Statistics/Browse/Housing-Regeneration/HSfS/KeyInfoTables'
scraper = Scraper(landing_page)
# Keep the scraper as the final expression so the notebook displays it.
scraper

## Stock by tenure

### Description

These figures provide the estimated number of dwellings in Scotland by tenure.
They are updated each year using information from Housing Statistics Annual
Returns, and Scottish Household Survey data.

The figures presented here are based on March 2017 because this is the latest
available Scottish Household Survey dataset.

The Excel file includes 2 tables:

  * Estimated stock of dwellings by tenure - calendar year **time series** from 1993 to 2000 and financial year time series from 2001-02 to 2016-17, at Scotland level. Please note the change in methodology from 2001, which is fully explained in the notes in the downloadable Excel file.
  * Estimated stock of dwellings by tenure and **local authority**, most recent year only.



### Distributions

1. Stock by tenure 2017  ([MS Excel Spreadsheet](https://www2.gov.scot/Resource/0054/00540622.xls))


In [2]:
# Take the distribution found by the scraper and open it as databaker tabs.
distribution = scraper.distribution()
tabs = distribution.as_databaker()

In [3]:
# Pick the time-series worksheet by name and anchor on its title cell.
tab = next(sheet for sheet in tabs if sheet.name == 'Tbl Stock by Tenure')
cell = tab.filter('Estimated stock of dwellings by tenure: 1993 to 2017')

# Year labels: the non-empty cells underneath the title cell.
year = cell.fill(DOWN).is_not_blank().is_not_whitespace()

# Tenure headings are spread over two rows (offsets 2 and 3 from the title
# cell), so the non-empty cells to the right of both offsets are combined.
header_row_upper = cell.shift(0, 2).fill(RIGHT).is_not_blank().is_not_whitespace()
header_row_lower = cell.shift(0, 3).fill(RIGHT).is_not_blank().is_not_whitespace()
tenure = header_row_upper | header_row_lower

# Observations: the numeric, non-empty cells underneath the tenure headings.
observations = tenure.fill(DOWN).is_not_blank().is_not_whitespace().is_number()

In [4]:
# Each observation takes its year from the cell directly to the left and its
# tenure from the cell directly above; measure type and unit are constants.
year_dimension = HDim(year, 'Year', DIRECTLY, LEFT)
tenure_dimension = HDim(tenure, 'Tenure', DIRECTLY, ABOVE)
Dimensions = [
    year_dimension,
    tenure_dimension,
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'dwellings-thousands'),
]

# Build the conversion segment and flatten it into a pandas DataFrame.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
table = c1.topandas()




In [5]:
# Start from an empty frame; the tables built in later cells are concatenated onto it.
tidy = pd.DataFrame()

In [6]:
# Spreadsheet column heading -> tenure code. Headings that are not listed
# here are left unchanged by the lookup below.
tenure_codes = {
    'Total number of dwellings (000s)': 'total',
    'Owner occupied': 'privately-owned-dwellings/owner-occupied',
    'Rented privately or with a job/business (note this includes households living rent-free) 3': 'privately-owned-dwellings/rented-privately-or-with-a-job-business',
    'From housing associations 4': 'socially-rented-dwellings/from-housing-associations',
    'From local authorities, New Towns, Scottish Homes': 'socially-rented-dwellings/from-local-authorities-new-towns-scottish-homes',
    'Total number of occupied dwellings (000s)': 'occupied',
    'Total number of vacant dwellings (000s)': 'all-vacants',
    'Total number occupied dwellings': 'privately-owned-dwellings/occupied',
    'Vacant private dwellings and second homes': 'privately-owned-dwellings/vacant-private-dwellings-and-second-homes',
}
table['Tenure'] = table['Tenure'].map(
    lambda heading: tenure_codes.get(heading, heading)
)

In [7]:
# Trim surrounding whitespace from the year labels before they are sliced into a period label.
table['Year'] = table['Year'].str.strip()

In [8]:
# Build a "<last four characters> <first three characters>" label from each
# year string; the label is turned into a period identifier in a later cell.
year_text = table['Year'].astype(str)
table['Period'] = year_text.str[-4:] + ' ' + year_text.str[:3]

In [9]:
import re
# Patterns for the period labels understood by time2period:
# "YYYY", "YYYY Mon" and "YYYY Qn".
YEAR_RE = re.compile(r'[0-9]{4}')
# Month abbreviations are matched case-insensitively, so 'Mar', 'MAR' and
# 'mar' are all accepted rather than only one specific spelling per month.
YEAR_MONTH_RE = re.compile(
    r'([0-9]{4})\s+(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)',
    re.IGNORECASE)
YEAR_QUARTER_RE = re.compile(r'([0-9]{4})\s+(Q[1-4])')

# Upper-case three-letter month abbreviation -> two-digit month number.
MONTH_NUMBERS = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06',
                 'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

class Re(object):
  """Small wrapper around re.fullmatch that remembers the most recent match."""
  def __init__(self):
    self.last_match = None
  def fullmatch(self,pattern,text):
    """Run re.fullmatch(pattern, text), store the result and return it."""
    self.last_match = re.fullmatch(pattern,text)
    return self.last_match

def time2period(t):
    """Convert a period label into a period identifier string.

    Accepted forms (the whole string must match):
      * "YYYY"      -> "year/YYYY"
      * "YYYY Mon"  -> "month/YYYY-MM" (three-letter month, any letter case)
      * "YYYY Qn"   -> "quarter/YYYY-Qn" (n in 1..4)

    Any other string is reported with a "no match" message on stdout and
    None is returned.
    """
    gre = Re()
    if gre.fullmatch(YEAR_RE, t):
        return f"year/{t}"
    elif gre.fullmatch(YEAR_MONTH_RE, t):
        year, month = gre.last_match.groups()
        # The pattern only admits the twelve abbreviations, so the upper-cased
        # lookup always succeeds.
        month_num = MONTH_NUMBERS[month.upper()]
        return f"month/{year}-{month_num}"
    elif gre.fullmatch(YEAR_QUARTER_RE, t):
        year, quarter = gre.last_match.groups()
        return f"quarter/{year}-{quarter}"
    else:
        print(f"no match for {t}")
        return None

# Convert each period label into a period identifier with time2period.
table['Period'] = table['Period'].apply(time2period)

In [10]:
import numpy as np
# Blank observations arrive as empty strings: mark them as missing, drop those
# rows, rename OBS to Value and cast the remaining values to int.
# The cleaned column is assigned back through the frame instead of calling
# replace(..., inplace=True) on table['OBS']; that chained in-place call works
# on a temporary Series and leaves the frame untouched under pandas
# copy-on-write, so blanks would survive until astype(int) fails on them.
table = (
    table
    .assign(OBS=table['OBS'].replace('', np.nan))
    .dropna(subset=['OBS'])
    .rename(columns={'OBS': 'Value'})
    .astype({'Value': int})
)

In [11]:
# Every row of this table gets the same geography code
# (presumably the GSS code for Scotland as a whole — confirm against the reference data).
table['Geography'] = 'S92000003'

In [12]:
# Keep only the output columns, in the order used for the tidy data.
output_columns = ['Period', 'Geography', 'Tenure', 'Measure Type', 'Value', 'Unit']
table = table[output_columns]

In [13]:
# Append the time-series rows to the combined output frame.
tidy = pd.concat([tidy , table])

In [14]:
# Pick the local-authority worksheet by name and anchor on its title cell.
tab1 = next(sheet for sheet in tabs if sheet.name == 'Tbl Stock by tenure by LA')
cell1 = tab1.filter('Estimated stock of dwellings by tenure and local authority: 2017')

# Area labels: the non-empty cells underneath the title cell.
geo = cell1.fill(DOWN).is_not_blank().is_not_whitespace()

# Tenure headings are spread over two rows (offsets 1 and 2 from the title
# cell), so the non-empty cells to the right of both offsets are combined.
la_header_upper = cell1.shift(0, 1).fill(RIGHT).is_not_blank().is_not_whitespace()
la_header_lower = cell1.shift(0, 2).fill(RIGHT).is_not_blank().is_not_whitespace()
tenure1 = la_header_upper | la_header_lower

# Observations: the numeric, non-empty cells underneath the tenure headings.
observations1 = tenure1.fill(DOWN).is_not_blank().is_not_whitespace().is_number()

In [15]:
# Each observation takes its area from the cell directly to the left and its
# tenure from the cell directly above; measure type and unit are constants.
geography_dimension = HDim(geo, 'Geography', DIRECTLY, LEFT)
la_tenure_dimension = HDim(tenure1, 'Tenure', DIRECTLY, ABOVE)
Dimensions1 = [
    geography_dimension,
    la_tenure_dimension,
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'dwellings-thousands'),
]
c2 = ConversionSegment(observations1, Dimensions1, processTIMEUNIT=True)

In [16]:
# Flatten the local-authority conversion segment into a pandas DataFrame.
table1 = c2.topandas()




In [17]:
# Spreadsheet column heading -> tenure code. Headings that are not listed
# here are left unchanged by the lookup below.
la_tenure_codes = {
    'Total number of dwellings (000s)': 'total',
    'Owner occupied': 'privately-owned-dwellings/owner-occupied',
    'Rented privately or with a job/business (note this includes households living rent-free) 3': 'privately-owned-dwellings/rented-privately-or-with-a-job-business',
    'From housing associations 4': 'socially-rented-dwellings/from-housing-associations',
    'From local authorities, New Towns, Scottish Homes': 'socially-rented-dwellings/from-local-authorities-new-towns-scottish-homes',
    'Total number of occupied dwellings (000s)': 'occupied',
    'Total number of vacant dwellings (000s)': 'all-vacants',
    'Total number occupied dwellings': 'privately-owned-dwellings/occupied',
    'Vacant private dwellings and second homes': 'privately-owned-dwellings/vacant-private-dwellings-and-second-homes',
}
table1['Tenure'] = table1['Tenure'].map(
    lambda heading: la_tenure_codes.get(heading, heading)
)

In [18]:
# Every row of the local-authority table gets the same fixed period value
# (the sheet title refers to 2017 only).
table1['Period'] = 'month/2017-03'

In [19]:
import numpy as np
# Blank observations arrive as empty strings: mark them as missing, drop those
# rows, rename OBS to Value and cast the remaining values to int.
# The cleaned column is assigned back through the frame instead of calling
# replace(..., inplace=True) on table1['OBS']; that chained in-place call works
# on a temporary Series and leaves the frame untouched under pandas
# copy-on-write, so blanks would survive until astype(int) fails on them.
table1 = (
    table1
    .assign(OBS=table1['OBS'].replace('', np.nan))
    .dropna(subset=['OBS'])
    .rename(columns={'OBS': 'Value'})
    .astype({'Value': int})
)

In [20]:
# Replace each area name with the 'Code' value from scotland-gss.csv, which is
# indexed by its 'Area' column. A name missing from that file raises KeyError.
scotland_gss_codes = pd.read_csv('scotland-gss.csv', index_col='Area')


def _gss_code_for(area_name):
    """Return the 'Code' entry of the row indexed by area_name."""
    return scotland_gss_codes.loc[area_name, 'Code']


table1['Geography'] = table1['Geography'].map(_gss_code_for)

In [21]:
# Keep only the output columns, in the order used for the tidy data.
la_output_columns = ['Period', 'Geography', 'Tenure', 'Measure Type', 'Value', 'Unit']
table1 = table1[la_output_columns]

In [22]:
# Append the local-authority rows to the combined output frame.
tidy = pd.concat([tidy , table1])

In [23]:
# Make sure the output folder exists, then write the de-duplicated
# observations to CSV without the index column.
out = Path('out')
out.mkdir(exist_ok=True)
deduplicated = tidy.drop_duplicates()
deduplicated.to_csv(out / 'observations.csv', index=False)

In [24]:
# Set the dataset's family and theme, then write the generated TriG metadata
# into the output folder.
scraper.dataset.family = 'housing'
scraper.dataset.theme = THEME['housing-planning-local-services']

trig_path = out / 'dataset.trig'
with open(trig_path, 'wb') as trig_file:
    trig_file.write(scraper.generate_trig())

# Create the CSVW schema file for the observations CSV.
observations_csv = out / 'observations.csv'
schema_json = out / 'observations.csv-schema.json'
schema = CSVWSchema('https://ons-opendata.github.io/ref_housing/')
schema.create(observations_csv, schema_json)

In [25]:
# Show the combined tidy table as the cell's output.
tidy

Unnamed: 0,Period,Geography,Tenure,Measure Type,Value,Unit
0,month/1993-12,S92000003,total,Count,2193,dwellings-thousands
1,month/1993-12,S92000003,privately-owned-dwellings/owner-occupied,Count,1217,dwellings-thousands
2,month/1993-12,S92000003,privately-owned-dwellings/rented-privately-or-...,Count,154,dwellings-thousands
3,month/1993-12,S92000003,socially-rented-dwellings/from-housing-associa...,Count,67,dwellings-thousands
4,month/1993-12,S92000003,socially-rented-dwellings/from-local-authoriti...,Count,755,dwellings-thousands
5,month/1994-12,S92000003,total,Count,2210,dwellings-thousands
6,month/1994-12,S92000003,privately-owned-dwellings/owner-occupied,Count,1258,dwellings-thousands
7,month/1994-12,S92000003,privately-owned-dwellings/rented-privately-or-...,Count,155,dwellings-thousands
8,month/1994-12,S92000003,socially-rented-dwellings/from-housing-associa...,Count,77,dwellings-thousands
9,month/1994-12,S92000003,socially-rented-dwellings/from-local-authoriti...,Count,721,dwellings-thousands
