# Housing descriptions 

This notebook looks at the information contained in the descriptions of the planning applications. 

In [None]:
import pandas as pd
import re 

from elasticsearch import Elasticsearch

import sys
# Import custom functions which help process and format the planning data 
sys.path.append('functions')
import preprocessing_fncs as ppf
import elastic_search_fncs as esf

### Connect to the Planning London Datahub

In [2]:
# Connection settings for the Planning London Datahub Elasticsearch instance.
# NOTE(review): credentials are hardcoded here; the account name suggests
# read-only access - confirm before sharing this notebook more widely.
db_host = 'https://athena.london.gov.uk'
db_user = 'odbc_readonly'
db_pass = 'odbc_readonly'
db_port = '10099'
db_name = 'gla-ldd-external'

# Client for the datahub, authenticated with the user/password pair above
es = Elasticsearch([f"{db_host}:{db_port}"], http_auth=(db_user, db_pass))

# Ping once and report whether the server answered
print("Connected to Elasticsearch!" if es.ping() else "Could not connect to Elasticsearch.")

Connected to Elasticsearch!


  es = Elasticsearch(


### Process planning application data 

In [3]:
# Date window of the query. Defined once so the query and the summary message
# printed below cannot drift apart.
since_year = "01/01/2015"
to_year = "01/01/2025"

# The request returns all planning applications over the ten-year window above
# with at least 1 social housing unit gain.
df = esf.social_units_x_query(es, x_res_units=1, since_year=since_year, to_year=to_year)

# Format the data 
df = ppf.format_df(df)

print(f'Number of residential planning applications between {since_year} and {to_year}: {len(df)}')

Number of residential planning applications since 1/1/2022: 1118


In [6]:
# Names of the source columns used to index `df` in the next cell.
# They all share one prefix, so each name is built from its distinguishing suffix.
_UNITS_PREFIX = 'total_no_proposed_residential_units_'

# Categories backed by a single source column
self_build = _UNITS_PREFIX + 'self_build_and_custom_build'
social_housing = _UNITS_PREFIX + 'social_rent'

# Categories backed by several source columns (summed row-wise later on)
affordable_rent = [
    _UNITS_PREFIX + suffix
    for suffix in (
        'london_living_rent',
        'discount_market_rent_charged_at_london_rents',
        'london_affordable_rent',
        'discount_market_rent',
    )
]

affordable_sale = [
    _UNITS_PREFIX + suffix
    for suffix in (
        'shared_equity',
        'london_shared_ownership',
        'discount_market_sale',
        'starter_homes',
    )
]

# Market categories, again one source column each
market_rent = _UNITS_PREFIX + 'market_for_rent'
market_sale = _UNITS_PREFIX + 'market_for_sale'

In [8]:
# Build one numeric column per tenure category.
#
# Single-source categories are converted directly. For the multi-source
# categories every source column is converted to numeric BEFORE summing across
# them: summing first would concatenate the values if the columns hold strings
# (e.g. '1' + '2' -> '12'), and the later conversion would then yield 12, not 3.
# Assignment order is kept so the new columns are appended in the same order.
df['self_build'] = pd.to_numeric(df[self_build])
df['social_housing'] = pd.to_numeric(df[social_housing])
df['affordable_rent'] = df[affordable_rent].apply(pd.to_numeric).sum(axis=1)
df['affordable_sale'] = df[affordable_sale].apply(pd.to_numeric).sum(axis=1)
df['market_rent'] = pd.to_numeric(df[market_rent])
df['market_sale'] = pd.to_numeric(df[market_sale])

In [9]:
def generate_build_type(df):
    """Label each application as fully or only partly social housing.

    Adds a ``build_type`` column to ``df`` (the frame is modified in place):
    ``'Social housing'`` when the ``social_housing`` count equals
    ``total_no_proposed_residential_units``, otherwise ``'Mixed social housing'``.

    Both columns are coerced to numeric before comparing, so a total stored as
    text (e.g. ``'5'``) still matches the numeric count ``5``. Values that are
    missing or cannot be parsed never compare equal and therefore fall into
    ``'Mixed social housing'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``social_housing`` and
        ``total_no_proposed_residential_units``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with ``build_type`` added.
    """
    social_units = pd.to_numeric(df['social_housing'], errors='coerce')
    total_units = pd.to_numeric(df['total_no_proposed_residential_units'], errors='coerce')

    # Vectorised comparison; NaN on either side compares as False, and
    # fillna(False) covers nullable dtypes where the comparison yields <NA>.
    all_social = social_units.eq(total_units).fillna(False)

    df['build_type'] = all_social.map({True: 'Social housing', False: 'Mixed social housing'})
    return df

# Add the 'build_type' column to df. generate_build_type writes the column onto
# the frame it is given and returns that same frame, so df is simply rebound to it.
df = generate_build_type(df)

### Regex matching of terms 

In [10]:
# Description of every application, cast to str so each value can be searched
descriptions = df['description'].astype(str)

# Terms to look for. Each one is inserted into the pattern below as-is (not
# escaped), so an entry may itself be a regular expression.
# NOTE(review): 'heatpump' produced 0 matches in the recorded run - the
# descriptions may spell it as two words or with a hyphen; confirm.
regex_term = [
    'infill',
    'demolition',
    'community',
    'clearance',
    'cycle',
    'heatpump',
    'solar panels',
    'non material',
]

for term in regex_term:
    # Whole-word, case-insensitive search for the term
    regex = re.compile(rf'\b(?:{term})\b', re.IGNORECASE)
    # Keep the descriptions containing the term; only their count is reported
    matches = [text for text in descriptions if regex.search(text)]
    print(f"Matches for '{term}': {len(matches)}")

Matches for 'infill': 18
Matches for 'demolition': 526
Matches for 'community': 149
Matches for 'clearance': 13
Matches for 'cycle': 444
Matches for 'heatpump': 0
Matches for 'solar panels': 4
Matches for 'non material': 8
