# Moving

Analysis and mapping of data to help us decide where to move next in London. Data sources:

- Price Paid
- OpenStreetMap (for points of interest, cycle lanes)
- TfL cycle network
- ONS census data

In [1]:
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

## Data processing



### Statistical data

- House prices per square metre, source: [London Datastore](https://data.london.gov.uk/dataset/house-price-per-square-metre-in-england-and-wales)
- MSOA nice names, source: [House of Commons](https://houseofcommonslibrary.github.io/msoanames/)
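- MSOA household income, source: ONS (file `saiefy1920finalqaddownload280923.xlsx`, sheet "Total annual income")
- MSOA population, source: ONS (file `sape23dt4mid2020msoasyoaestimatesunformatted.xlsx`, sheet "Mid-2020 Persons")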

In [28]:
# Borough names exactly as they appear in the per-borough house price file
# names (underscores instead of spaces).
# NOTE(review): Havering, Hounslow and City of London are not listed —
# confirm the omission is intentional.
boroughs = [
    "Barking_and_Dagenham",
    "Barnet",
    "Bexley",
    "Brent",
    "Bromley",
    "Camden",
    "Croydon",
    "Ealing",
    "Enfield",
    "Greenwich",
    "Hackney",
    "Hammersmith_and_Fulham",
    "Haringey",
    "Harrow",
    "Hillingdon",
    "Islington",
    "Kensington_and_Chelsea",
    "Kingston_upon_Thames",
    "Lambeth",
    "Lewisham",
    "Merton",
    "Newham",
    "Redbridge",
    "Richmond_upon_Thames",
    "Southwark",
    "Sutton",
    "Tower_Hamlets",
    "Waltham_Forest",
    "Wandsworth",
    "Westminster",
]

In [31]:
# load house price data
# One CSV per borough lives in the hpm_la_2022 folder. The path is built with
# pathlib rather than a hard-coded "\\" separator, so the cell also runs on
# macOS/Linux where a backslash is not a path separator.
PRICES_DIR = Path("hpm_la_2022")

# Read every borough file into its own frame first; a separate name for the
# list avoids `prices_df` being a list at one point and a DataFrame at another.
borough_frames = [
    pd.read_csv(PRICES_DIR / f"{borough}_link_13082022.csv")
    for borough in boroughs
]

# Stack the boroughs and replace the per-file row numbers with one running index.
prices_df = pd.concat(borough_frames).reset_index(drop=True)

In [72]:
# number of rows in the combined house price frame
prices_df.shape[0]

2378196

In [None]:
# load other data
# Lookup tables that get joined onto the house price records; every file is
# read from the notebook's working directory.

# Postcode lookup. low_memory=False makes pandas parse the file in one go
# instead of chunk-wise, which avoids mixed-dtype inference on wide files.
postcodes_df = pd.read_csv(
    "London_postcodes.csv",
    low_memory=False,
)

# Household income: the first 4 rows of the sheet are skipped so that the
# next row is used as the header.
inc_df = pd.read_excel(
    "saiefy1920finalqaddownload280923.xlsx",
    sheet_name="Total annual income",
    skiprows=4,
)

# Population: same layout, 4 leading rows skipped before the header.
pop_df = pd.read_excel(
    "sape23dt4mid2020msoasyoaestimatesunformatted.xlsx",
    sheet_name="Mid-2020 Persons",
    skiprows=4,
)

# Human-readable MSOA names
msoa_names = pd.read_csv("MSOA-Names-2.2.csv")

In [80]:
# merge data
# Enrich every house price record with area codes, income, population and a
# readable area name. All joins are left joins, so every price record is kept
# and unmatched lookups show up as NaN.
# NOTE(review): the income, population and names tables are all matched on
# "MSOA21 Code" — confirm each of them uses the same MSOA code vintage.
df = (
    prices_df
    # postcode -> LSOA / MSOA codes
    .merge(
        postcodes_df[["Postcode", "LSOA21 Code", "MSOA21 Code"]],
        left_on="postcode",
        right_on="Postcode",
        how="left",
    )
    # household income per MSOA
    .merge(
        inc_df[["MSOA code", "Total annual income (£)"]],
        left_on="MSOA21 Code",
        right_on="MSOA code",
        how="left",
    )
    # population per MSOA
    .merge(
        pop_df[["MSOA Code", "All Ages"]],
        left_on="MSOA21 Code",
        right_on="MSOA Code",
        how="left",
    )
    # MSOA nice names and local authority name
    .merge(
        msoa_names[["msoa21cd", "msoa21hclnm", "localauthorityname"]],
        left_on="MSOA21 Code",
        right_on="msoa21cd",
        how="left",
    )
)

In [81]:
# clean up
# Remove the right-hand join keys (they mirror "MSOA21 Code") and give the
# looked-up columns readable names.
# errors="ignore" keeps this cell re-runnable: `df` is overwritten here, so on
# a second run the key columns are already gone and a plain drop() would raise
# KeyError. rename() already skips labels that are not present.
df = (
    df
    .drop(columns=["MSOA code", "MSOA Code", "msoa21cd"], errors="ignore")
    .rename(columns={
        "Total annual income (£)": "Household income",
        "All Ages": "Population",
        "msoa21hclnm": "MSOA_nicename",
    })
)

In [92]:
# aggregate data by MSOA
# One output row per combination of area, year, property type and room count.
# Household income and population are part of the key so they are carried
# through to the result alongside the area columns.
# NOTE(review): groupby leaves out rows whose key contains NaN by default, so
# records without a matched MSOA, income or population value are excluded
# here — confirm that is intended.
group_keys = [
    "lad21cd",
    "localauthorityname",
    "MSOA21 Code",
    "MSOA_nicename",
    "year",
    "propertytype",
    "numberrooms",
    "Household income",
    "Population",
]

# Named aggregation: each output column is declared as (source column, function).
df_grouped = (
    df
    .groupby(group_keys)
    .agg(
        avg_priceper=("priceper", "mean"),
        total_counts=("dateoftransfer", "count"),
        avg_price=("price", "mean"),
        avg_curr_energy_efficiency=("CURRENT_ENERGY_EFFICIENCY", "mean"),
    )
    .reset_index()
)
df_grouped

### Geo data 

Get points of interest and boundaries for mapping

- Cafes
- Cycle lanes
- MSOA boundaries


## Explore data

## Playground

In [None]:
## Load Price paid data
# --> not used in the analysis because it does not contain size info

# pp-complete.csv is read with explicit column names supplied via `names=`.
pp_columns = [
    "Identifier", "Price", "Date_of_transfer", "Postcode",
    "Property_type", "New", "Duration", "PAON",
    "SAON", "Street", "Locality", "City",
    "District", "County", "PPD_category", "Record_status",
]
pp_df = pd.read_csv("pp-complete.csv", names=pp_columns)

# Keep only rows whose County equals the upper-cased "Greater London",
# write that subset to disk, then continue from the saved extract.
pp_df_london = pp_df.loc[pp_df["County"] == "Greater London".upper()]
pp_df_london.to_csv("pp_complete_london.csv", index=False)

pp_df = pd.read_csv("pp_complete_london.csv")