In [4]:
import pandas as pd
from pathlib import Path

# Absolute path of the directory the notebook is run from; data paths are built from it.
STORAGE_FOLDER = Path().resolve()

In [None]:
# Column headings applied to the Price Paid CSV, which is read without a header row.
column_names = [
    "Transaction unique identifier", "Price", "Date of Transfer", "Postcode",
    "Property Type", "Old/New", "Duration",
    "PAON", "SAON", "Street", "Locality", "Town/City", "District", "County",
    "PPD Category Type", "Record Status",
]

# Read the 2024 file and tag every row with its year.
df_2024 = pd.read_csv(
    STORAGE_FOLDER / "data" / "pp-2024.csv",
    names=column_names,
    header=None,
).assign(year=2024)

# Working frame: only Category Type A rows (Standard Price Paid entries), with the
# index renumbered from zero. Boolean selection returns a new frame, so df_2024
# itself is not modified by later changes to df.
df = (
    df_2024
    .loc[df_2024["PPD Category Type"].eq("A")]
    .reset_index(drop=True)
)

In [None]:
# Read the postcode -> constituency lookup table from the data folder.
postcodes = pd.read_csv(STORAGE_FOLDER.joinpath("data", "postcodes_with_con.csv"))

In [None]:
# Sanity check: show a few lookup rows assigned to constituency "UKPARL.2025.HSP".
# random_state pins the rows shown so the output is stable across re-runs, and the
# sample size is capped because .sample raises ValueError when asked for more rows
# than exist.
hsp_postcodes = postcodes[postcodes.short_code == "UKPARL.2025.HSP"]
hsp_postcodes.sample(n=min(5, len(hsp_postcodes)), random_state=0)

In [5]:
# Normalise lookup postcodes by removing spaces so they compare equal to the
# space-stripped transaction postcodes below. The column is updated in place, as
# before, so any other cell reading postcodes.postcode still sees the stripped form.
postcodes.postcode = postcodes.postcode.str.replace(" ", "", regex=False)

# Build the postcode -> short_code lookup. Series.map requires a uniquely-valued
# index on the mapper, so repeated postcodes (which would otherwise raise
# InvalidIndexError) are collapsed to their first occurrence, and rows with no
# postcode are dropped so a missing key cannot be matched to missing transaction
# postcodes.
postcode_to_constituency = (
    postcodes
    .dropna(subset=["postcode"])
    .drop_duplicates(subset="postcode")
    .set_index("postcode")
    .short_code
)

# Transactions whose postcode is missing or absent from the lookup get NaN.
df["constituency"] = (
    df.Postcode.str.replace(" ", "", regex=False).map(postcode_to_constituency)
)

In [None]:
# Median sale price for constituency "UKPARL.2025.HSP". For comparison, the ONS
# constituency house price statistics get 676k in March 2025:
# https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/parliamentaryconstituencyhousepricestatisticsforsmallareas
df.loc[df.constituency == "UKPARL.2025.HSP", "Price"].median()

In [None]:
# Fraction of "UKPARL.2025.HSP" sales with a price of at least 1.75 million.
df.loc[df.constituency == "UKPARL.2025.HSP", "Price"].ge(1.75e6).mean()

In [8]:
import numpy as np

# Price band edges. Each band is the half-open interval [lower, upper); the final
# band is open-ended via np.inf.
bins = [0, 250000, 500000, 750000, 1000000, 1500000, 1750000, 2000000, 3000000, 5000000, 10000000, np.inf]

# Select the constituency's prices once instead of re-filtering the whole frame
# twice for every band.
hsp_prices = df.loc[df.constituency == "UKPARL.2025.HSP", "Price"]

# Proportion of the constituency's sales falling in each band: the mean of the
# boolean "in band" mask.
counts = [
    ((hsp_prices >= lower) & (hsp_prices < upper)).mean()
    for lower, upper in zip(bins[:-1], bins[1:])
]

# One row per band: lower edge, upper edge, and share of sales.
stats = pd.DataFrame({
    "bin_lower": bins[:-1],
    "bin_upper": bins[1:],
    "proportion": counts,
})

In [None]:
# Number of "UKPARL.2025.HSP" sales with a non-null price, broken down by year.
df.loc[df.constituency == "UKPARL.2025.HSP"].groupby("year")["Price"].count()

In [None]:
# bin_lower is stored as strings from here on.
# NOTE(review): this conversion looks redundant now that bin_lower_text supplies the
# axis labels — confirm no other cell relies on the string dtype before removing it.
stats.bin_lower = stats.bin_lower.astype(str)

# Bar annotations: each proportion as a percentage with one decimal place.
stats["text"] = stats.proportion.map("{:.1%}".format)

# Axis labels: lower band edge as a pound amount with thousands separators.
# The label previously began with the mis-encoded sequence "Â£" (a UTF-8 pound sign
# decoded as Latin-1); it now uses the intended "£".
stats["bin_lower_text"] = stats.bin_lower.map(lambda lower: f"£{int(lower):,}")

import plotly.express as px

# The figure is the cell's last expression, so it is rendered as the cell output.
px.bar(
    stats,
    x="bin_lower_text",
    y="proportion",
    title="Proportion of houses sold in each price band in Holborn and St Pancras",
    text="text",
).update_layout(
    height=600,
    width=800,
    xaxis_title="Price band lower bound",
    yaxis_title="Proportion of houses sold",
)

In [None]:
import plotly.express as px

# Cumulative histogram (100 bins requested) of sale prices for "UKPARL.2025.HSP".
hsp_sale_prices = df.loc[df.constituency == "UKPARL.2025.HSP", "Price"]
fig = px.histogram(hsp_sale_prices, cumulative=True, nbins=100)

# NOTE(review): the title mentions "2026 forecast prices", but the cells visible here
# only load 2024 transactions and apply no uprating — confirm the title is accurate.
# update_layout returns the figure, so it is displayed as the cell output.
fig.update_layout(
    showlegend=False,
    width=800,
    height=600,
    title="Distribution of sale prices in Holborn and St Pancras, 2026 forecast prices",
)