In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

# --------------------------
# Load the UK house price CSV and index it by observation date
# --------------------------
# NOTE(review): no explicit format / dayfirst is passed, so pandas infers the
# date layout from the data. If the CSV stores dates as DD/MM/YYYY, confirm
# they are not being read month-first.
df = pd.read_csv("Data/UK-House-Prices.csv")
# Convert the "Date" column to datetimes in place, then promote it to the index
# so the time-based resampling further down can operate on it.
df = df.assign(Date=lambda frame: pd.to_datetime(frame["Date"])).set_index("Date")


# --------------------------
# Select up to 10 regions evenly spaced by mean price
# --------------------------
# Rank every region by its mean price over the whole dataset, most expensive first.
sorted_regions = (
    df.groupby("RegionName")["AveragePrice"].mean().sort_values(ascending=False)
)
# Evenly spaced rank positions from the most expensive (0) to the least
# expensive (last). When there are fewer than 10 regions the integer-cast
# positions repeat; np.unique removes the repeats so no region is selected
# twice (duplicate region labels would break the unstack() used when pivoting
# the selection for plotting). With 10 or more regions the positions are
# already distinct and ascending, so the selection is unchanged.
indices = np.unique(np.linspace(0, len(sorted_regions) - 1, 10, dtype=int))
selected_regions = sorted_regions.iloc[indices]

# --------------------------
# Yearly mean house price per region (all regions)
# --------------------------
# Resampling at year-end ("YE") inside each region group yields one mean per
# region per calendar year, indexed by (RegionName, Date).
yearly_averages = df.groupby("RegionName")["AveragePrice"].resample("YE").mean()

# --------------------------
# Restrict to the selected regions and pivot to one column per region
# --------------------------
yearly_selected = (
    yearly_averages
    .loc[selected_regions.index]  # keep the selected regions only
    .unstack(level=0)             # region level -> columns, year-end dates -> rows
)

# --------------------------
# Align rows to a year-end timeline that starts in 2000
# --------------------------
# `years` runs from 2000-12-31 to the last year present in the data; reindexing
# drops earlier years and leaves NaN rows for any year without data.
years = pd.date_range("2000-12-31", yearly_selected.index.max(), freq="YE")
yearly_selected = yearly_selected.reindex(years)

# --------------------------
# Plot mean house prices for the selected regions
# --------------------------
fig, ax = plt.subplots(figsize=(12, 6))
# One line per region; the column name is the region and becomes its legend entry.
for column in yearly_selected.columns:
    ax.plot(yearly_selected.index, yearly_selected[column], marker='o', label=column)

ax.set_xlabel("Year", fontsize=12)
# The pound sign is written as a Unicode escape so the label cannot be
# corrupted by a file-encoding round trip (it previously rendered as "Â£").
ax.set_ylabel("Price (\u00a3)", fontsize=12)
ax.set_title("UK House Prices 2000-2025 (10 Selected Regions)", fontsize=14, fontweight='bold')
ax.grid(True, linestyle='--', alpha=0.5)
# Thousands separators and no decimals on the price axis.
ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))
# Place the legend outside the axes, to the right of the plot area.
ax.legend(title="Region", bbox_to_anchor=(1.05, 1), loc="upper left")
fig.tight_layout()

# Save figure to plots folder
fig.savefig("plots/house_prices_by_region.png", dpi=300)
plt.show()

# --------------------------
# Annual house price growth per region: compute, then plot the selected regions
# --------------------------

# Yearly mean of the "12m%Change" column within each region.
growth_average = df.groupby("RegionName")["12m%Change"].resample("YE").mean()
# Same shaping as the price table: selected regions as columns, rows aligned to `years`.
growth_average_selected = (
    growth_average
    .loc[selected_regions.index]
    .unstack(level=0)
    .reindex(years)
)

fig, ax = plt.subplots(figsize=(12, 6))
# Draw one line per region, labelled with the region name for the legend.
for region, growth in growth_average_selected.items():
    ax.plot(growth.index, growth, marker='o', label=region)

ax.set_title("UK House Price Growth 2000-2025 (10 Selected Regions)", fontsize=14, fontweight='bold')
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Price Growth (%)", fontsize=12)
# Tick labels get a "%" suffix; the plotted values themselves are not rescaled here.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.grid(True, linestyle='--', alpha=0.5)
# Legend sits outside the axes on the right.
ax.legend(title="Region", bbox_to_anchor=(1.05, 1), loc="upper left")
fig.tight_layout()

# Write the figure into the plots folder, then display it
fig.savefig("plots/house_price_growth_by_region.png", dpi=300)
plt.show()

# --------------------------
# Load and preprocess Bank of England interest rate data
# --------------------------
df1 = pd.read_excel("Data/Bank Rate history and data  Bank of England Database.xlsx")
# "Date Changed" is parsed as day, abbreviated month name, two-digit year.
# NOTE(review): with %y, two-digit years 69-99 map to 19xx and 00-68 to 20xx
# under the usual strptime convention; confirm the sheet has no dates outside
# that window.
df1["Date Changed"] = pd.to_datetime(df1["Date Changed"], format="%d %b %y")
df1.set_index("Date Changed", inplace=True)
df1 = df1.sort_index()

# Year-end Bank Rate: the last rate set in each calendar year. Despite the
# variable name this is the year's final rate, not an average.
average_interest_rate_yearly = (
    df1["Rate"]
    .resample("YE")
    .last()
    # Carry the rate across years with no change BEFORE cutting the series
    # down to `years`; otherwise a rate set before the first plotted year is
    # discarded and the leading years end up NaN.
    .ffill()
    .reindex(years)
    # Extend the latest rate over any years after the last recorded change.
    .ffill()
)

# --------------------------
# 12-month rolling average of house price growth: compute and plot
# --------------------------
# Mean of "12m%Change" across all regions for each date in the index ...
monthly_growth = df.groupby("Date")["12m%Change"].mean()
# ... smoothed over a 12-observation window (the first 11 points are NaN).
rolling_growth = monthly_growth.rolling(12).mean()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(rolling_growth.index, rolling_growth.to_numpy(), linewidth=2)

ax.set_title("UK House Price Growth (12-Month Rolling Average)", fontsize=14, fontweight='bold')
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Rolling 12-Month Growth (%)", fontsize=12)
# Tick labels get a "%" suffix.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.grid(True, linestyle='--', alpha=0.5)
fig.tight_layout()

# Write the figure into the plots folder, then display it
fig.savefig("plots/rolling_12m_house_price_growth.png", dpi=300)
plt.show()

# --------------------------
# Dual-axis chart: regional house price growth (left) vs Bank of England Base Rate (right)
# --------------------------
fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: one growth line per selected region
for region, growth in growth_average_selected.items():
    ax1.plot(growth.index, growth, marker='o', linewidth=1.2, label=region)

ax1.set_xlabel("Year", fontsize=12)
ax1.set_ylabel("House Price Growth (%)", color='blue', fontsize=12)
ax1.yaxis.set_major_formatter(mtick.PercentFormatter())
ax1.tick_params(axis='y', labelcolor='blue')
ax1.grid(True, linestyle='--', alpha=0.5)

# Right axis: yearly Bank Rate series, sharing the x-axis with the growth lines
ax2 = ax1.twinx()
ax2.plot(
    average_interest_rate_yearly.index,
    average_interest_rate_yearly.values,
    color='red',
    marker='s',
    linewidth=1.8,
    label='Bank of England Base Rate',
)
ax2.set_ylabel("Interest Rate (%)", color='red', fontsize=12)
ax2.yaxis.set_major_formatter(mtick.PercentFormatter())
ax2.tick_params(axis='y', labelcolor='red')

# A single legend covering both axes: collect the handles and labels from each
# axis and attach the merged legend to the left axis, outside the plot area.
growth_handles, growth_labels = ax1.get_legend_handles_labels()
rate_handles, rate_labels = ax2.get_legend_handles_labels()
ax1.legend(
    growth_handles + rate_handles,
    growth_labels + rate_labels,
    title="Region / Rate",
    bbox_to_anchor=(1.18, 1),
    loc="upper left",
    fontsize=7,
    title_fontsize=9,
)

ax1.set_title("UK House Price Growth vs Interest Rate 2000-2025", fontsize=14, fontweight='bold')
fig.tight_layout()

# Write the figure into the plots folder (main visual for README), then display it
fig.savefig("plots/house_price_growth_vs_base_rate.png", dpi=300)
plt.show()

# --------------------------
# Scatter plot: Bank Rate (x) against average annual house price growth (y)
# --------------------------
# Row-wise mean over the selected regions' yearly growth. Note that this
# averages only the regions chosen earlier, not every region in the dataset.
national_growth = growth_average_selected.mean(axis="columns")
# Put growth on the same yearly index as the rate series so the points pair up by year.
aligned_growth = national_growth.reindex(average_interest_rate_yearly.index)

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    average_interest_rate_yearly.values,
    aligned_growth.values,
    s=60,
    alpha=0.7,
    edgecolors='black',
)
ax.set_title("Relationship Between Interest Rates and UK House Price Growth", fontsize=14, fontweight='bold')
ax.set_xlabel("Bank of England Base Rate (%)", fontsize=12)
ax.set_ylabel("House Price Growth (%)", fontsize=12)
# Only the y-axis ticks get a "%" suffix; the x-axis carries the unit in its label.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.grid(True, linestyle='--', alpha=0.5)
fig.tight_layout()

# Write the figure into the plots folder, then display it
fig.savefig("plots/scatter_rate_vs_growth.png", dpi=300)
plt.show()

# Correlation between average house price growth and the Bank Rate.
# Each value is computed once and reused (the original recomputed both);
# every print line is kept so the output is unchanged.
# Series.corr aligns the two series on their index before correlating.

# Same-year correlation
corr = national_growth.corr(average_interest_rate_yearly)
print("Correlation between average house price growth and Bank Rate:", corr)

# 1-year lag: shift(1) moves each rate one row down, so growth in a given year
# is paired with the rate from the previous year.
lagged_rates = average_interest_rate_yearly.shift(1)
corr_lagged = national_growth.corr(lagged_rates)
print("Correlation with 1-year lagged interest rates:", corr_lagged)

# Compact summary of the same two figures
print("Correlation (same year):", corr)
print("Correlation (1-year lag):", corr_lagged)
