### Import dependencies and data

In [1]:
import pandas as pd
from datetime import datetime
from functools import reduce

In [2]:
cpi_df = pd.read_csv('cpi_data.csv')
gdp_df = pd.read_csv('monthly_gdp_data.csv')
mortgages_df = pd.read_csv('mortgage_approvals_data.csv')
interest_df = pd.read_csv('interest_rates_data.csv')
ukhpi_df = pd.read_csv('ukhpi_data.csv')
avg_price_df = pd.read_csv('avg_price_data.csv')

### CPI Data - Consumer Price Index

In [3]:
cpi_df.head()

Unnamed: 0,v4_0,mmm-yy,Time,uk-only,Geography,cpih1dim1aggid,Aggregate
0,128.0,Aug-23,Aug-23,K02000001,United Kingdom,CP0953_0954,"09.5.3/4 Miscellaneous printed matter, station..."
1,112.6,Aug-23,Aug-23,K02000001,United Kingdom,CP052,05.2 Household textiles
2,128.4,Aug-23,Aug-23,K02000001,United Kingdom,CP082_083,08.2/3 Telephone and telefax equipment
3,97.3,Aug-23,Aug-23,K02000001,United Kingdom,CP091,09.1 Audio-visual equipment and related products
4,216.5,Aug-23,Aug-23,K02000001,United Kingdom,CP0733,07.3.3 Passenger transport by air


In [4]:
list(set(cpi_df['Aggregate'].tolist()))

['09.1.3 Data processing equipment',
 '09.3 Other recreational items, gardens and pets',
 '07.3 Transport services',
 '05.4 Glassware, tableware and household utensils',
 '02.1.3 Beer',
 '07.1 Purchase of vehicles',
 '05.2 Household textiles',
 '11.1.2 Canteens',
 '01.1.3 Fish',
 '03.1.4 Cleaning, repair and hire of clothing',
 '01.1 Food',
 '07.3.1 Passenger transport by railway',
 '11.1.1 Restaurants and cafes',
 '12.1 Personal care',
 '07.3.3 Passenger transport by air',
 '01.1.5 Oils and fats',
 '07.1.1.1 New motor cars',
 '12.5.3 Health insurance',
 '09 Recreation and culture',
 '04.5 Electricity, gas and other fuels',
 '11 Restaurants and hotels',
 '04.4.1 Water supply',
 'Overall Index',
 '01 Food and non-alcoholic beverages',
 '08.1 Postal services',
 '12.1.2/3 Appliances and products for personal care',
 '03 Clothing and footwear',
 '06.1.2/3 Other medical and therapeutic equipment',
 '01.2.1 Coffee, tea and cocoa',
 '05.5 Tools and equipment for house and garden',
 '02.1 Alco

In [5]:
overall_cpi_df = cpi_df[cpi_df['Aggregate'].isin(['Overall Index', '04 Housing, water, electricity, gas and other fuels'])]\
        .drop(columns = ['Time', 'uk-only', 'Geography', 'cpih1dim1aggid'])\
        .reset_index(drop=True)
overall_cpi_df.head()

Unnamed: 0,v4_0,mmm-yy,Aggregate
0,129.4,Aug-23,Overall Index
1,127.8,Aug-23,"04 Housing, water, electricity, gas and other ..."
2,104.2,Sep-17,"04 Housing, water, electricity, gas and other ..."
3,104.3,Sep-17,Overall Index
4,79.3,Jan-06,"04 Housing, water, electricity, gas and other ..."


In [6]:
overall_cpi_df['mmm-yy'] = overall_cpi_df['mmm-yy'].apply(lambda x: datetime.strptime(x, '%b-%y').strftime('%d/%m/%Y'))
overall_cpi_df['Aggregate'].mask(overall_cpi_df['Aggregate'] == '04 Housing, water, electricity, gas and other fuels',
                                 'Housing Associated Costs', inplace=True)

In [7]:
overall_cpi_df = overall_cpi_df.rename(columns={'v4_0': 'cpi', 'mmm-yy': 'Date'})

In [8]:
overall_cpi_df['Date'] = pd.to_datetime(overall_cpi_df['Date'], format='%d/%m/%Y')

In [9]:
overall_cpi_df = overall_cpi_df.pivot(index='Date', columns='Aggregate', values='cpi')

In [10]:
overall_cpi_df.sort_values('Date').head()

Aggregate,Housing Associated Costs,Overall Index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1988-01-01,38.7,46.9
1988-02-01,38.8,47.0
1988-03-01,39.0,47.2
1988-04-01,39.8,47.8
1988-05-01,40.0,48.0


### Monthly GDP Estimate Data

In [11]:
gdp_df.head()

Unnamed: 0,v4_0,mmm-yy,Time,uk-only,Geography,sic-unofficial,UnofficialStandardIndustrialClassification
0,79.3539,Jan-09,Jan-09,K02000001,United Kingdom,F,F : Construction
1,93.8681,Jan-09,Jan-09,K02000001,United Kingdom,B--E,B-E : Production Industries
2,80.5284,Jan-09,Jan-09,K02000001,United Kingdom,G--T,G-T : Index of Services
3,102.3273,Jan-09,Jan-09,K02000001,United Kingdom,A,A : Agriculture
4,82.3913,Jan-09,Jan-09,K02000001,United Kingdom,A--T,A-T : Monthly GDP


In [12]:
monthly_gdp_df = gdp_df[gdp_df['sic-unofficial'] == 'A--T']\
        .drop(columns = ['Time', 'uk-only', 'Geography', 'sic-unofficial'])\
        .reset_index(drop=True)

In [13]:
monthly_gdp_df['mmm-yy'] = monthly_gdp_df['mmm-yy'].apply(lambda x: datetime.strptime(x, '%b-%y').strftime('%d/%m/%Y'))
monthly_gdp_df['UnofficialStandardIndustrialClassification'] = 'Monthly GDP'

In [14]:
monthly_gdp_df = monthly_gdp_df.rename(columns={'v4_0': 'gdp',
                                                'mmm-yy': 'Date',
                                                'UnofficialStandardIndustrialClassification': 'GDP'})

In [15]:
monthly_gdp_df['Date'] = pd.to_datetime(monthly_gdp_df['Date'], format='%d/%m/%Y')

In [16]:
monthly_gdp_df = monthly_gdp_df.pivot(index='Date', columns='GDP', values='gdp')

In [17]:
monthly_gdp_df.sort_values('Date').head()

GDP,Monthly GDP
Date,Unnamed: 1_level_1
1997-01-01,63.3398
1997-02-01,63.9959
1997-03-01,64.0355
1997-04-01,64.6273
1997-05-01,64.1371


### Mortgage Approvals Data

In [18]:
mortgages_df.head()

Unnamed: 0,DATE,LPMVTVU
0,30 Apr 1993,90963
1,31 May 1993,91684
2,30 Jun 1993,100672
3,31 Jul 1993,93576
4,31 Aug 1993,83478


In [19]:
mortgages_df['DATE'] = pd.to_datetime(mortgages_df['DATE'], format='%d %b %Y') + pd.offsets.MonthBegin(1)

In [20]:
mortgages_df.rename(columns={
    'DATE': 'Date',
    'LPMVTVU': 'Mortgage Approvals'}, inplace=True)

In [21]:
mortgages_df.set_index('Date', inplace=True)

In [22]:
mortgages_df.head()

Unnamed: 0_level_0,Mortgage Approvals
Date,Unnamed: 1_level_1
1993-05-01,90963
1993-06-01,91684
1993-07-01,100672
1993-08-01,93576
1993-09-01,83478


### Base Interest Rates Data

In [23]:
interest_df.head()

Unnamed: 0,DATE,IUMABEDR
0,28 Feb 1990,14.875
1,31 Mar 1990,14.875
2,30 Apr 1990,14.875
3,31 May 1990,14.875
4,30 Jun 1990,14.875


In [24]:
interest_df['DATE'] = pd.to_datetime(interest_df['DATE'], format='%d %b %Y') + pd.offsets.MonthBegin(1)

In [25]:
interest_df.rename(columns={
    'DATE': 'Date',
    'IUMABEDR': 'Base Interest Rate'}, inplace=True)

In [26]:
interest_df.set_index('Date', inplace=True)

In [27]:
interest_df.head()

Unnamed: 0_level_0,Base Interest Rate
Date,Unnamed: 1_level_1
1990-03-01,14.875
1990-04-01,14.875
1990-05-01,14.875
1990-06-01,14.875
1990-07-01,14.875


### UKHPI Data

In [28]:
ukhpi_df.head()

Unnamed: 0,Date,Region_Name,Area_Code,Index
0,1968-04-01,Northern Ireland,N92000001,3.30042
1,1968-04-01,England,E92000001,1.680067
2,1968-04-01,Wales,W92000004,2.119327
3,1968-04-01,Scotland,S92000003,2.108087
4,1968-04-01,London,E12000007,1.096815


In [29]:
ukhpi_df['Date'] = pd.to_datetime(ukhpi_df['Date'], format='%Y-%m-%d')
ukhpi_df.drop(columns='Area_Code', inplace=True)

In [30]:
desired_regions = ['England', 'Wales', 'Northern Ireland', 'Scotland']
national_ukhpi_df = ukhpi_df[ukhpi_df['Region_Name'].isin(desired_regions)]

In [31]:
national_ukhpi_df = national_ukhpi_df.pivot(index='Date', columns='Region_Name', values='Index')

In [32]:
national_ukhpi_df['United Kingdom'] = national_ukhpi_df.mean(axis=1)

In [33]:
national_ukhpi_df.columns = [f'UKHPI {c}' for c in national_ukhpi_df]

In [34]:
national_ukhpi_df.head()

Unnamed: 0_level_0,UKHPI England,UKHPI Northern Ireland,UKHPI Scotland,UKHPI Wales,UKHPI United Kingdom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1968-04-01,1.680067,3.30042,2.108087,2.119327,2.301975
1968-05-01,1.680067,3.30042,2.108087,2.119327,2.301975
1968-06-01,1.680067,3.30042,2.108087,2.119327,2.301975
1968-07-01,1.737342,3.167644,2.311005,2.221327,2.359329
1968-08-01,1.737342,3.167644,2.311005,2.221327,2.359329


### Average Monthly Price Data

In [35]:
avg_price_df.head()

Unnamed: 0,Date,Region_Name,Area_Code,Average_Price,Monthly_Change,Annual_Change,Average_Price_SA
0,1968-04-01,Northern Ireland,N92000001,3661.4855,0.0,,
1,1968-04-01,England,E92000001,3408.108064,0.0,,
2,1968-04-01,Wales,W92000004,2885.414162,0.0,,
3,1968-04-01,Scotland,S92000003,2844.980688,0.0,,
4,1968-04-01,London,E12000007,4418.489911,0.0,,


In [36]:
avg_price_df['Date'] = pd.to_datetime(avg_price_df['Date'], format='%Y-%m-%d')
avg_price_df.drop(columns=['Area_Code', 'Average_Price_SA', 'Monthly_Change', 'Annual_Change'], inplace=True)

In [37]:
desired_regions = ['England', 'Wales', 'Northern Ireland', 'Scotland']
national_avg_price_df = avg_price_df[avg_price_df['Region_Name'].isin(desired_regions)]

In [38]:
national_avg_price_df = national_avg_price_df.pivot(index='Date', columns='Region_Name', values='Average_Price')

In [39]:
national_avg_price_df['United Kingdom'] = national_avg_price_df.mean(axis=1)

In [40]:
national_avg_price_df.columns = [f'Average Property Price {c}' for c in national_avg_price_df]

In [41]:
national_avg_price_df.head()

Unnamed: 0_level_0,Average Property Price England,Average Property Price Northern Ireland,Average Property Price Scotland,Average Property Price Wales,Average Property Price United Kingdom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1968-04-01,3408.108064,3661.4855,2844.980688,2885.414162,3199.997103
1968-05-01,3408.108064,3661.4855,2844.980688,2885.414162,3199.997103
1968-06-01,3408.108064,3661.4855,2844.980688,2885.414162,3199.997103
1968-07-01,3524.293566,3514.184359,3118.829097,3024.284362,3295.397846
1968-08-01,3524.293566,3514.184359,3118.829097,3024.284362,3295.397846


### Calculate rate of change (CPI/Inflation and UKHPI)

In [42]:
overall_cpi_df

Aggregate,Housing Associated Costs,Overall Index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1988-01-01,38.7,46.9
1988-02-01,38.8,47.0
1988-03-01,39.0,47.2
1988-04-01,39.8,47.8
1988-05-01,40.0,48.0
...,...,...
2023-08-01,127.8,129.4
2023-09-01,128.3,130.1
2023-10-01,127.9,130.2
2023-11-01,128.2,130.0


In [43]:
overall_cpi_df['Housing Associated Costs Inflation Rate'] = overall_cpi_df['Housing Associated Costs'].pct_change()

In [44]:
overall_cpi_df['Overall Inflation Rate'] = overall_cpi_df['Overall Index'].pct_change()

In [45]:
overall_cpi_df

Aggregate,Housing Associated Costs,Overall Index,Housing Associated Costs Inflation Rate,Overall Inflation Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1988-01-01,38.7,46.9,,
1988-02-01,38.8,47.0,0.002584,0.002132
1988-03-01,39.0,47.2,0.005155,0.004255
1988-04-01,39.8,47.8,0.020513,0.012712
1988-05-01,40.0,48.0,0.005025,0.004184
...,...,...,...,...
2023-08-01,127.8,129.4,0.003928,0.003101
2023-09-01,128.3,130.1,0.003912,0.005410
2023-10-01,127.9,130.2,-0.003118,0.000769
2023-11-01,128.2,130.0,0.002346,-0.001536


In [46]:
national_ukhpi_df

Unnamed: 0_level_0,UKHPI England,UKHPI Northern Ireland,UKHPI Scotland,UKHPI Wales,UKHPI United Kingdom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1968-04-01,1.680067,3.300420,2.108087,2.119327,2.301975
1968-05-01,1.680067,3.300420,2.108087,2.119327,2.301975
1968-06-01,1.680067,3.300420,2.108087,2.119327,2.301975
1968-07-01,1.737342,3.167644,2.311005,2.221327,2.359329
1968-08-01,1.737342,3.167644,2.311005,2.221327,2.359329
...,...,...,...,...,...
2023-07-01,150.300000,161.800000,140.900000,157.600000,152.650000
2023-08-01,152.000000,161.800000,144.100000,158.900000,154.200000
2023-09-01,151.700000,161.800000,141.600000,158.200000,153.325000
2023-10-01,150.400000,161.800000,142.300000,156.900000,152.850000


In [47]:
for col in national_ukhpi_df:
    national_ukhpi_df[f'{col} % Change'] = national_ukhpi_df[col].pct_change()

In [48]:
national_ukhpi_df.head()

Unnamed: 0_level_0,UKHPI England,UKHPI Northern Ireland,UKHPI Scotland,UKHPI Wales,UKHPI United Kingdom,UKHPI England % Change,UKHPI Northern Ireland % Change,UKHPI Scotland % Change,UKHPI Wales % Change,UKHPI United Kingdom % Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1968-04-01,1.680067,3.30042,2.108087,2.119327,2.301975,,,,,
1968-05-01,1.680067,3.30042,2.108087,2.119327,2.301975,0.0,0.0,0.0,0.0,0.0
1968-06-01,1.680067,3.30042,2.108087,2.119327,2.301975,0.0,0.0,0.0,0.0,0.0
1968-07-01,1.737342,3.167644,2.311005,2.221327,2.359329,0.034091,-0.04023,0.096257,0.048128,0.024915
1968-08-01,1.737342,3.167644,2.311005,2.221327,2.359329,0.0,0.0,0.0,0.0,0.0


### Stitch all data into one dataframe

In [49]:
all_dfs = [overall_cpi_df, monthly_gdp_df, mortgages_df, interest_df, national_ukhpi_df, national_avg_price_df]

In [50]:
all_dfs_merged = reduce(lambda  left,right: pd.merge(left,right,
                                                     left_index=True, right_index=True,
                                                    how='outer'), all_dfs)

In [51]:
all_dfs_merged.columns

Index(['Housing Associated Costs', 'Overall Index',
       'Housing Associated Costs Inflation Rate', 'Overall Inflation Rate',
       'Monthly GDP', 'Mortgage Approvals', 'Base Interest Rate',
       'UKHPI England', 'UKHPI Northern Ireland', 'UKHPI Scotland',
       'UKHPI Wales', 'UKHPI United Kingdom', 'UKHPI England % Change',
       'UKHPI Northern Ireland % Change', 'UKHPI Scotland % Change',
       'UKHPI Wales % Change', 'UKHPI United Kingdom % Change',
       'Average Property Price England',
       'Average Property Price Northern Ireland',
       'Average Property Price Scotland', 'Average Property Price Wales',
       'Average Property Price United Kingdom'],
      dtype='object')

In [64]:
all_dfs_merged

Unnamed: 0_level_0,Housing Associated Costs,Overall Index,Housing Associated Costs Inflation Rate,Overall Inflation Rate,Monthly GDP,Mortgage Approvals,Base Interest Rate,UKHPI England,UKHPI Northern Ireland,UKHPI Scotland,...,UKHPI England % Change,UKHPI Northern Ireland % Change,UKHPI Scotland % Change,UKHPI Wales % Change,UKHPI United Kingdom % Change,Average Property Price England,Average Property Price Northern Ireland,Average Property Price Scotland,Average Property Price Wales,Average Property Price United Kingdom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1997-01-01,63.5,69.2,0.001577,-0.002882,63.3398,72978.0,5.9375,27.501591,46.949854,32.187747,...,0.000596,0.039735,-0.018779,-0.008151,0.006769,55788.50251,52086.16563,43439.14962,43524.95351,48709.692817
1997-02-01,63.6,69.3,0.001575,0.001445,63.9959,72980.0,5.9375,27.704075,46.949854,32.187747,...,0.007363,0.000000,0.000000,0.025432,0.007326,56199.25087,52086.16563,43439.14962,44631.86395,49089.107518
1997-03-01,63.6,69.4,0.000000,0.001443,64.0355,84044.0,5.9375,27.992467,46.949854,32.187747,...,0.010410,0.000000,0.000000,-0.001807,0.001641,56784.27139,52086.16563,43439.14962,44551.19667,49215.195827
1997-04-01,64.1,69.7,0.007862,0.004323,64.6273,101195.0,5.9375,28.383387,44.424597,34.066649,...,0.013965,-0.053786,0.058373,-0.007601,-0.003605,57577.27313,49284.64504,45974.83204,44212.55854,49262.327187
1997-05-01,64.4,70.0,0.004680,0.004304,64.1371,117383.0,5.9375,28.837231,44.424597,34.066649,...,0.015990,0.000000,0.000000,0.027846,0.009746,58497.92049,49284.64504,45974.83204,45443.71838,49800.278988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-01,127.3,129.0,-0.013943,-0.003091,102.4073,64958.0,4.6591,150.300000,161.800000,140.900000,...,0.007373,0.030573,0.014399,0.016119,0.017328,304943.00000,179530.00000,190169.00000,214635.00000,222319.250000
2023-08-01,127.8,129.4,0.003928,0.003101,102.4166,53023.0,5.0000,152.000000,161.800000,144.100000,...,0.011311,0.000000,0.022711,0.008249,0.010154,308325.00000,179530.00000,194485.00000,216342.00000,224670.500000
2023-09-01,128.3,130.1,0.003912,0.005410,102.4730,48063.0,5.2273,151.700000,161.800000,141.600000,...,-0.001974,0.000000,-0.017349,-0.004405,-0.005674,307635.00000,179530.00000,191123.00000,215368.00000,223414.000000
2023-10-01,127.9,130.2,-0.003118,0.000769,102.1730,44293.0,5.2500,150.400000,161.800000,142.300000,...,-0.008570,0.000000,0.004944,-0.008217,-0.003098,305148.00000,179530.00000,191976.00000,213573.00000,222556.750000


In [62]:
all_dfs_merged.dropna(inplace=True)

### Functions to format data for dashboard

In [54]:
def format_cpi_data(df: pd.DataFrame):
    overall_cpi_df = df[df['Aggregate'].isin(['Overall Index', '04 Housing, water, electricity, gas and other fuels'])]\
        .drop(columns = ['Time', 'uk-only', 'Geography', 'cpih1dim1aggid'])\
        .reset_index(drop=True)
    overall_cpi_df['mmm-yy'] = overall_cpi_df['mmm-yy'].apply(lambda x: datetime.strptime(x, '%b-%y').strftime('%d/%m/%Y'))
    overall_cpi_df['Aggregate'].mask(overall_cpi_df['Aggregate'] == '04 Housing, water, electricity, gas and other fuels',
                                 'Housing Associated Costs', inplace=True)
    overall_cpi_df = overall_cpi_df.rename(columns={'v4_0': 'cpi', 'mmm-yy': 'Date'})
    overall_cpi_df['Date'] = pd.to_datetime(overall_cpi_df['Date'], format='%d/%m/%Y')
    overall_cpi_df = overall_cpi_df.pivot(index='Date', columns='Aggregate', values='cpi')
    overall_cpi_df['Housing Associated Costs Inflation Rate'] = overall_cpi_df['Housing Associated Costs'].pct_change()
    overall_cpi_df['Overall Inflation Rate'] = overall_cpi_df['Overall Index'].pct_change()
    return overall_cpi_df.sort_values('Date')

In [55]:
format_cpi_data(cpi_df)

Aggregate,Housing Associated Costs,Overall Index,Housing Associated Costs Inflation Rate,Overall Inflation Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1988-01-01,38.7,46.9,,
1988-02-01,38.8,47.0,0.002584,0.002132
1988-03-01,39.0,47.2,0.005155,0.004255
1988-04-01,39.8,47.8,0.020513,0.012712
1988-05-01,40.0,48.0,0.005025,0.004184
...,...,...,...,...
2023-08-01,127.8,129.4,0.003928,0.003101
2023-09-01,128.3,130.1,0.003912,0.005410
2023-10-01,127.9,130.2,-0.003118,0.000769
2023-11-01,128.2,130.0,0.002346,-0.001536


In [56]:
def format_gdp_data(df: pd.DataFrame):
    monthly_gdp_df = df[df['sic-unofficial'] == 'A--T']\
        .drop(columns = ['Time', 'uk-only', 'Geography', 'sic-unofficial'])\
        .reset_index(drop=True)
    monthly_gdp_df['mmm-yy'] = monthly_gdp_df['mmm-yy'].apply(lambda x: datetime.strptime(x, '%b-%y').strftime('%d/%m/%Y'))
    monthly_gdp_df['UnofficialStandardIndustrialClassification'] = 'Monthly GDP'
    monthly_gdp_df = monthly_gdp_df.rename(columns={'v4_0': 'gdp',
                                                'mmm-yy': 'Date',
                                                'UnofficialStandardIndustrialClassification': 'GDP'})
    monthly_gdp_df['Date'] = pd.to_datetime(monthly_gdp_df['Date'], format='%d/%m/%Y')
    monthly_gdp_df = monthly_gdp_df.pivot(index='Date', columns='GDP', values='gdp')
    return monthly_gdp_df.sort_values('Date')

In [57]:
format_gdp_data(gdp_df)

GDP,Monthly GDP
Date,Unnamed: 1_level_1
1997-01-01,63.3398
1997-02-01,63.9959
1997-03-01,64.0355
1997-04-01,64.6273
1997-05-01,64.1371
...,...
2023-07-01,102.4073
2023-08-01,102.4166
2023-09-01,102.4730
2023-10-01,102.1730


In [58]:
def format_mortgage_data(df: pd.DataFrame):
    mortgages_df = df
    mortgages_df['DATE'] = pd.to_datetime(mortgages_df['DATE'], format='%d %b %Y')  + pd.offsets.MonthBegin(1)
    mortgages_df.rename(columns={
            'DATE': 'Date',
            'LPMVTVU': 'Mortgage Approvals'}, inplace=True)
    mortgages_df.set_index('Date', inplace=True)
    return mortgages_df.sort_values('Date')

In [59]:
format_mortgage_data(mortgages_df)

KeyError: 'DATE'

In [None]:
def format_interest_data(df: pd.DataFrame):
    interest_df = df
    interest_df['DATE'] = pd.to_datetime(interest_df['DATE'], format='%d %b %Y') + pd.offsets.MonthBegin(1)
    interest_df.rename(columns={
            'DATE': 'Date',
            'IUMABEDR': 'Base Interest Rate'}, inplace=True)
    interest_df.set_index('Date', inplace=True)
    return interest_df.sort_values('Date')

In [None]:
format_interest_data(interest_df)

In [None]:
def format_ukhpi_data(df: pd.DataFrame):
    ukhpi_df = df
    ukhpi_df['Date'] = pd.to_datetime(ukhpi_df['Date'], format='%Y-%m-%d')
    ukhpi_df.drop(columns='Area_Code', inplace=True)
    desired_regions = ['England', 'Wales', 'Northern Ireland', 'Scotland']
    national_ukhpi_df = ukhpi_df[ukhpi_df['Region_Name'].isin(desired_regions)]
    national_ukhpi_df = national_ukhpi_df.pivot(index='Date', columns='Region_Name', values='Index')
    national_ukhpi_df['United Kingdom'] = national_ukhpi_df.mean(axis=1)
    national_ukhpi_df.columns = [f'UKHPI {c}' for c in national_ukhpi_df]
    for col in national_ukhpi_df:
        national_ukhpi_df[f'{col} % Change'] = national_ukhpi_df[col].pct_change()
    return national_ukhpi_df.sort_values('Date')

In [None]:
format_ukhpi_data(ukhpi_df)

In [None]:
def format_avg_price_data(df: pd.DataFrame):
    avg_price_df = df
    avg_price_df['Date'] = pd.to_datetime(avg_price_df['Date'], format='%Y-%m-%d')
    avg_price_df.drop(columns=['Area_Code', 'Average_Price_SA', 'Monthly_Change', 'Annual_Change'], inplace=True)
    desired_regions = ['England', 'Wales', 'Northern Ireland', 'Scotland']
    national_avg_price_df = avg_price_df[avg_price_df['Region_Name'].isin(desired_regions)]
    national_avg_price_df = national_avg_price_df.pivot(index='Date', columns='Region_Name', values='Average_Price')
    national_avg_price_df['United Kingdom'] = national_avg_price_df.mean(axis=1)
    national_avg_price_df.columns = [f'Average Property Price {c}' for c in national_avg_price_df]
    return national_avg_price_df.sort_values('Date')

In [None]:
format_avg_price_data(avg_price_df)

In [65]:
def stitch_all_data(dfs_list: list):
    dfs_merged = reduce(lambda  left,right: pd.merge(left,right,
                                                         left_index=True, right_index=True,
                                                        how='outer'), dfs_list)
    dfs_merged.dropna(inplace=True)
    return dfs_merged

In [66]:
stitch_all_data(all_dfs)

Unnamed: 0_level_0,Housing Associated Costs,Overall Index,Housing Associated Costs Inflation Rate,Overall Inflation Rate,Monthly GDP,Mortgage Approvals,Base Interest Rate,UKHPI England,UKHPI Northern Ireland,UKHPI Scotland,...,UKHPI England % Change,UKHPI Northern Ireland % Change,UKHPI Scotland % Change,UKHPI Wales % Change,UKHPI United Kingdom % Change,Average Property Price England,Average Property Price Northern Ireland,Average Property Price Scotland,Average Property Price Wales,Average Property Price United Kingdom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1997-01-01,63.5,69.2,0.001577,-0.002882,63.3398,72978.0,5.9375,27.501591,46.949854,32.187747,...,0.000596,0.039735,-0.018779,-0.008151,0.006769,55788.50251,52086.16563,43439.14962,43524.95351,48709.692817
1997-02-01,63.6,69.3,0.001575,0.001445,63.9959,72980.0,5.9375,27.704075,46.949854,32.187747,...,0.007363,0.000000,0.000000,0.025432,0.007326,56199.25087,52086.16563,43439.14962,44631.86395,49089.107518
1997-03-01,63.6,69.4,0.000000,0.001443,64.0355,84044.0,5.9375,27.992467,46.949854,32.187747,...,0.010410,0.000000,0.000000,-0.001807,0.001641,56784.27139,52086.16563,43439.14962,44551.19667,49215.195827
1997-04-01,64.1,69.7,0.007862,0.004323,64.6273,101195.0,5.9375,28.383387,44.424597,34.066649,...,0.013965,-0.053786,0.058373,-0.007601,-0.003605,57577.27313,49284.64504,45974.83204,44212.55854,49262.327187
1997-05-01,64.4,70.0,0.004680,0.004304,64.1371,117383.0,5.9375,28.837231,44.424597,34.066649,...,0.015990,0.000000,0.000000,0.027846,0.009746,58497.92049,49284.64504,45974.83204,45443.71838,49800.278988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-01,127.3,129.0,-0.013943,-0.003091,102.4073,64958.0,4.6591,150.300000,161.800000,140.900000,...,0.007373,0.030573,0.014399,0.016119,0.017328,304943.00000,179530.00000,190169.00000,214635.00000,222319.250000
2023-08-01,127.8,129.4,0.003928,0.003101,102.4166,53023.0,5.0000,152.000000,161.800000,144.100000,...,0.011311,0.000000,0.022711,0.008249,0.010154,308325.00000,179530.00000,194485.00000,216342.00000,224670.500000
2023-09-01,128.3,130.1,0.003912,0.005410,102.4730,48063.0,5.2273,151.700000,161.800000,141.600000,...,-0.001974,0.000000,-0.017349,-0.004405,-0.005674,307635.00000,179530.00000,191123.00000,215368.00000,223414.000000
2023-10-01,127.9,130.2,-0.003118,0.000769,102.1730,44293.0,5.2500,150.400000,161.800000,142.300000,...,-0.008570,0.000000,0.004944,-0.008217,-0.003098,305148.00000,179530.00000,191976.00000,213573.00000,222556.750000
