pip install pandas==1.5.3
pip install numpy==1.24.3
pip install openpyxl==3.1.2
pip install matplotlib==3.7.3
pip install geopandas==0.13.2

In [2]:
import pandas as pd
import geopandas as gpd

In [7]:
# Read the full postcode lookup table. The file is decoded as Latin-1, and
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, so each column's dtype is inferred from the whole column.
lookup_df = pd.read_csv(
    "data/lookup_table.csv",
    low_memory=False,
    encoding="latin1",
)

# Show the first five rows as a quick sanity check.
lookup_df.head()

Unnamed: 0,pcd7,pcd8,pcds,dointr,doterm,usertype,oa21cd,lsoa21cd,msoa21cd,ladcd,lsoa21nm,msoa21nm,ladnm,ladnmw
0,AB1 0AA,AB1 0AA,AB1 0AA,198001,199606.0,0,S00090303,S01006514,S02001237,S12000033,,,Aberdeen City,
1,AB1 0AB,AB1 0AB,AB1 0AB,198001,199606.0,0,S00090303,S01006514,S02001237,S12000033,,,Aberdeen City,
2,AB1 0AD,AB1 0AD,AB1 0AD,198001,199606.0,0,S00090399,S01006514,S02001237,S12000033,,,Aberdeen City,
3,AB1 0AE,AB1 0AE,AB1 0AE,199402,199606.0,0,S00091322,S01006853,S02001296,S12000034,,,Aberdeenshire,
4,AB1 0AF,AB1 0AF,AB1 0AF,199012,199207.0,1,S00090299,S01006511,S02001236,S12000033,,,Aberdeen City,


In [11]:
# The ten local authority districts that make up Greater Manchester.
# These strings are matched exactly against the 'ladnm' column.
gm_local_authorities = [
    "Bolton", "Bury", "Manchester", "Oldham", "Rochdale",
    "Salford", "Stockport", "Tameside", "Trafford", "Wigan"
]

# Keep only the rows whose local authority is in Greater Manchester.
# .copy() yields an independent frame, so later cells can modify gm_lookup
# without touching lookup_df or triggering SettingWithCopyWarning.
gm_lookup = lookup_df[lookup_df["ladnm"].isin(gm_local_authorities)].copy()

# isin() silently matches nothing for a misspelled or absent name, so fail
# loudly if any authority in the list ended up with no rows.
missing_authorities = sorted(set(gm_local_authorities) - set(gm_lookup["ladnm"]))
assert not missing_authorities, f"No rows found for: {missing_authorities}"

# Check how many records we have for Greater Manchester
print(f"Total records in Greater Manchester: {len(gm_lookup)}")

# Preview the filtered data; a bare last expression gets the notebook's rich
# table rendering instead of the wrapped plain-text output of print().
gm_lookup.head()


Total records in Greater Manchester: 114801
           pcd7      pcd8     pcds  dointr    doterm  usertype     oa21cd  \
199332  BL0 0AA  BL0  0AA  BL0 0AA  198001       NaN         0  E00025401   
199333  BL0 0AB  BL0  0AB  BL0 0AB  198001       NaN         0  E00025393   
199334  BL0 0AD  BL0  0AD  BL0 0AD  201007       NaN         0  E00025393   
199335  BL0 0AE  BL0  0AE  BL0 0AE  199002  202205.0         1  E00025393   
199337  BL0 0AG  BL0  0AG  BL0 0AG  198001       NaN         0  E00025384   

         lsoa21cd   msoa21cd      ladcd   lsoa21nm  msoa21nm ladnm ladnmw  
199332  E01005014  E02001019  E08000002  Bury 001A  Bury 001  Bury    NaN  
199333  E01005014  E02001019  E08000002  Bury 001A  Bury 001  Bury    NaN  
199334  E01005014  E02001019  E08000002  Bury 001A  Bury 001  Bury    NaN  
199335  E01005014  E02001019  E08000002  Bury 001A  Bury 001  Bury    NaN  
199337  E01005017  E02001019  E08000002  Bury 001D  Bury 001  Bury    NaN  


In [12]:
# Mapping from the terse source column codes to descriptive column names.
column_names = {
    'pcd7': 'postcode_7char',
    'pcd8': 'postcode_8char',
    'pcds': 'postcode_full',
    'dointr': 'date_introduced',
    'doterm': 'date_terminated',
    'usertype': 'user_type',
    'oa21cd': 'output_area_code',
    'lsoa21cd': 'lsoa_code',
    'msoa21cd': 'msoa_code',
    'ladcd': 'local_authority_code',
    'lsoa21nm': 'lsoa_name',
    'msoa21nm': 'msoa_name',
    'ladnm': 'local_authority_name',
    'ladnmw': 'local_authority_name_welsh'
}

# rename() returns a new frame; keys that are not present are skipped, which
# also makes this cell safe to re-run on the already-renamed frame.
gm_lookup = gm_lookup.rename(columns=column_names)

# Because missing keys are skipped silently, confirm that every descriptive
# name is actually present after the rename.
missing_columns = sorted(set(column_names.values()) - set(gm_lookup.columns))
assert not missing_columns, f"Columns missing after rename: {missing_columns}"

# Check the new column names
print(gm_lookup.columns)

# Preview the dataframe with new column names; a bare last expression gets the
# notebook's rich table rendering instead of wrapped plain text from print().
gm_lookup.head()

Index(['postcode_7char', 'postcode_8char', 'postcode_full', 'date_introduced',
       'date_terminated', 'user_type', 'output_area_code', 'lsoa_code',
       'msoa_code', 'local_authority_code', 'lsoa_name', 'msoa_name',
       'local_authority_name', 'local_authority_name_welsh'],
      dtype='object')
       postcode_7char postcode_8char postcode_full  date_introduced  \
199332        BL0 0AA       BL0  0AA       BL0 0AA           198001   
199333        BL0 0AB       BL0  0AB       BL0 0AB           198001   
199334        BL0 0AD       BL0  0AD       BL0 0AD           201007   
199335        BL0 0AE       BL0  0AE       BL0 0AE           199002   
199337        BL0 0AG       BL0  0AG       BL0 0AG           198001   

        date_terminated  user_type output_area_code  lsoa_code  msoa_code  \
199332              NaN          0        E00025401  E01005014  E02001019   
199333              NaN          0        E00025393  E01005014  E02001019   
199334              NaN          0   

In [13]:
# Write the Greater Manchester subset to disk; index=False leaves the
# DataFrame's row index out of the file.
gm_lookup.to_csv(path_or_buf="data/gm_lookup.csv", index=False)