Make the binary LAD-level A matrices from the binary MSOA-level A matrices.

In [17]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, hstack
from scipy.io import mmread, mmwrite
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt
# from scipy import sparse
import plotly.express as px
from scipy import sparse
import png
import math
import scipy.linalg as LA
import nodevectors
from sklearn.decomposition import PCA
import seaborn as sns
from tabulate import tabulate

Read in MSOA node data:

In [18]:
# Load the MSOA node table and keep only the MSOA code and its index
# (these are the labels for the rows/columns of A).
nodes = pd.read_csv("nodes_MSOA.csv")[["msoa", "index"]]

Read in lookup table from: 

https://geoportal.statistics.gov.uk/datasets/middle-layer-super-output-area-2001-to-middle-layer-super-output-area-2011-to-local-authority-district-2011-lookup-in-england-and-wales/explore

which maps MSOAs to LADs for England and Wales. It is hard to find a lookup table between MSOAs and Scottish local authority districts.

**TODO: see if I can find LADs for Scotland, so that fewer rows are dropped as NA a few cells down.**

In [19]:
# Read the MSOA 2001 -> MSOA 2011 -> LAD 2011 lookup table (England and Wales).
MSOA_LAD_lookup = pd.read_csv("MSOA2001_MSOA2011_LAD2011_Lookup_EW.csv")

In [20]:
# Keep just the 2011 MSOA and LAD code/name columns; the 2001 MSOA columns are not needed.
wanted_columns = ["MSOA11CD", "MSOA11NM", "LAD11CD", "LAD11NM"]
MSOA_LAD_lookup = MSOA_LAD_lookup.loc[:, wanted_columns]

In [21]:
# Collapse repeated rows left over after dropping the 2001 columns,
# then discard any row with a missing value.
MSOA_LAD_lookup = MSOA_LAD_lookup.drop_duplicates().dropna()

In [22]:
# Attach the LAD code/name to every node by matching the node's MSOA code
# against the 2011 MSOA code in the lookup. how="left" keeps nodes with no
# match (their lookup columns become NaN and are dropped in a later cell).
# validate="many_to_one" makes pandas raise a MergeError if an MSOA11CD occurs
# more than once in the lookup; without it such an MSOA would be silently
# duplicated here and then double-counted when rows/columns are summed per LAD.
nodes_to_LAD = nodes.merge(
    MSOA_LAD_lookup,
    how="left",
    left_on="msoa",
    right_on="MSOA11CD",
    validate="many_to_one",
)

In [23]:
# (rows, columns) of the merged table before unmatched nodes are dropped
nodes_to_LAD.shape

(8483, 6)

In [24]:
# Discard every node row that has a missing value in any column
# (i.e. nodes for which the left merge found no LAD in the lookup).
nodes_to_LAD = nodes_to_LAD.dropna(axis=0, how="any")

In [25]:
nodes_to_LAD.shape
# Rows remaining after dropna(): the lookup only covers England and Wales,
# so nodes in Scotland (or any other MSOA outside E&W) are lost here.

(7200, 6)

In [26]:
# Array of the distinct LAD names among the matched nodes; the aggregation
# cell below loops over these values.
lads = nodes_to_LAD["LAD11NM"].unique()

In [27]:
lads.shape
# number of distinct LAD names among the matched nodes: 348 LADs in E&W

(348,)

### Build the 348 x 348 (LAD by LAD) A matrix for each year by summing the MSOA rows and columns within each LAD, then concatenate the yearly matrices.

In [28]:
# Aggregate each year's MSOA-by-MSOA matrix A to a LAD-by-LAD matrix.
#
# For every year, entry [i, j] of the result is the sum of A over all rows whose
# MSOA belongs to lads[i] and all columns whose MSOA belongs to lads[j].
# The yearly matrices are collected in A_LADLAD_list and also concatenated
# side by side into A_LADLAD_all.

A_list = []  # not used in this cell; kept so any other cell referring to it still finds it
A_LADLAD_list = []  # one sparse LAD-by-LAD matrix per year, in year order
year_list = []  # the year that each entry of A_LADLAD_list belongs to

# 0-based positions in A of the MSOAs that make up each LAD, in the same order
# as `lads`. The "index" column counts from 1, hence the -1. This mapping does
# not depend on the year, so it is built once instead of once per year.
lad_members = [
    nodes_to_LAD.loc[nodes_to_LAD["LAD11NM"] == lad, "index"].to_numpy() - 1
    for lad in lads
]

for year in range(2005, 2011):
    # does 2005-2010 inclusive
    filepath = "A_for_" + str(year) + "_binary.mtx"  # file name to read in
    A = mmread(filepath).toarray()  # dense so that rows/columns can be fancy-indexed

    year_list.append(year)

    # Step 1: "LAD by MSOA" - for each LAD, add together the rows of its MSOAs.
    A_LADMSOA = np.asarray(
        [np.sum(A[members, :], axis=0) for members in lad_members]
    )

    # Step 2: "LAD by LAD" - for each LAD, add together the columns of its MSOAs
    # in the row-summed matrix. Each sum is a column of the result, so the sums
    # are stacked as columns. (Stacking them as rows would store the transpose,
    # which only coincides with the intended matrix when A is symmetric.)
    A_LADLAD = np.column_stack(
        [np.sum(A_LADMSOA[:, members], axis=1) for members in lad_members]
    )
    A_LADLAD = sparse.coo_matrix(A_LADLAD)

    # add a matrix for each year to the list
    A_LADLAD_list.append(A_LADLAD)

# n_lads x (n_years * n_lads): yearly matrices placed side by side
A_LADLAD_all = hstack(A_LADLAD_list)

In [29]:
A_LADLAD_all.shape
# n x 6n: the six yearly n x n LAD-by-LAD matrices stacked side by side (n = number of LADs)

(348, 2088)

In [14]:
# Save all the LAD by LAD matrices, one Matrix Market file per year.
# The year in each file name comes from year_list (filled alongside
# A_LADLAD_list), so names and matrices stay paired even if the year range
# of the loop that builds them is changed.
for year, mat in zip(year_list, A_LADLAD_list):
    mmwrite("A_LAD_" + str(year) + "_binary.mtx", mat)