In [89]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, hstack
from scipy.io import mmread, mmwrite
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt
# from scipy import sparse
import plotly.express as px
from scipy import sparse
import png
import math
import scipy.linalg as LA
import nodevectors
from sklearn.decomposition import PCA
import seaborn as sns
from tabulate import tabulate

In [90]:
# data that maps LAD to region, but isn't ordered the way to LADxLAD matrices are
LAD_Region_lookup = pd.read_csv("LAD_Region_lookup.csv")
regions = LAD_Region_lookup.RGN11NM.unique()

In [91]:
regions

array(['North East', 'North West', 'Yorkshire and The Humber',
       'East Midlands', 'West Midlands', 'South West', 'East of England',
       'South East', 'London', 'Wales'], dtype=object)

In [92]:
regionsdf = pd.DataFrame(regions)

In [93]:
# Create a list of the order that the Region x Region matrices are indexed
regionsdf.to_csv("Regions_list.csv", sep=',')

In [94]:
lads_list = pd.read_csv("LADS_list.csv", names=["index", "LAD"])

In [95]:
# reorder LAD_Region_lookup in the correct order to correspond to the A_LAD_binary matrices

LAD_Region_lookup = lads_list.merge(LAD_Region_lookup, left_on="LAD", right_on="LAD11NM", how="left")

In [96]:
LAD_Region_lookup.head(3)

Unnamed: 0,index,LAD,LAD11CD,LAD11NM,RGN11CD,RGN11NM
0,0,St Albans,E07000100,St Albans,E12000006,East of England
1,1,Welwyn Hatfield,E07000104,Welwyn Hatfield,E12000006,East of England
2,2,Hertsmere,E07000098,Hertsmere,E12000006,East of England


In [97]:
# create a LAD to RGN lookup table
LAD_Region_lookup.to_csv("LAD_Region_lookup.csv", sep=',')

In [10]:
A_list = []
A_RegReg_list = []
year_list = []

for year in range(2005,2011):
    # does 2005-2010 inclusive
    filepath = "A_LAD_" + str(year) + "_binary.mtx" # file name to read in 
    A = mmread(filepath) # temporary
    
    year_list.append(year)
    
    A = A.toarray()
    
    A_LADReg = list()
    A_RegReg =list() # empty numpy array to fill, one row
    # to make "Reg by Reg"
    
    for region in regions: 
        
        # find all the indexes for the given reg
        row_indexes = LAD_Region_lookup.loc[LAD_Region_lookup["RGN11NM"] == region, "index"]
        # make a numpy array and subtract 1, as python indexes from 0 not 1
        row_indexes  = row_indexes.to_numpy()
        row_indexes = [x - 1 for x in row_indexes]
        column_indexes = row_indexes
        
        # find all the rows with the indexes for that region, and add them together to make one row 
        row = A[row_indexes, :]
        row = np.sum(row, axis=0)
        
        # one row for each region
        A_LADReg.append(row)
        
    A_LADReg = np.asarray(A_LADReg)
    
    for region in regions: 
        
        # find all the columns with the indexes for that region, from the already row summed matrix, 
        # and add them together to make one column 
        column_indexes = LAD_Region_lookup.loc[LAD_Region_lookup["RGN11NM"] == region, "index"]
        column_indexes = column_indexes.to_numpy()
        column_indexes = [x - 1 for x in column_indexes]

        column = A_LADReg[:, column_indexes]
        column = np.sum(column, axis=1)
        
        A_RegReg.append(column)
    
    A_RegReg = np.asarray(A_RegReg)
    A_RegReg = sparse.coo_matrix(A_RegReg)

    # add a matrix for each year to the list
    A_RegReg_list.append(A_RegReg)
    
A_RegReg_all = hstack(A_RegReg_list)

In [11]:
A_RegReg_all.shape

(10, 60)

In [12]:
# A_RegReg_list[0].todense()

In [13]:
# save all the Reg by Reg matrices

for i in range(0,6):
    mat = A_RegReg_list[i]
    mmwrite("A_Reg_" + str(i+2005) + "_binary.mtx", mat)

## Embed the Reg $\times$ Reg matrices using UASE

Remember these cannot be directly compared to the LAD $\times$ LAD matrices, as they create a different embedding space.

In [14]:
A = A_RegReg_all.todense()
A = A.astype(float) #as float point ytpe
u, s, vt = svds(A, 9)
v = vt.T

d = 4
s_idx = np.argsort(s)[::-1][0:d]
ya = v[:, s_idx] @ np.diag(np.sqrt(s[s_idx]))

n = A.shape[0]

In [15]:
yadf = pd.DataFrame(ya)

# rename the columns - this is called a list comprehension
yadf.columns = ["Dimension_{}".format(i+1) for i in range(yadf.shape[1])] 

# add a year column that repeats each value n times before moving on to the next value
yadf["Year"] = np.repeat(year_list, n)

# repeats nodes list 6 times
yadf["Reg"] = list(regions)*6 # .values to make in a np array


In [16]:
yadf.head()

Unnamed: 0,Dimension_1,Dimension_2,Dimension_3,Dimension_4,Year,Reg
0,1.229034,2.844778,-0.626471,-1.215186,2005,North East
1,5.105868,11.287056,-6.504202,-15.954646,2005,North West
2,2.762752,5.718357,-0.343147,-2.543602,2005,Yorkshire and The Humber
3,3.734911,7.665915,-1.437084,-2.083348,2005,East Midlands
4,124.10673,-23.805763,1.647874,0.943656,2005,West Midlands


In [17]:
RGNdf_E_W = pd.read_csv("RGNdf_E_W.csv")

In [18]:
RGNdf_E_W

Unnamed: 0,objectid,nuts118cd,nuts118nm,col,Xscaled,Yscaled
0,1,UKC,North East (England),#6E33FF,0.430817,1.0
1,2,UKD,North West (England),#4033C2,0.24934,0.759078
2,3,UKE,Yorkshire and The Humber,#9533BC,0.582389,0.738079
3,4,UKF,East Midlands (England),#AE337A,0.683235,0.479684
4,5,UKG,West Midlands (England),#58335E,0.346998,0.367066
5,6,UKH,East of England,#FF3351,1.0,0.316553
6,7,UKI,London,#DA3320,0.85675,0.126829
7,8,UKJ,South East (England),#C23312,0.759104,0.070675
8,9,UKK,South West (England),#233300,0.135607,0.0
9,10,UKL,Wales,#003355,0.0,0.333197


In [19]:
# remove the " (England)" from the nuts11nm names so they match the RGN11NM
RGNdf_E_W['nuts118nm'] = RGNdf_E_W['nuts118nm'].str.replace(" (England)", "", regex=False)
#.map(lambda x: x.rstrip(' (England)'))

In [20]:
RGNdf_E_W['nuts118nm']

0                  North East
1                  North West
2    Yorkshire and The Humber
3               East Midlands
4               West Midlands
5             East of England
6                      London
7                  South East
8                  South West
9                       Wales
Name: nuts118nm, dtype: object

In [21]:
plotdata = yadf.merge(RGNdf_E_W, left_on="Reg", right_on="nuts118nm", how="left")

In [45]:
fig = px.scatter(plotdata, x="Dimension_3", y="Dimension_4",
                color="col", 
                color_discrete_sequence=plotdata["col"], 
                 animation_frame="Year", hover_data=["Reg", "col"], 
                labels = {
                    "Dimension_3": "Dimension 3", 
                    "Dimension_4": "Dimension 4"})

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(plotdata["Dimension_3"]), min(plotdata["Dimension_3"])] )
fig.update_yaxes(range = [max(plotdata["Dimension_4"]), min(plotdata["Dimension_4"])] )

fig.show(renderer="browser")

In [46]:
fig = px.scatter(plotdata, x="Dimension_1", y="Dimension_2",
                color="col", 
                color_discrete_sequence=plotdata["col"], 
                 animation_frame="Year", hover_data=["Reg", "col"])

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(plotdata["Dimension_1"]), min(plotdata["Dimension_1"])] )
fig.update_yaxes(range = [max(plotdata["Dimension_2"]), min(plotdata["Dimension_2"])] )

fig.show(renderer="browser")

These do not show like clearly a N/S divide. They space themselves really differently to when you embed at LAD level

In [47]:
plotdata.head(10)

Unnamed: 0,Dimension_1,Dimension_2,Dimension_3,Dimension_4,Year,Reg,objectid,nuts118cd,nuts118nm,col,Xscaled,Yscaled
0,1.229034,2.844778,-0.626471,-1.215186,2005,North East,1,UKC,North East,#6E33FF,0.430817,1.0
1,5.105868,11.287056,-6.504202,-15.954646,2005,North West,2,UKD,North West,#4033C2,0.24934,0.759078
2,2.762752,5.718357,-0.343147,-2.543602,2005,Yorkshire and The Humber,3,UKE,Yorkshire and The Humber,#9533BC,0.582389,0.738079
3,3.734911,7.665915,-1.437084,-2.083348,2005,East Midlands,4,UKF,East Midlands,#AE337A,0.683235,0.479684
4,124.10673,-23.805763,1.647874,0.943656,2005,West Midlands,5,UKG,West Midlands,#58335E,0.346998,0.367066
5,4.146759,12.048258,-6.414062,-10.230727,2005,South West,9,UKK,South West,#233300,0.135607,0.0
6,10.198458,38.292612,28.680036,-1.056998,2005,East of England,6,UKH,East of England,#FF3351,1.0,0.316553
7,7.854469,25.851904,-16.872229,10.054073,2005,South East,8,UKJ,South East,#C23312,0.759104,0.070675
8,5.947578,17.147903,-3.535384,-0.773534,2005,London,7,UKI,London,#DA3320,0.85675,0.126829
9,1.599637,4.161819,-1.253869,-2.025308,2005,Wales,10,UKL,Wales,#003355,0.0,0.333197


In [48]:
animationData = pd.read_csv("animationData.csv")

In [49]:
animationData2 = pd.read_csv("animationData.csv")

In [50]:
animationData2.shape

(2088, 11)

In [51]:
# add in region info from the LAD name
animationData2 = animationData2.merge(LAD_Region_lookup, left_on="LAD", right_on="LAD11NM", how="left")

In [52]:
animationData2.shape

(2088, 17)

In [53]:
region_colouring = plotdata[['nuts118nm', 'col']]

In [54]:
region_colouring = region_colouring.drop_duplicates()

In [55]:
region_colouring.shape

(10, 2)

In [56]:
animationData2 =animationData2.merge(region_colouring, left_on="RGN11NM", right_on="nuts118nm", how="left")

In [57]:
animationData2.shape

(2088, 19)

In [58]:
animationData2[animationData2["LAD_x"]=="Watford"]

Unnamed: 0,dim1,dim2,dim3,dim4,year,Xscaled,Yscaled,LAD_x,col_x,LAD3_4,LAD1_2,index,LAD_y,LAD11CD,LAD11NM,RGN11CD,RGN11NM,nuts118nm,col_y
123,-0.020126,-0.026481,-0.016601,0.009744,2005,0.753795,0.318397,Watford,#C03351,,,344,Watford,E07000103,Watford,E12000006,East of England,East of England,#FF3351
471,-0.013611,-0.027134,0.011405,-0.007169,2006,0.753795,0.318397,Watford,#C03351,,,344,Watford,E07000103,Watford,E12000006,East of England,East of England,#FF3351
819,-0.020185,-0.028369,-0.002241,-0.013088,2007,0.753795,0.318397,Watford,#C03351,,,344,Watford,E07000103,Watford,E12000006,East of England,East of England,#FF3351
1167,-0.030954,-0.034485,0.004278,-0.025362,2008,0.753795,0.318397,Watford,#C03351,,,344,Watford,E07000103,Watford,E12000006,East of England,East of England,#FF3351
1515,-0.009454,-0.019092,-0.006786,0.005981,2009,0.753795,0.318397,Watford,#C03351,,,344,Watford,E07000103,Watford,E12000006,East of England,East of England,#FF3351
1863,-0.011058,0.005391,-0.022442,-0.005418,2010,0.753795,0.318397,Watford,#C03351,,,344,Watford,E07000103,Watford,E12000006,East of England,East of England,#FF3351


In [59]:
test = animationData2["col_y"].unique()
test

array(['#DA3320', '#58335E', '#9533BC', '#233300', '#C23312', '#4033C2',
       '#6E33FF', '#AE337A', '#003355', '#FF3351'], dtype=object)

In [60]:
fig = px.scatter(animationData2, x="dim3", y="dim4",
                color="col_y", 
                color_discrete_sequence=animationData2["col_y"], 
                 animation_frame="year", hover_data=["LAD11NM", "col_y"], 
                labels = {
                    "dim3": "Dimension 3", 
                    "dim4": "Dimension 4"})

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(animationData2["dim3"]), min(animationData2["dim3"])] )
fig.update_yaxes(range = [max(animationData2["dim4"]), min(animationData2["dim4"])] )

fig.show(renderer="browser")

In [61]:
fig = px.scatter(animationData2, x="dim3", y="dim4",
                color="col_y", 
                animation_frame="year", 
                 hover_data=["LAD11NM", "col_y"], 
                labels = {
                    "dim3": "Dimension 3", 
                    "dim4": "Dimension 4"})

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(animationData2["dim3"]), min(animationData2["dim3"])] )
fig.update_yaxes(range = [max(animationData2["dim4"]), min(animationData2["dim4"])] )

fig.show(renderer="browser")

In [62]:
fig = px.scatter(animationData2[animationData2["LAD_x"]=="Watford"], x="dim3", y="dim4",
                color="col_y", 
                color_discrete_sequence=animationData2["col_y"], 
                 animation_frame="year")
fig.show(renderer="browser")

In [42]:
fig = px.scatter(animationData, x="dim3", y="dim4",
                color="col", 
                color_discrete_sequence=animationData["col"], 
                 animation_frame="year", hover_data=["LAD", "col"], 
                text="LAD3_4",
                labels = {
                    "dim3": "Dimension 3", 
                    "dim4": "Dimension 4"})

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(animationData["dim3"]), min(animationData["dim3"])] )
fig.update_yaxes(range = [max(animationData["dim4"]), min(animationData["dim4"])] )

fig.show(renderer="browser")

## Colour the LAD level embeddings by urban-rural
### It only has England adata!!


In [43]:
# THIS ONLY HAS DATA FOR ENGLAND!!!
ruLAD = pd.read_csv("RUC_LAD_2011_EN_LU.csv")

In [73]:
animDataRU = animationData.merge(LAD_Region_lookup, left_on="LAD", right_on="LAD", how="left")
animDataRU = animDataRU.merge(ruLAD, left_on="LAD11CD", right_on="LAD11CD", how="left")

In [75]:
animDataRU.shape
# all the Welsh LADs will have no indicator for their urban rural status


(2088, 30)

In [83]:
animDataRU.head(1)

Unnamed: 0,dim1,dim2,dim3,dim4,year,Xscaled,Yscaled,LAD,col,LAD3_4,...,Urban Major Conurbation population 2011,Total Urban population 2011,Total population 2011,Hub towns (rural related) population included in Urban population 2011,Rural including hub towns (rural & rural related) population 2011,Rural including hub towns (rural & rural related) population as % of Total population 2011,RUC11CD,RUC11,Broad RUC11,URbinary
0,-0.107843,0.1474271,-0.085918,0.067565,2005,0.784615,0.289551,Westminster,#C8334A,Westminster,...,219396.0,219396.0,219396.0,0.0,0.0,0.0,6.0,Urban with Major Conurbation,Predominantly Urban,1
1,-0.075522,0.05805172,-0.039126,-0.008742,2005,0.795622,0.282016,Southwark,#CB3348,,...,288283.0,288283.0,288283.0,0.0,0.0,0.0,6.0,Urban with Major Conurbation,Predominantly Urban,1
2,-0.060125,-3.298477e-07,-0.026208,0.007644,2005,0.800248,0.290575,Tower Hamlets,#CC334A,,...,254096.0,254096.0,254096.0,0.0,0.0,0.0,6.0,Urban with Major Conurbation,Predominantly Urban,1
3,-0.073188,0.05153974,-0.043687,-0.025779,2005,0.777226,0.285945,Hammersmith and Fulham,#C63349,,...,182493.0,182493.0,182493.0,0.0,0.0,0.0,6.0,Urban with Major Conurbation,Predominantly Urban,1
4,-0.055033,-0.006681403,-0.044061,0.006443,2005,0.780755,0.287109,Kensington and Chelsea,#C73349,,...,158649.0,158649.0,158649.0,0.0,0.0,0.0,6.0,Urban with Major Conurbation,Predominantly Urban,1


Colour by 6 level Urban-rural classification:

In [86]:
fig = px.scatter(animDataRU, x="dim1", y="dim2",
                color="RUC11CD", 
                color_discrete_sequence=animDataRU["RUC11CD"], 
                 animation_frame="year", hover_data=["LAD", "RUC11CD", "RUC11"])

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(animationData["dim1"]), min(animationData["dim1"])] )
fig.update_yaxes(range = [max(animationData["dim2"]), min(animationData["dim2"])] )

fig.show(renderer="browser")

In [87]:
fig = px.scatter(animDataRU, x="dim3", y="dim4",
                color="RUC11CD", 
                color_discrete_sequence=animDataRU["RUC11CD"], 
                 animation_frame="year", hover_data=["LAD", "RUC11CD", "RUC11"])

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(animationData["dim3"]), min(animationData["dim3"])] )
fig.update_yaxes(range = [max(animationData["dim4"]), min(animationData["dim4"])] )

fig.show(renderer="browser")

Colour by a binary indicator of urban/rural created by the RUC11CD value:

In [None]:
fig = px.scatter(animDataRU, x="dim1", y="dim2",
                color="URbinary", 
                color_discrete_sequence=animDataRU["URbinary"], 
                 animation_frame="year", hover_data=["LAD", "RUC11CD", "RUC11"])

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(animationData["dim1"]), min(animationData["dim1"])] )
fig.update_yaxes(range = [max(animationData["dim2"]), min(animationData["dim2"])] )

fig.show(renderer="browser")

In [88]:
fig = px.scatter(animDataRU, x="dim3", y="dim4",
                color="URbinary", 
                color_discrete_sequence=animDataRU["URbinary"], 
                 animation_frame="year", hover_data=["LAD", "RUC11CD", "RUC11"])

fig.update_yaxes(autorange="reversed")
fig.update_xaxes(autorange="reversed")
fig.update_layout(showlegend=False)
fig.update_xaxes(range = [max(animationData["dim3"]), min(animationData["dim3"])] )
fig.update_yaxes(range = [max(animationData["dim4"]), min(animationData["dim4"])] )

fig.show(renderer="browser")

#### NO obvious links between urban and rural and any of the 4 embedding dimensions are obvious
It slightly seems that East-West you see more urban, but this makes sense, as Wales is more rural than East England/