# Mapping Matrix

In [8]:
import pandas as pd
import numpy as np

In [7]:
# Read the raw mapping table and put it in a deterministic order:
# sorted by PCON10CD, then PCON24CD, with a fresh 0..n-1 index.
mapping_raw = (
    pd.read_csv("mapping_raw.csv")
    .sort_values(by=['PCON10CD', 'PCON24CD'])
    .reset_index(drop=True)
)
mapping_raw.head()

Unnamed: 0,PCON10CD,PCON24CD
0,E14000530,E14001063
1,E14000531,E14001064
2,E14000532,E14001065
3,E14000533,E14001066
4,E14000534,E14001067


In [10]:
# Distinct codes in order of first appearance; these become the row
# (PCON10CD) and column (PCON24CD) labels of the matrix.
unique_pcon10 = mapping_raw['PCON10CD'].unique()
unique_pcon24 = mapping_raw['PCON24CD'].unique()

# Translate every (PCON10CD, PCON24CD) pair of the raw table into integer
# (row, column) positions within those label arrays.
row_pos = pd.Index(unique_pcon10).get_indexer(mapping_raw['PCON10CD'])
col_pos = pd.Index(unique_pcon24).get_indexer(mapping_raw['PCON24CD'])

# get_indexer reports labels it cannot locate as -1, which NumPy would
# silently treat as "last row/column" below, so fail loudly instead.
assert (row_pos >= 0).all() and (col_pos >= 0).all(), \
    "a code in mapping_raw could not be located among its own unique values"

# Build the 0/1 indicator matrix with a single vectorised assignment
# instead of one .loc write per row of mapping_raw. Repeated pairs simply
# write 1 to the same cell again, so the result stays binary.
indicator = np.zeros((len(unique_pcon10), len(unique_pcon24)), dtype=np.int64)
indicator[row_pos, col_pos] = 1

# Rows are PCON10 codes, columns are PCON24 codes; a 1 marks a pair that
# appears in mapping_raw.
mapping_matrix = pd.DataFrame(
    indicator,
    index=unique_pcon10,
    columns=unique_pcon24
)

mapping_matrix.head()

Unnamed: 0,E14001063,E14001064,E14001065,E14001066,E14001067,E14001294,E14001366,E14001599,E14001068,E14001140,...,W07000100,W07000087,W07000085,W07000099,W07000106,W07000084,W07000086,W07000092,W07000088,W07000110
E14000530,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
E14000531,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
E14000532,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
E14000533,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
E14000534,0,0,0,0,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0


# Check 1
Check the many-to-one and one-to-many relationships in the mapping matrix:


In [11]:
# Row sums give the number of PCON24 codes flagged for each PCON10 code;
# column sums give the number of PCON10 codes flagged for each PCON24 code.
pcon24_per_pcon10 = mapping_matrix.sum(axis=1)
pcon10_per_pcon24 = mapping_matrix.sum(axis=0)

print("\nPCON10 codes that map to multiple PCON24 codes:")
print(pcon24_per_pcon10.gt(1).sum())  # rows containing more than one 1

print("\nPCON24 codes that map to multiple PCON10 codes:")
print(pcon10_per_pcon24.gt(1).sum())  # columns containing more than one 1


PCON10 codes that map to multiple PCON24 codes:
497

PCON24 codes that map to multiple PCON10 codes:
502


Recompute the counts directly from the raw data to confirm we get the same numbers:

In [12]:
# Number of distinct PCON24 codes paired with each PCON10 code in the raw table
pcon10_multiple = mapping_raw.groupby('PCON10CD')['PCON24CD'].nunique()
# Number of distinct PCON10 codes paired with each PCON24 code in the raw table
pcon24_multiple = mapping_raw.groupby('PCON24CD')['PCON10CD'].nunique()

# How many codes on each side are paired with more than one code on the other
pcon10_multiple_count = pcon10_multiple.gt(1).sum()
pcon24_multiple_count = pcon24_multiple.gt(1).sum()

print("\nFrom raw data - PCON10 codes that map to multiple PCON24:", pcon10_multiple_count)
print("From raw data - PCON24 codes that map to multiple PCON10:", pcon24_multiple_count)


From raw data - PCON10 codes that map to multiple PCON24: 497
From raw data - PCON24 codes that map to multiple PCON10: 502


# Check 2
Example of a PCON10 code that maps to multiple PCON24 codes, taken from the raw data:

In [13]:
# Pick the first PCON10 code paired with more than one PCON24 code and show
# its rows from the raw table.
print("\nExample of a PCON10 with multiple mappings:")
example_pcon10 = pcon10_multiple.loc[pcon10_multiple.gt(1)].index[0]
is_example_pcon10 = mapping_raw['PCON10CD'] == example_pcon10
print(mapping_raw.loc[is_example_pcon10, ['PCON10CD', 'PCON24CD']])


Example of a PCON10 with multiple mappings:
    PCON10CD   PCON24CD
4  E14000534  E14001067
5  E14000534  E14001294
6  E14000534  E14001366
7  E14000534  E14001599


Look up the same example in the mapping matrix to confirm it shows the same mappings:

In [14]:
# Re-check the PCON10 example found in the raw data (`example_pcon10`)
# against the matrix. The codes are derived from the data rather than
# hard-coded, so the check stays valid if mapping_raw.csv changes.
expected_pcon24 = (
    mapping_raw.loc[mapping_raw['PCON10CD'] == example_pcon10, 'PCON24CD']
    .unique()
    .tolist()
)

print(f"\nChecking PCON10 '{example_pcon10}' in matrix:")
pcon10_row = mapping_matrix.loc[example_pcon10]
print("Maps to these PCON24 (should see 1s):")
print(pcon10_row[expected_pcon24])

# Enforce "should see 1s": every PCON24 code paired with this PCON10 code in
# the raw data must be flagged in the matrix row, and nothing else may be.
assert (pcon10_row[expected_pcon24] == 1).all(), \
    "matrix row is missing a mapping that is present in the raw data"
assert pcon10_row.sum() == len(expected_pcon24), \
    "matrix row flags PCON24 codes that are not in the raw data"


Checking PCON10 'E14000534' in matrix:
Maps to these PCON24 (should see 1s):
E14001067    1
E14001294    1
E14001366    1
E14001599    1
Name: E14000534, dtype: int64


# Check 3
Example of a PCON24 code that maps to multiple PCON10 codes, taken from the raw data:

In [15]:
# Pick the first PCON24 code paired with more than one PCON10 code and show
# its rows from the raw table.
print("\nExample of a PCON24 with multiple mappings:")
example_pcon24 = pcon24_multiple.loc[pcon24_multiple.gt(1)].index[0]
is_example_pcon24 = mapping_raw['PCON24CD'] == example_pcon24
print(mapping_raw.loc[is_example_pcon24, ['PCON10CD', 'PCON24CD']])


Example of a PCON24 with multiple mappings:
      PCON10CD   PCON24CD
0    E14000530  E14001063
680  E14000844  E14001063


Look up the same example in the mapping matrix to confirm it shows the same mappings:

In [16]:
# Re-check the PCON24 example found in the raw data (`example_pcon24`)
# against the matrix. The codes are derived from the data rather than
# hard-coded, so the check stays valid if mapping_raw.csv changes.
expected_pcon10 = (
    mapping_raw.loc[mapping_raw['PCON24CD'] == example_pcon24, 'PCON10CD']
    .unique()
    .tolist()
)

print(f"\nChecking PCON24 '{example_pcon24}' in matrix:")
pcon24_col = mapping_matrix[example_pcon24]
print("Maps to these PCON10 (should see 1s):")
print(pcon24_col[expected_pcon10])

# Enforce "should see 1s": every PCON10 code paired with this PCON24 code in
# the raw data must be flagged in the matrix column, and nothing else may be.
assert (pcon24_col[expected_pcon10] == 1).all(), \
    "matrix column is missing a mapping that is present in the raw data"
assert pcon24_col.sum() == len(expected_pcon10), \
    "matrix column flags PCON10 codes that are not in the raw data"


Checking PCON24 'E14001063' in matrix:
Maps to these PCON10 (should see 1s):
E14000530    1
E14000844    1
Name: E14001063, dtype: int64
