<a href="https://colab.research.google.com/github/JosephFalconio/Joseph-Falconio_dissertation/blob/main/ethnicity_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load datasets
df_2001 = pd.read_csv('ethnicity_2001_converted_to_2021_LSOA.csv')
df_2011 = pd.read_csv('ethnicity_2011_converted_to_2021_LSOA (3).csv')
df_2021 = pd.read_csv('ethnicity_2021.csv')


def _standardise_columns(frame, year):
    """Return a copy of ``frame`` with normalised, year-tagged column names.

    Each column name is stripped of surrounding whitespace, spaces are
    replaced by underscores and every ``%_`` is replaced by ``pct_``.
    Names that then start with ``pct_`` get a ``_<year>`` suffix so the
    three census years stay distinguishable after merging.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose column labels are strings.
    year : int or str
        Suffix appended to the percentage columns.

    Returns
    -------
    pandas.DataFrame
        New frame; ``frame`` itself is left untouched.
    """
    out = frame.copy()
    # regex=False: both patterns are literal text, and being explicit avoids
    # depending on the pandas-version-specific default for `regex`.
    out.columns = (
        out.columns.str.strip()
        .str.replace(' ', '_', regex=False)
        .str.replace('%_', 'pct_', regex=False)
    )
    # Single rename pass instead of rebuilding the frame once per column.
    return out.rename(
        columns=lambda name: f'{name}_{year}' if name.startswith('pct_') else name
    )


# Standardise column names and tag % columns with their census year
df_2001 = _standardise_columns(df_2001, 2001)
df_2011 = _standardise_columns(df_2011, 2011)
df_2021 = _standardise_columns(df_2021, 2021)

# 2021 only: give the total a year-specific name and drop the extra Name column
df_2021 = (
    df_2021
    .rename(columns={'Total:_All_usual_residents': 'Total_2021'})
    .drop(columns=['Name'])
)

# Merge all three datasets on LSOA21CD (inner join: only LSOAs present in all
# three files are kept). validate='one_to_one' makes pandas raise a MergeError
# if a code is duplicated in any file, instead of silently multiplying rows
# and skewing every mean computed from `df` afterwards.
df = (
    df_2001
    .merge(df_2011, on='LSOA21CD', validate='one_to_one')
    .merge(df_2021, on='LSOA21CD', validate='one_to_one')
)

# Convert all % columns to numeric; unparseable values become NaN
pct_cols = [col for col in df.columns if col.startswith('pct_')]
df[pct_cols] = df[pct_cols].apply(pd.to_numeric, errors='coerce')

# Define ethnicity groups
groups = ['White', 'Mixed', 'Asian', 'Black', 'Other']

# Fail early, naming every missing input column, rather than part-way through
# the calculation loop below.
_missing_pct = [
    f'pct_{group}_{year}'
    for group in groups
    for year in (2001, 2011, 2021)
    if f'pct_{group}_{year}' not in df.columns
]
if _missing_pct:
    raise KeyError(f'Expected percentage columns not found after merge: {_missing_pct}')

# Calculate percentage point changes (later year minus earlier year)
for group in groups:
    df[f'Change_{group}_2001_2021'] = df[f'pct_{group}_2021'] - df[f'pct_{group}_2001']
    df[f'Change_{group}_2011_2021'] = df[f'pct_{group}_2021'] - df[f'pct_{group}_2011']
    df[f'Change_{group}_2001_2011'] = df[f'pct_{group}_2011'] - df[f'pct_{group}_2001']

# Export final dataset
df.to_csv('Ethnicity_merged_with_changes.csv', index=False)
print("Ethnicity change calculations complete.")


✅ Ethnicity change calculations complete.


In [None]:
# LSOA21CD codes of the East Leeds (ELE) LSOAs
ele_lsoas = [
    'E01011715', 'E01011719', 'E01011714', 'E01011716', 'E01011717', 'E01011718',
    'E01011720', 'E01011721', 'E01011722', 'E01011723', 'E01011724',
    'E01011406', 'E01011407', 'E01011408', 'E01011409', 'E01011410',
    'E01011412', 'E01011411',
]

# Export layout: the LSOA identifier followed by every Change_* column
change_cols = [name for name in df.columns if name.startswith('Change_')]

# Take the ELE rows and those columns in one step; .copy() keeps df_ele
# independent of df
in_ele = df['LSOA21CD'].isin(ele_lsoas)
df_ele = df.loc[in_ele, ['LSOA21CD'] + change_cols].copy()

# Write the subset to CSV
df_ele.to_csv('Ethnicity_ELE_changes_only.csv', index=False)
print("Exported ELE ethnicity change columns only.")


✅ Exported ELE ethnicity change columns only.


In [None]:
# Average each change column over the ELE rows (identifier column excluded),
# rounded to two decimal places
ele_change_values = df_ele.drop(columns='LSOA21CD')
mean_changes = ele_change_values.mean().round(2)

# Report the averages: header line, then the series
print("Mean Ethnicity Change in East Leeds (Percentage Points):", mean_changes, sep="\n")

# Store them as a one-column table named Mean_Change_ELE
mean_changes_table = mean_changes.to_frame(name='Mean_Change_ELE')
mean_changes_table.to_csv('Ethnicity_ELE_mean_changes.csv')


📊 Mean Ethnicity Change in East Leeds (Percentage Points):
Change_White_2001_2021   -6.32
Change_White_2011_2021   -4.09
Change_White_2001_2011   -2.23
Change_Mixed_2001_2021    1.93
Change_Mixed_2011_2021    1.26
Change_Mixed_2001_2011    0.67
Change_Asian_2001_2021    2.67
Change_Asian_2011_2021    1.68
Change_Asian_2001_2011    0.99
Change_Black_2001_2021    1.45
Change_Black_2011_2021    0.78
Change_Black_2001_2011    0.67
Change_Other_2001_2021    0.27
Change_Other_2011_2021    0.38
Change_Other_2001_2011   -0.10
dtype: float64


In [None]:
# LSOA21CD codes of the Top 10 growth LSOAs for 2011–2021
top10_lsoas = [
    'E01011516', 'E01011515', 'E01011323', 'E01011693', 'E01011673',
    'E01011276', 'E01011632', 'E01011735', 'E01011293', 'E01011283',
]

# Export layout: the LSOA identifier followed by every Change_* column
change_cols = [name for name in df.columns if name.startswith('Change_')]

# Select the Top 10 rows and those columns in one step; .copy() keeps
# df_top10 independent of df
in_top10 = df['LSOA21CD'].isin(top10_lsoas)
df_top10 = df.loc[in_top10, ['LSOA21CD'] + change_cols].copy()

# Write the subset to CSV
df_top10.to_csv('Ethnicity_Top10_changes_only.csv', index=False)
print("Exported Top 10 ethnicity change columns only.")


✅ Exported Top 10 ethnicity change columns only.


In [None]:
# Top 10% urban change 11-21
# Average each change column over the rows of df_top10 (identifier column
# excluded), rounded to two decimal places
top10_change_values = df_top10.drop(columns='LSOA21CD')
mean_changes_top10 = top10_change_values.mean().round(2)

# Report the averages: header line, then the series
print("Mean Ethnicity Change in Top 10 Growth LSOAs (Percentage Points):", mean_changes_top10, sep="\n")

# Store them as a one-column table named Mean_Change_Top10
mean_changes_top10_table = mean_changes_top10.to_frame(name='Mean_Change_Top10')
mean_changes_top10_table.to_csv('Ethnicity_Top10_mean_changes.csv')


📊 Mean Ethnicity Change in Top 10 Growth LSOAs (Percentage Points):
Change_White_2001_2021   -11.63
Change_White_2011_2021    -5.32
Change_White_2001_2011    -6.31
Change_Mixed_2001_2021     1.70
Change_Mixed_2011_2021     0.54
Change_Mixed_2001_2011     1.16
Change_Asian_2001_2021     4.71
Change_Asian_2011_2021     1.57
Change_Asian_2001_2011     3.14
Change_Black_2001_2021     3.68
Change_Black_2011_2021     1.92
Change_Black_2001_2011     1.75
Change_Other_2001_2021     1.58
Change_Other_2011_2021     1.32
Change_Other_2001_2011     0.26
dtype: float64


In [None]:
# LSOA21CD codes of the Top 10 growth LSOAs for 2001–2021.
# NOTE: this rebinds top10_lsoas and df_top10, replacing the 2011–2021
# selection made earlier in the notebook.
top10_lsoas = [
    'E01011579', 'E01011583', 'E01011526', 'E01011466', 'E01011541',
    'E01011336', 'E01011611', 'E01011528', 'E01011609', 'E01011574',
]

# Export layout: the LSOA identifier followed by every Change_* column
change_cols = [name for name in df.columns if name.startswith('Change_')]

# Select the Top 10 rows and those columns in one step; .copy() keeps
# df_top10 independent of df
in_top10 = df['LSOA21CD'].isin(top10_lsoas)
df_top10 = df.loc[in_top10, ['LSOA21CD'] + change_cols].copy()

# Write the subset to CSV
df_top10.to_csv('Ethnicity_Top10_2001_2021_changes_only.csv', index=False)
print("Exported Top 10 ethnicity change columns (2001–2021) only.")


✅ Exported Top 10 ethnicity change columns (2001–2021) only.


In [None]:
# Columns of df_top10 whose name contains the 2001_2021 period tag
change_2001_2021_cols = [name for name in df_top10.columns if '2001_2021' in name]

# Average those columns over the rows of df_top10, rounded to two decimal places
top10_2001_2021_values = df_top10[change_2001_2021_cols]
mean_changes_top10 = top10_2001_2021_values.mean().round(2)

# Report the averages: header line, then the series
print("Mean Ethnicity Change in Top 10 Growth LSOAs (2001–2021):", mean_changes_top10, sep="\n")

# Store them as a one-column table named Mean_Change_2001_2021
mean_changes_top10_table = mean_changes_top10.to_frame(name='Mean_Change_2001_2021')
mean_changes_top10_table.to_csv('Ethnicity_Top10_mean_changes_2001_2021.csv')




📊 Mean Ethnicity Change in Top 10 Growth LSOAs (2001–2021):
Change_White_2001_2021   -4.03
Change_Mixed_2001_2021    0.73
Change_Asian_2001_2021    1.61
Change_Black_2001_2021    1.28
Change_Other_2001_2021    0.44
dtype: float64


In [None]:
# Calculate mean change for each ethnicity group across Leeds.
# These are simple (unweighted) means over every row of df, i.e. each LSOA
# counts equally regardless of its population. Rounded to two decimal places.
mean_changes_2001_2021 = df[[f'Change_{group}_2001_2021' for group in groups]].mean().round(2)
mean_changes_2011_2021 = df[[f'Change_{group}_2011_2021' for group in groups]].mean().round(2)

# Backward-compatible alias: this series was previously stored under a name
# that suggested the 2001–2011 period although it holds the 2001–2021 columns.
# Prefer mean_changes_2001_2021 in new code.
mean_changes_2001_2011 = mean_changes_2001_2021

# Print results
print("📊 Mean Ethnicity Change in Leeds (2001–2021):")
print(mean_changes_2001_2021)

print("\n Mean Ethnicity Change in Leeds (2011–2021):")
print(mean_changes_2011_2021)

# Optional: Save to CSV
mean_changes_2001_2021.to_frame(name='Mean_Change_2001_2021').to_csv('Ethnicity_Leeds_mean_changes_2001_2021.csv')
mean_changes_2011_2021.to_frame(name='Mean_Change_2011_2021').to_csv('Ethnicity_Leeds_mean_changes_2011_2021.csv')


📊 Mean Ethnicity Change in Leeds (2001–2021):
Change_White_2001_2021   -11.64
Change_Mixed_2001_2021     1.92
Change_Asian_2001_2021     4.65
Change_Black_2001_2021     3.73
Change_Other_2001_2021     1.34
dtype: float64

📊 Mean Ethnicity Change in Leeds (2011–2021):
Change_White_2011_2021   -5.50
Change_Mixed_2011_2021    0.76
Change_Asian_2011_2021    1.66
Change_Black_2011_2021    1.93
Change_Other_2011_2021    1.14
dtype: float64
