In [4]:
# Load Data

import pandas as pd

# Read the two input CSVs from the working directory (edit the paths if your files live elsewhere)
lsoa_codes_path = "LSOA_codes.csv"
df = pd.read_csv("imd2019lsoa.csv")
lsoa_df = pd.read_csv(lsoa_codes_path)

# Preview the first rows of each dataset; the literal 'here' line separates the two previews
print(df.head(), 'here', lsoa_df.head(), sep="\n")

  FeatureCode  DateCode Measurement  Units    Value  \
0   E01005278      2019        Rank    NaN  11281.0   
1   E01006616      2019     Decile     NaN      1.0   
2   E01005236      2019        Rank    NaN   4565.0   
3   E01031873      2019        Rank    NaN  25826.0   
4   E01007367      2019     Decile     NaN      2.0   

                   Indices of Deprivation  
0            b. Income Deprivation Domain  
1  a. Index of Multiple Deprivation (IMD)  
2            b. Income Deprivation Domain  
3            b. Income Deprivation Domain  
4  a. Index of Multiple Deprivation (IMD)  
here
   LSOA code
0  E01000001
1  E01000002
2  E01000003
3  E01000005
4  E01000006


In [5]:
# Data Cleaning

# Remove the 'Units' column, but only when it is present and every value in it is missing
if "Units" in df and df["Units"].isnull().all():
    df = df.drop("Units", axis=1)

# Discard fully duplicated rows
df = df.drop_duplicates()

# Keep only rows whose FeatureCode appears in the "LSOA code" column of lsoa_df
# (lsoa_df is expected to hold the London LSOA codes)
df = df.loc[df["FeatureCode"].isin(lsoa_df["LSOA code"])]

# Optional export of the filtered data to CSV (left disabled)
# df.to_csv("imd2019lsoa_london_cleaned.csv", index = False)

In [6]:
# Null data check: count missing values per column and show only the columns that have any

null_counts = df.isna().sum()
print(null_counts.loc[null_counts.gt(0)])

Series([], dtype: int64)


In [7]:
# Check data size: number of rows remaining in df

print(df.shape[0])

165420


In [8]:
# List indices: distinct "Indices of Deprivation" labels, in sorted order, one per line

indices = df["Indices of Deprivation"].unique().tolist()
indices.sort()
for i in indices:
    print(i)


a. Index of Multiple Deprivation (IMD)
b. Income Deprivation Domain
c. Employment Deprivation Domain
d. Education, Skills and Training Domain
e. Health Deprivation and Disability Domain
f. Crime Domain
g. Barriers to Housing and Services Domain
h. Living Environment Deprivation Domain
i. Income Deprivation Affecting Children Index (IDACI)
j. Income Deprivation Affecting Older People Index (IDAOPI)


In [9]:
# List top 10 for all indices


def top10_by_rank(data, index_prefix, n=10):
    """Return the ``n`` "Rank" rows with the smallest ``Value`` for one index.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with "Indices of Deprivation", "Measurement" and "Value" columns.
    index_prefix : str
        Lower-case prefix identifying the index. Labels are lower-cased before
        matching, so "a" selects every label that starts with "a" or "A".
    n : int, default 10
        Number of rows to return.

    Returns
    -------
    pandas.DataFrame
        The matching rows ordered by ascending "Value", keeping their original
        index labels.
    """
    labels = data["Indices of Deprivation"].str.lower()
    selected = labels.str.startswith(index_prefix) & (data["Measurement"] == "Rank")
    return data[selected].nsmallest(n, "Value")


# The index labels listed earlier carry the letter prefixes "a." to "j.",
# so a single letter is enough to pick out each index.
top10_imd_df = top10_by_rank(df, "a")
top10_income_df = top10_by_rank(df, "b")
top10_employment_df = top10_by_rank(df, "c")
top10_education_df = top10_by_rank(df, "d")
top10_health_df = top10_by_rank(df, "e")
top10_crime_df = top10_by_rank(df, "f")
top10_housing_df = top10_by_rank(df, "g")
top10_living_environment_df = top10_by_rank(df, "h")
top10_idaci_df = top10_by_rank(df, "i")
top10_idaopi_df = top10_by_rank(df, "j")

# Print the ten tables in the same order as the index letters
for top10_table in (
    top10_imd_df,
    top10_income_df,
    top10_employment_df,
    top10_education_df,
    top10_health_df,
    top10_crime_df,
    top10_housing_df,
    top10_living_environment_df,
    top10_idaci_df,
    top10_idaopi_df,
):
    print(top10_table)

       FeatureCode  DateCode Measurement   Value  \
835680   E01002082      2019        Rank   546.0   
746631   E01002857      2019        Rank  1012.0   
848903   E01001178      2019        Rank  1096.0   
755564   E01000601      2019        Rank  1192.0   
764651   E01002853      2019        Rank  1212.0   
850150   E01001847      2019        Rank  1315.0   
754636   E01002039      2019        Rank  1411.0   
857798   E01002036      2019        Rank  1464.0   
742315   E01001483      2019        Rank  1643.0   
754476   E01002071      2019        Rank  1685.0   

                        Indices of Deprivation  
835680  a. Index of Multiple Deprivation (IMD)  
746631  a. Index of Multiple Deprivation (IMD)  
848903  a. Index of Multiple Deprivation (IMD)  
755564  a. Index of Multiple Deprivation (IMD)  
764651  a. Index of Multiple Deprivation (IMD)  
850150  a. Index of Multiple Deprivation (IMD)  
754636  a. Index of Multiple Deprivation (IMD)  
857798  a. Index of Multiple Depriv