# Loneliness, Deprivation and Population Density

## (outliers are not dropped in this file)

## Read in IMD and loneliness index data for England

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
# Load the English IMD table (one row per LSOA) from the local data folder.
imd = pd.read_csv("england/imd2015eng.csv")

In [3]:
# Load the loneliness index data; it is aggregated per LSOA further down.
loneliness = pd.read_csv("england/final_data.csv")

The IMD data is a single snapshot for each LSOA. The loneliness data in theory has an entry for each year for each postcode, but in practice it is limited to a single year. We therefore need to produce an aggregate view of loneliness for each LSOA so that it can be merged with the IMD data.

In [4]:
# Average the two loneliness measures within every (LSOA, year) group so that
# the result has one row per LSOA and year.
agg_cols = dict(loneills='mean', addiction_zscore='mean')
loneliness_per_lsoa11_2 = (
    loneliness
    .groupby(['lsoa11', 'Year'], as_index=False)
    .agg(agg_cols)
)

# Left join: every aggregated loneliness row is kept and the IMD columns are
# attached wherever the LSOA code matches.
loneliness_imd2 = loneliness_per_lsoa11_2.merge(
    imd,
    how='left',
    left_on='lsoa11',
    right_on='LSOA code (2011)',
)

In [5]:
# Display the merged loneliness + IMD frame to inspect the result of the join.
loneliness_imd2

Unnamed: 0,lsoa11,Year,loneills,addiction_zscore,LSOA code (2011),LSOA name (2011),Local Authority District code (2013),Local Authority District name (2013),Index of Multiple Deprivation (IMD) Score,Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived),...,Indoors Sub-domain Rank (where 1 is most deprived),Indoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs),Outdoors Sub-domain Score,Outdoors Sub-domain Rank (where 1 is most deprived),Outdoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs),Total population: mid 2012 (excluding prisoners),Dependent Children aged 0-15: mid 2012 (excluding prisoners),Population aged 16-59: mid 2012 (excluding prisoners),Older population aged 60 and over: mid 2012 (excluding prisoners),Working age population 18-59/64: for use with Employment Deprivation Domain (excluding prisoners)
0,E01000007,2015,-3.032939,-0.001290,E01000007,Barking and Dagenham 015A,E09000002,Barking and Dagenham,42.513,3778.0,...,11759.0,4.0,1.279,2348.0,1.0,1478.0,372.0,1003.0,103.0,979.50
1,E01000009,2015,-6.786468,-0.101102,E01000009,Barking and Dagenham 016B,E09000002,Barking and Dagenham,33.412,6750.0,...,9427.0,3.0,1.027,3794.0,2.0,1839.0,340.0,1257.0,242.0,1233.50
2,E01000010,2015,-1.346273,-0.101102,E01000010,Barking and Dagenham 015C,E09000002,Barking and Dagenham,30.914,7796.0,...,5411.0,2.0,0.658,6946.0,3.0,3120.0,741.0,2241.0,138.0,2226.25
3,E01000012,2015,-2.063601,-0.101102,E01000012,Barking and Dagenham 015D,E09000002,Barking and Dagenham,36.910,5475.0,...,4492.0,2.0,0.992,4034.0,2.0,2007.0,619.0,1282.0,106.0,1248.50
4,E01000017,2015,-0.972582,-0.101102,E01000017,Barking and Dagenham 009C,E09000002,Barking and Dagenham,40.563,4357.0,...,11830.0,4.0,0.525,8498.0,3.0,1622.0,409.0,991.0,222.0,957.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6266,E01033758,2015,-0.932807,-0.101102,E01033758,Liverpool 060B,E08000012,Liverpool,35.068,6121.0,...,2509.0,1.0,1.704,1003.0,1.0,1997.0,61.0,1868.0,68.0,1904.25
6267,E01033759,2015,-2.025628,-0.101102,E01033759,Liverpool 010F,E08000012,Liverpool,54.124,1582.0,...,5987.0,2.0,0.834,5284.0,2.0,1195.0,283.0,702.0,210.0,716.25
6268,E01033762,2015,1.970892,-0.101102,E01033762,Liverpool 060E,E08000012,Liverpool,26.541,9976.0,...,6832.0,3.0,1.310,2210.0,1.0,1795.0,50.0,1669.0,76.0,1678.25
6269,E01033764,2015,0.166426,0.553710,E01033764,Liverpool 022E,E08000012,Liverpool,76.865,72.0,...,25196.0,8.0,1.405,1835.0,1.0,2158.0,339.0,1249.0,570.0,1303.00


## rename columns


In [6]:
# Map the verbose column headers of the merged frame to short snake_case names.
#
# Notes on this mapping:
# * Each key appears exactly once. An earlier duplicate entry for
#   'Outdoors Sub-domain Score' (-> 'outdoor_score') was dead code, because
#   the later entry (-> 'outdoor_sub_score') always overwrote it.
# * 'Employment Rank ...' now maps to 'employment_rank'. It was previously
#   mislabelled 'employment_score' even though the column holds a rank.
# * 'Employment Score (rate)' keeps its existing short name 'employment_rate'
#   so that the name 'employment_score' is not silently repurposed.
big_dict = {
    # identifiers
    'Year': 'year',
    'LSOA code (2011)': 'lsoa_code',
    'LSOA name (2011)': 'lsoa_name',
    'Local Authority District code (2013)': 'district_code',
    'Local Authority District name (2013)': 'district_name',
    # overall index
    'Index of Multiple Deprivation (IMD) Score': 'imd_score',
    'Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived)': 'imd_rank',
    'Index of Multiple Deprivation (IMD) Decile (where 1 is most deprived 10% of LSOAs)': 'imd_decile',
    # domains
    'Income Score (rate)': 'income_score',
    'Income Rank (where 1 is most deprived)': 'income_rank',
    'Income Decile (where 1 is most deprived 10% of LSOAs)': 'income_decile',
    'Employment Score (rate)': 'employment_rate',
    'Employment Rank (where 1 is most deprived)': 'employment_rank',
    'Employment Decile (where 1 is most deprived 10% of LSOAs)': 'employment_decile',
    'Education, Skills and Training Score': 'education_score',
    'Education, Skills and Training Rank (where 1 is most deprived)': 'education_rank',
    'Education, Skills and Training Decile (where 1 is most deprived 10% of LSOAs)': 'education_decile',
    'Health Deprivation and Disability Score': 'health_score',
    'Health Deprivation and Disability Rank (where 1 is most deprived)': 'health_rank',
    'Health Deprivation and Disability Decile (where 1 is most deprived 10% of LSOAs)': 'health_decile',
    'Crime Score': 'crime_score',
    'Crime Rank (where 1 is most deprived)': 'crime_rank',
    'Crime Decile (where 1 is most deprived 10% of LSOAs)': 'crime_decile',
    'Barriers to Housing and Services Score': 'housing_score',
    'Barriers to Housing and Services Rank (where 1 is most deprived)': 'housing_rank',
    'Barriers to Housing and Services Decile (where 1 is most deprived 10% of LSOAs)': 'housing_decile',
    'Living Environment Score': 'living_score',
    'Living Environment Rank (where 1 is most deprived)': 'living_rank',
    'Living Environment Decile (where 1 is most deprived 10% of LSOAs)': 'living_decile',
    # supplementary indices
    'Income Deprivation Affecting Children Index (IDACI) Score (rate)': 'poor_children_score',
    'Income Deprivation Affecting Children Index (IDACI) Rank (where 1 is most deprived)': 'poor_children_rank',
    'Income Deprivation Affecting Children Index (IDACI) Decile (where 1 is most deprived 10% of LSOAs)': 'poor_children_decile',
    'Income Deprivation Affecting Older People (IDAOPI) Score (rate)': 'poor_older_score',
    'Income Deprivation Affecting Older People (IDAOPI) Rank (where 1 is most deprived)': 'poor_older_rank',
    'Income Deprivation Affecting Older People (IDAOPI) Decile (where 1 is most deprived 10% of LSOAs)': 'poor_older_decile',
    # sub-domains
    'Children and Young People Sub-domain Score': 'young_sub_score',
    'Children and Young People Sub-domain Rank (where 1 is most deprived)': 'young_sub_rank',
    'Children and Young People Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': 'young_sub_decile',
    'Adult Skills Sub-domain Score': 'adult_skills_sub_score',
    'Adult Skills Sub-domain Rank (where 1 is most deprived)': 'adult_skills_sub_rank',
    'Adult Skills Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': 'adult_skills_sub_decile',
    'Geographical Barriers Sub-domain Score': 'geog_barriers_sub_score',
    'Geographical Barriers Sub-domain Rank (where 1 is most deprived)': 'geog_barriers_sub_rank',
    'Geographical Barriers Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': 'geog_barriers_sub_decile',
    'Wider Barriers Sub-domain Score': 'wider_barriers_sub_score',
    'Wider Barriers Sub-domain Rank (where 1 is most deprived)': 'wider_barriers_sub_rank',
    'Wider Barriers Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': 'wider_barriers_sub_decile',
    'Indoors Sub-domain Score': 'indoor_sub_score',
    'Indoors Sub-domain Rank (where 1 is most deprived)': 'indoor_sub_rank',
    'Indoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': 'indoor_sub_decile',
    'Outdoors Sub-domain Score': 'outdoor_sub_score',
    'Outdoors Sub-domain Rank (where 1 is most deprived)': 'outdoor_sub_rank',
    'Outdoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': 'outdoor_sub_decile',
    # population counts
    'Total population: mid 2012 (excluding prisoners)': 'total_population',
    'Dependent Children aged 0-15: mid 2012 (excluding prisoners)': 'aged_under_15_population',
    'Population aged 16-59: mid 2012 (excluding prisoners)': 'aged_16-59_population',
    'Older population aged 60 and over: mid 2012 (excluding prisoners)': 'aged_over_60_population',
    # The trailing space inside this key is deliberate: it has to match the
    # column header exactly for the rename to take effect.
    'Working age population 18-59/64: for use with Employment Deprivation Domain (excluding prisoners) ': 'working_age_population',
}

# rename() ignores keys that are not present, so this cell is safe to re-run.
loneliness_imd2 = loneliness_imd2.rename(columns=big_dict)

## introduce population density data


In [8]:
# Read the population density table and rename its 'Code' column to 'lsoa11'
# so it shares a key name with the loneliness/IMD frame.
pop_density = (
    pd.read_csv("england/Population_Density.csv")
    .rename(columns={"Code": "lsoa11"})
)

In [17]:
# 'lsoa_code' carries the same values as 'lsoa11' (the two were the join keys
# of the IMD merge), so it is redundant and dropped.
# errors='ignore' keeps the cell re-runnable: the frame is reassigned to itself,
# so without it a second execution raises KeyError because the column is gone.
loneliness_imd2 = loneliness_imd2.drop(columns='lsoa_code', errors='ignore')

Unnamed: 0,lsoa11,year,loneills,addiction_zscore,lsoa_name,district_code,district_name,imd_score,imd_rank,imd_decile,...,indoor_sub_rank,indoor_sub_decile,outdoor_sub_score,outdoor_sub_rank,outdoor_sub_decile,total_population,aged_under_15_population,aged_16-59_population,aged_over_60_population,working_age_population
0,E01000007,2015,-3.032939,-0.001290,Barking and Dagenham 015A,E09000002,Barking and Dagenham,42.513,3778.0,2.0,...,11759.0,4.0,1.279,2348.0,1.0,1478.0,372.0,1003.0,103.0,979.50
1,E01000009,2015,-6.786468,-0.101102,Barking and Dagenham 016B,E09000002,Barking and Dagenham,33.412,6750.0,3.0,...,9427.0,3.0,1.027,3794.0,2.0,1839.0,340.0,1257.0,242.0,1233.50
2,E01000010,2015,-1.346273,-0.101102,Barking and Dagenham 015C,E09000002,Barking and Dagenham,30.914,7796.0,3.0,...,5411.0,2.0,0.658,6946.0,3.0,3120.0,741.0,2241.0,138.0,2226.25
3,E01000012,2015,-2.063601,-0.101102,Barking and Dagenham 015D,E09000002,Barking and Dagenham,36.910,5475.0,2.0,...,4492.0,2.0,0.992,4034.0,2.0,2007.0,619.0,1282.0,106.0,1248.50
4,E01000017,2015,-0.972582,-0.101102,Barking and Dagenham 009C,E09000002,Barking and Dagenham,40.563,4357.0,2.0,...,11830.0,4.0,0.525,8498.0,3.0,1622.0,409.0,991.0,222.0,957.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6266,E01033758,2015,-0.932807,-0.101102,Liverpool 060B,E08000012,Liverpool,35.068,6121.0,2.0,...,2509.0,1.0,1.704,1003.0,1.0,1997.0,61.0,1868.0,68.0,1904.25
6267,E01033759,2015,-2.025628,-0.101102,Liverpool 010F,E08000012,Liverpool,54.124,1582.0,1.0,...,5987.0,2.0,0.834,5284.0,2.0,1195.0,283.0,702.0,210.0,716.25
6268,E01033762,2015,1.970892,-0.101102,Liverpool 060E,E08000012,Liverpool,26.541,9976.0,4.0,...,6832.0,3.0,1.310,2210.0,1.0,1795.0,50.0,1669.0,76.0,1678.25
6269,E01033764,2015,0.166426,0.553710,Liverpool 022E,E08000012,Liverpool,76.865,72.0,1.0,...,25196.0,8.0,1.405,1835.0,1.0,2158.0,339.0,1249.0,570.0,1303.00


In [31]:
# Attach population density to the loneliness/IMD frame. The join is an inner
# join on 'lsoa11' (the pandas default, spelled out here), so only LSOAs
# present in both frames are kept, with the density frame on the left.
density_cols = pop_density[['lsoa11', 'People per Sq Km']]
loneliness_imd = density_cols.merge(loneliness_imd2, how='inner', on='lsoa11')

In [34]:
# Give the density column a snake_case name in line with the other columns.
loneliness_imd = loneliness_imd.rename(
    {'People per Sq Km': 'population_density'},
    axis='columns',
)

In [35]:
# Display the final frame, which now includes the population_density column.
loneliness_imd

Unnamed: 0,lsoa11,population_density,year,loneills,addiction_zscore,lsoa_name,district_code,district_name,imd_score,imd_rank,...,indoor_sub_rank,indoor_sub_decile,outdoor_sub_score,outdoor_sub_rank,outdoor_sub_decile,total_population,aged_under_15_population,aged_16-59_population,aged_over_60_population,working_age_population
0,E01020636,1886,2015,0.651901,-0.101102,County Durham 001C,E06000047,County Durham,21.632,12979.0,...,32265.0,10.0,-1.482,31684.0,10.0,1704.0,306.0,956.0,442.0,976.50
1,E01020629,3270,2015,0.299419,-0.101102,County Durham 003C,E06000047,County Durham,19.084,14979.0,...,25589.0,8.0,-1.215,30432.0,10.0,1488.0,304.0,836.0,348.0,839.25
2,E01020655,242,2015,-0.231131,-0.101102,County Durham 003D,E06000047,County Durham,11.477,22411.0,...,29921.0,10.0,-1.748,32368.0,10.0,2039.0,333.0,1115.0,591.0,1142.50
3,E01020661,2193,2015,-1.047568,-0.094337,County Durham 004A,E06000047,County Durham,27.441,9495.0,...,19737.0,7.0,-0.446,22845.0,7.0,1841.0,316.0,1046.0,479.0,1073.50
4,E01020615,2871,2015,-1.014707,-0.100593,County Durham 005B,E06000047,County Durham,39.224,4745.0,...,32597.0,10.0,-0.440,22773.0,7.0,1786.0,352.0,984.0,450.0,981.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6266,E01029335,21,2015,0.223550,-0.101102,West Somerset 004B,E07000191,West Somerset,23.310,11850.0,...,99.0,1.0,-1.643,32166.0,10.0,1080.0,143.0,551.0,386.0,569.75
6267,E01029343,210,2015,-1.088309,-0.036195,West Somerset 004E,E07000191,West Somerset,22.763,12223.0,...,9563.0,3.0,-0.790,26952.0,9.0,1574.0,260.0,719.0,595.0,740.75
6268,E01029325,30,2015,0.069151,-0.101102,West Somerset 005A,E07000191,West Somerset,20.553,13796.0,...,761.0,1.0,-1.061,29329.0,9.0,1844.0,209.0,785.0,850.0,841.75
6269,E01029328,40,2015,1.478635,-0.101102,West Somerset 005B,E07000191,West Somerset,26.006,10284.0,...,478.0,1.0,-1.705,32304.0,10.0,1939.0,253.0,877.0,809.0,927.00


## note that outliers are not dropped here


In [37]:
# Compare shapes before and after the population density merge: the row count
# should be unchanged and the merged frame should have one extra column.
shape_without_density = loneliness_imd2.shape
shape_with_density = loneliness_imd.shape
print(shape_without_density, shape_with_density)

(6271, 60) (6271, 61)


In [38]:
# loneliness_imd now also contains the population_density column
