#  Analysing the correlation between health condition and poverty.

In [76]:
# Importing libraries.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Remove pandas' cap on displayed rows: with None every row of a displayed
# frame/Series is rendered, so inspection cells below should stay bounded
# with head()/tail().
pd.set_option('display.max_rows', None)

In [77]:
# Loading dataset.
# low_memory=False asks pandas to parse the file in one pass instead of in
# chunks, which avoids mixed-type inference within a column.
df = pd.read_csv(
    filepath_or_buffer='FRS_Nov_13.csv',
    low_memory=False,
)

# Preview the first 20 rows of the raw frame.
df.head(n=20)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Disability Status of the Individual (dis),Not a disabled child / adult / person,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Disabled child / adult / person,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
0,,,,,,Employment Status of the Adult (high level bre...,Employee,Employee - RSE,Employee - Annotations,Unemployed,Unemployed - RSE,Unemployed - Annotations,Employee,Employee - RSE,Employee - Annotations,Unemployed,Unemployed - RSE,Unemployed - Annotations,
1,Financial Year,Difficulty with mental health,Government office region of the Household in t...,"Main source of Household's total, gross Income","Adult total, net Income received from all sour...",Type of Individual,,,,,,,,,,,,,
2,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,..,0.00,,..,0.00,,..,0.00,,..,0.00,,
3,,,,,,Working-age,3812,0.00,,18436,0.00,,..,0.00,,1696,0.00,,
4,,,,,,Pensioner (spa),..,0.00,,..,0.00,,..,0.00,,..,0.00,,
5,,,,,£50 - £99.99,Child,..,0.00,,..,0.00,,..,0.00,,..,0.00,,
6,,,,,,Working-age,12300,0.00,,8359,0.00,,4016,0.00,,3163,0.00,,
7,,,,,,Pensioner (spa),..,0.00,,..,0.00,,..,0.00,,..,0.00,,
8,,,,,£100 - £149.99,Child,..,0.00,,..,0.00,,..,0.00,,..,0.00,,
9,,,,,,Working-age,19707,0.00,,4115,0.00,,3927,0.00,,..,0.00,,


In [78]:
# (rows, columns) of the frame exactly as loaded, before any cleaning.
df.shape

(136082, 19)

In [79]:
# Per-column non-null counts and dtypes of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136082 entries, 0 to 136081
Data columns (total 19 columns):
 #   Column                                     Non-Null Count   Dtype  
---  ------                                     --------------   -----  
 0   Unnamed: 0                                 22 non-null      object 
 1   Unnamed: 1                                 43 non-null      object 
 2   Unnamed: 2                                 505 non-null     object 
 3   Unnamed: 3                                 5041 non-null    object 
 4   Unnamed: 4                                 45361 non-null   object 
 5   Disability Status of the Individual (dis)  136082 non-null  object 
 6   Not a disabled child / adult / person      136081 non-null  object 
 7   Unnamed: 7                                 136081 non-null  object 
 8   Unnamed: 8                                 1 non-null       object 
 9   Unnamed: 9                                 136081 non-null  object 
 10  Unnamed:

In [80]:
# First five rows: rows 0 and 1 carry header labels rather than data, and the
# dimension labels appear only on the first row of each group.
df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Disability Status of the Individual (dis),Not a disabled child / adult / person,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Disabled child / adult / person,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
0,,,,,,Employment Status of the Adult (high level bre...,Employee,Employee - RSE,Employee - Annotations,Unemployed,Unemployed - RSE,Unemployed - Annotations,Employee,Employee - RSE,Employee - Annotations,Unemployed,Unemployed - RSE,Unemployed - Annotations,
1,Financial Year,Difficulty with mental health,Government office region of the Household in t...,"Main source of Household's total, gross Income","Adult total, net Income received from all sour...",Type of Individual,,,,,,,,,,,,,
2,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,..,0.00,,..,0.00,,..,0.00,,..,0.00,,
3,,,,,,Working-age,3812,0.00,,18436,0.00,,..,0.00,,1696,0.00,,
4,,,,,,Pensioner (spa),..,0.00,,..,0.00,,..,0.00,,..,0.00,,


In [81]:
# Replacing column names.
# The export has a multi-row header, so pandas labelled most columns
# "Unnamed: N". The columns next to each count column hold its "RSE" and
# "Annotations" companions (see row 0 of the raw frame) and the last column
# appears empty in the preview; they get throwaway "ExtraN" names so they can
# be dropped together.
column_names = [
    "Financial year",
    "Difficulty with mental health",
    "Region",
    "Main source of Income",
    "Adult net income",
    "Type of individual",
    "Not disabled_employee",
    "Extra1",
    "Extra2",
    "Not disabled_unemployed",
    "Extra3",
    "Extra4",
    "Disabled_employee",
    "Extra5",
    "Extra6",
    "Disabled_unemployed",
    "Extra7",
    "Extra8",
    "Extra9",
]
df.columns = column_names

# Dropping unnecessary columns: every "Extra*" placeholder named above.
# The list is derived from column_names so the two cannot drift apart, and
# errors="ignore" is deliberately not used: the names were assigned on the
# line above, so a missing one would be a typo and should fail loudly.
extra_columns = [name for name in column_names if name.startswith("Extra")]
df = df.drop(columns=extra_columns)

# Dropping the first two rows (index 0 and 1): they hold the remaining header
# labels (employment-status names and dimension names), not data.
df = df.drop(index=[0, 1]).reset_index(drop=True)

In [82]:
# First 100 rows after renaming and trimming; the dimension columns are still
# filled only on the first row of each group at this point.
df.head(100)

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Not disabled_employee,Not disabled_unemployed,Disabled_employee,Disabled_unemployed
0,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,..,..,..,..
1,,,,,,Working-age,3812,18436,..,1696
2,,,,,,Pensioner (spa),..,..,..,..
3,,,,,£50 - £99.99,Child,..,..,..,..
4,,,,,,Working-age,12300,8359,4016,3163
5,,,,,,Pensioner (spa),..,..,..,..
6,,,,,£100 - £149.99,Child,..,..,..,..
7,,,,,,Working-age,19707,4115,3927,..
8,,,,,,Pensioner (spa),..,..,..,..
9,,,,,£150 - £199.99,Child,..,..,..,..


In [83]:
# Looking at the unique values of every column in the frame (not only
# 'Financial year'), one printed line per column.
for column_name in df.columns:
    distinct_values = df[column_name].unique()
    print(f"Unique values of {column_name} :", distinct_values)

Unique values of Financial year : ['2002-03' nan '2003-04' '2004-05' '2005-06' '2006-07' '2007-08' '2008-09'
 '2009-10' '2010-11' '2011-12' '2012-13' '2013-14' '2014-15' '2015-16'
 '2016-17' '2017-18' '2018-19' '2019-20' '2020-21 (covid)' '2021-22'
 '2022-23']
Unique values of Difficulty with mental health : ['No or none' nan 'Yes']
Unique values of Region : ['North East (E12000001)' nan 'North West (E12000002)'
 'Yorkshire and The Humber (E12000003)' 'East Midlands (E12000004)'
 'West Midlands (E12000005)' 'East (E12000006)' 'London (E12000007)'
 'South East (E12000008)' 'South West (E12000009)' 'Wales (W92000004)'
 'Scotland (S92000003)' 'Northern Ireland (N92000002)']
Unique values of Main source of Income : ['Wages and salaries (dividend, sc)' nan
 'Self-employed income (dividend, sc)'
 'State Pension plus any Income Support / Pension Credit (sp, spa, ur)'
 'Disability benefits (disben, ur)' 'Tax Credits (ur)'
 'Universal Credit (ur)' 'Other benefits (ur)' 'Non-state pensions (nsp)

In [84]:
# Names of the first five columns - the dimension columns whose blanks are
# forward-filled in the following cell.
df.columns.to_list()[0:5]

['Financial year',
 'Difficulty with mental health',
 'Region',
 'Main source of Income',
 'Adult net income']

In [85]:
# Filling missing values in the 'Financial year',
# 'Difficulty with mental health', 'Region', 'Main source of Income',
# and 'Adult net income' columns.
# The export writes each label only on the first row of its group, so every
# blank below it is filled with the last label seen above (forward fill).
# The columns are named explicitly rather than sliced by position, so the
# cell keeps working if the column order ever changes.
dimension_columns = [
    'Financial year',
    'Difficulty with mental health',
    'Region',
    'Main source of Income',
    'Adult net income',
]
df[dimension_columns] = df[dimension_columns].ffill()

df.tail()

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Not disabled_employee,Not disabled_unemployed,Disabled_employee,Disabled_unemployed
136075,2022-23,Yes,Northern Ireland (N92000002),Other sources,£400 - £499.99,Working-age,..,..,..,..
136076,2022-23,Yes,Northern Ireland (N92000002),Other sources,£400 - £499.99,Pensioner (spa),..,..,..,..
136077,2022-23,Yes,Northern Ireland (N92000002),Other sources,£500 or more,Child,..,..,..,..
136078,2022-23,Yes,Northern Ireland (N92000002),Other sources,£500 or more,Working-age,..,..,..,..
136079,2022-23,Yes,Northern Ireland (N92000002),Other sources,£500 or more,Pensioner (spa),..,..,..,..


In [86]:
# Re-check non-null counts and dtypes after the forward fill.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136080 entries, 0 to 136079
Data columns (total 10 columns):
 #   Column                         Non-Null Count   Dtype 
---  ------                         --------------   ----- 
 0   Financial year                 136080 non-null  object
 1   Difficulty with mental health  136080 non-null  object
 2   Region                         136080 non-null  object
 3   Main source of Income          136080 non-null  object
 4   Adult net income               136080 non-null  object
 5   Type of individual             136080 non-null  object
 6   Not disabled_employee          136080 non-null  object
 7   Not disabled_unemployed        136080 non-null  object
 8   Disabled_employee              136080 non-null  object
 9   Disabled_unemployed            136080 non-null  object
dtypes: object(10)
memory usage: 10.4+ MB


In [87]:
# Summary (count / unique / top / freq) of the columns, all of which are
# still object dtype; the count columns use '..' as a placeholder value.
df.describe()

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Not disabled_employee,Not disabled_unemployed,Disabled_employee,Disabled_unemployed
count,136080,136080,136080,136080,136080,136080,136080,136080,136080,136080
unique,21,2,12,10,9,3,8641,3683,4791,1848
top,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,..,..,..,..
freq,6480,68040,11340,13608,15120,45360,120067,130535,127165,133239


In [88]:
# First five rows after the forward fill: every dimension column is populated.
df.head()

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Not disabled_employee,Not disabled_unemployed,Disabled_employee,Disabled_unemployed
0,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,..,..,..,..
1,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Working-age,3812,18436,..,1696
2,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Pensioner (spa),..,..,..,..
3,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Child,..,..,..,..
4,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Working-age,12300,8359,4016,3163


In [None]:
# Melting the columns.
# Reshape from wide to long: the four disability/employment count columns are
# stacked into one 'Population count' column, with the originating column name
# recorded in 'Disability_employment_status'. The six dimension columns are
# repeated on every resulting row.
dimension_columns = [
    'Financial year',
    'Difficulty with mental health',
    'Region',
    'Main source of Income',
    'Adult net income',
    'Type of individual',
]
count_columns = [
    'Not disabled_employee',
    'Not disabled_unemployed',
    'Disabled_employee',
    'Disabled_unemployed',
]
melted_df = df.melt(
    id_vars=dimension_columns,
    value_vars=count_columns,
    var_name='Disability_employment_status',
    value_name='Population count',
)

melted_df.head()

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Disability_employment_status,Population count
0,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,Not disabled_employee,..
1,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Working-age,Not disabled_employee,3812
2,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Pensioner (spa),Not disabled_employee,..
3,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Child,Not disabled_employee,..
4,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Working-age,Not disabled_employee,12300


In [90]:
# Distinct labels produced by the melt - the four original count column names.
melted_df['Disability_employment_status'].unique()

array(['Not disabled_employee', 'Not disabled_unemployed',
       'Disabled_employee', 'Disabled_unemployed'], dtype=object)

In [None]:
# Deriving the 'Employment status' column from the combined
# 'Disability_employment_status' label: each of the four labels is looked up
# in the table below, keeping only its employment half.
employment_by_label = {
    'Not disabled_employee': 'Employee',
    'Not disabled_unemployed': 'Unemployed',
    'Disabled_employee': 'Employee',
    'Disabled_unemployed': 'Unemployed',
}
combined_labels = melted_df['Disability_employment_status']
melted_df['Employment status'] = combined_labels.map(employment_by_label)

In [None]:
# Deriving the 'Disability status' column from the combined
# 'Disability_employment_status' label: each of the four labels is looked up
# in the table below, keeping only its disability half.
disability_by_label = {
    'Not disabled_employee': 'Not disabled',
    'Not disabled_unemployed': 'Not disabled',
    'Disabled_employee': 'Disabled',
    'Disabled_unemployed': 'Disabled',
}
combined_labels = melted_df['Disability_employment_status']
melted_df['Disability status'] = combined_labels.map(disability_by_label)

In [95]:
# Dropping 'Disability_employment_status' column: its information now lives in
# the separate 'Employment status' and 'Disability status' columns.
# columns= already names the axis, so axis=1 is not needed, and the result is
# reassigned instead of mutating the frame in place.
melted_df = melted_df.drop(columns='Disability_employment_status')

In [96]:
# First five rows with the combined label replaced by the two status columns.
melted_df.head()

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Population count,Employment status,Disability status
0,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,..,Employee,Not disabled
1,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Working-age,3812,Employee,Not disabled
2,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Pensioner (spa),..,Employee,Not disabled
3,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Child,..,Employee,Not disabled
4,2002-03,No or none,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Working-age,12300,Employee,Not disabled


In [97]:
# Distinct financial-year labels before cleaning; note the '2020-21 (covid)'
# entry, which carries a suffix the other labels do not have.
melted_df['Financial year'].unique()

array(['2002-03', '2003-04', '2004-05', '2005-06', '2006-07', '2007-08',
       '2008-09', '2009-10', '2010-11', '2011-12', '2012-13', '2013-14',
       '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20',
       '2020-21 (covid)', '2021-22', '2022-23'], dtype=object)

In [None]:
# Cleaning 'Financial year' column.
# Extracting the starting year as an integer: the text before the first '-'
# is kept, e.g. '2002-03' -> 2002 and '2020-21 (covid)' -> 2020.
# astype(str) comes first so that re-running the cell on the already-converted
# integers is a no-op instead of failing on the .str accessor.
melted_df['Financial year'] = (
    melted_df['Financial year']
    .astype(str)
    .str.split('-')
    .str[0]
    .astype(int)
)

In [99]:
# Distinct values after cleaning: integer starting years.
melted_df['Financial year'].unique()

array([2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022])

In [100]:
# Distinct labels of the mental-health flag before cleaning.
melted_df['Difficulty with mental health'].unique()

array(['No or none', 'Yes'], dtype=object)

In [101]:
# Cleaning 'Difficulty with mental health' column: shorten 'No or none' to
# 'No' and leave 'Yes' as it is.
# replace() is used instead of map(): map() turns every value missing from
# the dict into NaN, so re-running the cell after 'No or none' had already
# become 'No' would silently blank out those rows. replace() leaves
# unlisted values untouched, which makes the cell safe to re-run.
melted_df['Difficulty with mental health'] = melted_df[
    'Difficulty with mental health'
].replace({'No or none': 'No'})

In [107]:
# First five rows after cleaning 'Financial year' and the mental-health flag.
melted_df.head()

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Population count,Employment status,Disability status
0,2002,No,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Child,..,Employee,Not disabled
1,2002,No,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Working-age,3812,Employee,Not disabled
2,2002,No,North East (E12000001),"Wages and salaries (dividend, sc)",Less than £50,Pensioner (spa),..,Employee,Not disabled
3,2002,No,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Child,..,Employee,Not disabled
4,2002,No,North East (E12000001),"Wages and salaries (dividend, sc)",£50 - £99.99,Working-age,12300,Employee,Not disabled


In [104]:
# Looking at unique values in the 'Region' column of melted_df - the frame
# that is actually cleaned below - rather than the pre-melt df, so this check
# reflects the current state of the data being modified.
melted_df['Region'].unique()

array(['North East (E12000001)', 'North West (E12000002)',
       'Yorkshire and The Humber (E12000003)',
       'East Midlands (E12000004)', 'West Midlands (E12000005)',
       'East (E12000006)', 'London (E12000007)', 'South East (E12000008)',
       'South West (E12000009)', 'Wales (W92000004)',
       'Scotland (S92000003)', 'Northern Ireland (N92000002)'],
      dtype=object)

In [109]:
# Cleaning 'Region' column data.
# Strip the trailing area code in parentheses (one capital letter followed by
# eight digits) together with the space before it, e.g.
# 'North East (E12000001)' -> 'North East'.
# A pattern is used instead of a lookup table passed to map(): map() turns
# values missing from the table into NaN, so re-running the cell on already
# cleaned names would wipe the column. Names without a code pass through
# unchanged, which makes the cell safe to re-run.
melted_df['Region'] = melted_df['Region'].str.replace(
    r'\s*\([A-Z]\d{8}\)$', '', regex=True
)

In [106]:
# Looking at unique values in 'Main source of Income' column before cleaning;
# most labels end with a parenthesised abbreviation list.
melted_df['Main source of Income'].unique()

array(['Wages and salaries (dividend, sc)',
       'Self-employed income (dividend, sc)',
       'State Pension plus any Income Support / Pension Credit (sp, spa, ur)',
       'Disability benefits (disben, ur)', 'Tax Credits (ur)',
       'Universal Credit (ur)', 'Other benefits (ur)',
       'Non-state pensions (nsp)', 'Investments', 'Other sources'],
      dtype=object)

In [111]:
# Cleaning 'Main source of Income' column data: replace the long export
# labels with short names.
# replace() is used instead of map(): map() turns every value missing from
# the dict into NaN, so re-running the cell on already shortened labels would
# blank out the column. replace() leaves unlisted values untouched, which is
# also why 'Investments' and 'Other sources' (already in final form) need no
# entry here.
short_income_labels = {
    'Wages and salaries (dividend, sc)': 'Wages and salaries',
    'Self-employed income (dividend, sc)': 'Self-employed income',
    'State Pension plus any Income Support / Pension Credit (sp, spa, ur)': 'State Pension',
    'Disability benefits (disben, ur)': 'Disability benefits',
    'Tax Credits (ur)': 'Tax Credits',
    'Universal Credit (ur)': 'Universal Credit',
    'Other benefits (ur)': 'Other benefits',
    'Non-state pensions (nsp)': 'Non-state pension',
}
melted_df['Main source of Income'] = melted_df['Main source of Income'].replace(
    short_income_labels
)

In [112]:
# First five rows after cleaning 'Region' and 'Main source of Income'; the
# 'Population count' column still contains the '..' placeholder.
melted_df.head()

Unnamed: 0,Financial year,Difficulty with mental health,Region,Main source of Income,Adult net income,Type of individual,Population count,Employment status,Disability status
0,2002,No,North East,Wages and salaries,Less than £50,Child,..,Employee,Not disabled
1,2002,No,North East,Wages and salaries,Less than £50,Working-age,3812,Employee,Not disabled
2,2002,No,North East,Wages and salaries,Less than £50,Pensioner (spa),..,Employee,Not disabled
3,2002,No,North East,Wages and salaries,£50 - £99.99,Child,..,Employee,Not disabled
4,2002,No,North East,Wages and salaries,£50 - £99.99,Working-age,12300,Employee,Not disabled
