## Inequalities in Health in England

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import requests
import os
import re

### Data Gathering

In [2]:
# One-off download of the two raw files. The guard is deliberately left off
# (`if False`) so the notebook reads the local copies; switch it to True to
# fetch them again.
if False:
    url_local_health="https://wpieconomics.us13.list-manage.com/track/click?u=5331abc1c0dacc833dd4e807b&id=645de758bb&e=9f29c59835.csv"
    multiple_deprivation='https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/833970/File_1_-_IMD2019_Index_of_Multiple_Deprivation.xlsx'
    urls=[url_local_health,multiple_deprivation]
    for url in urls:
        # A timeout stops a stalled connection from hanging the cell, and
        # raise_for_status() prevents an HTTP error page from being saved
        # under a data-file name.
        response=requests.get(url,timeout=60)
        response.raise_for_status()
        # Local file name = third dot-separated token of the URL + its final
        # extension, which gives 'list-manage.csv' and 'service.xlsx'.
        url_parts=url.split(".")
        with open(url_parts[2]+"."+url_parts[-1],'wb') as file:
            file.write(response.content)

In [None]:
# Read the two local files: the life-expectancy CSV and the "IMD2019" sheet
# of the deprivation workbook.
life_exp_df = pd.read_csv("list-manage.csv")
imd_df = pd.read_excel("service.xlsx", sheet_name="IMD2019")

### Pre-loading dataset 1

In [None]:
life_exp_df.info()

### Pre-loading dataset 2

In [None]:
imd_df.info()

## Data Assessment

#### Assessing dataset 1

In [None]:
life_exp_df.head()

In [None]:
life_exp_df.columns

##### Outcome of Assessing Dataset 1
- Tidy the data table: put life expectancy in a single column
- Remove the title
- Move the city code to another column
- Rename the columns
- Treat NaN values
- Strip the city code from the city label
- Remove NaN values for life expectancy at birth, since that is the independent variable

### Assessing dataset 2

In [None]:
imd_df.info()

In [None]:
imd_df.head()

##### Outcome of Assessing Dataset 2
- Drop the LSOA code (2011) and LSOA name (2011) columns
- Rename the columns
- Convert IMD Rank to integer
- Convert IMD Decile to integer

## Data Cleaning

In [None]:
# Clean on copies so the frames as loaded stay untouched.
expectancy_df, deprivation_df = life_exp_df.copy(), imd_df.copy()

### Dataset 1
 Delete the first two rows and rename columns

In [None]:
# Column names for the life-expectancy table, in file order.
header = [
    "area_id",
    "city",
    "male_life_expectancy",
    "female_life_expectancy",
]

In [None]:
# Skip the first two rows and apply the clean column names.
# Slicing from the raw frame and taking an explicit copy makes the cell safe
# to re-run: slicing expectancy_df itself removed two more rows on every
# execution and left a slice of another frame for later cells to mutate.
expectancy_df=life_exp_df.iloc[2:].copy()
expectancy_df.columns=header
expectancy_df.head()

In [None]:
# Split each label on its run of digits. The capture group keeps the digits
# themselves, giving three parts: text before, the digits, and the remainder.
label_parts=expectancy_df["city"].str.split(r'(\d+)',expand=True,regex=True)
label_parts.columns=["city","area_code","empty_column"]
splited_df=label_parts
# Show what landed in the remainder column.
print(splited_df["empty_column"].value_counts())


In [None]:
# Remove the combined label column; the split-out parts live in splited_df.
# Rebinding instead of inplace=True avoids mutating a slice, and
# errors='ignore' lets the cell be re-run once the column is already gone
# (the in-place version raised KeyError on a second run).
expectancy_df=expectancy_df.drop(columns='city',errors='ignore')

In [None]:
# Attach the split-out name and code (matched on the shared row labels),
# renumber the rows from 0, and put the identifier columns first.
column_order=['area_id','area_code','city',"male_life_expectancy","female_life_expectancy"]
expectancy_df=(
    expectancy_df
    .assign(city=splited_df['city'],area_code=splited_df['area_code'])
    .reset_index(drop=True)
    .loc[:,column_order]
)
expectancy_df.head()

In [None]:
#convert life expectancy to type float
expectancy_df.male_life_expectancy=expectancy_df.male_life_expectancy.astype(float)
expectancy_df.female_life_expectancy=expectancy_df.female_life_expectancy.astype(float)

In [None]:
expectancy_df.info()

### Treat NaN values

In [None]:
expectancy_df.isnull().sum()

In [None]:
expectancy_df.head()

In [None]:
# Trim surrounding whitespace from the city names, then list the rows that
# have no male life-expectancy value.
expectancy_df["city"]=expectancy_df["city"].str.strip()
expectancy_df.loc[expectancy_df["male_life_expectancy"].isna()]

In [None]:
#replace empty cells with corresponding mean of specific city
for i,cty in enumerate(expectancy_df['city']): 
    if pd.isnull(expectancy_df.loc[i,'male_life_expectancy']):
        expectancy_df.loc[i,'male_life_expectancy']=expectancy_df.query("city==@cty")["male_life_expectancy"].mean()

In [None]:
#replace empty cells with corresponding mean of specific city
for i,cty in enumerate(expectancy_df['city']): 
    if pd.isnull(expectancy_df.loc[i,'female_life_expectancy']):
        expectancy_df.loc[i,'female_life_expectancy']=expectancy_df.query("city==@cty")["female_life_expectancy"].mean()

In [None]:
expectancy_df.isnull().sum()

In [None]:
expectancy_df.info()

In [None]:
# Rows that still have no male life-expectancy value.
expectancy_df.loc[expectancy_df["male_life_expectancy"].isna()]

In [None]:
# Per-city mean of the frame's last two columns.
value_columns=list(expectancy_df.columns[-2:])
expectancy_df.groupby("city",as_index=False)[value_columns].mean()

In [None]:
# Two independent copies of the wide table, one to be reshaped per gender.
male_expectancy,female_expectancy=expectancy_df.copy(),expectancy_df.copy()

In [None]:
# Keep only the male value under the shared name `life_expectancy`, then tag
# every row with gender "Male" in column position 3.
male_expectancy=(
    male_expectancy
    .drop(columns="female_life_expectancy")
    .rename(columns={"male_life_expectancy":"life_expectancy"})
)
male_expectancy.insert(3,"gender","Male")

In [None]:
# Keep only the female value under the shared name `life_expectancy`, then
# tag every row with gender "Female" in column position 3.
female_expectancy=(
    female_expectancy
    .drop(columns="male_life_expectancy")
    .rename(columns={"female_life_expectancy":"life_expectancy"})
)
female_expectancy.insert(3,"gender","Female")

In [None]:
female_expectancy.head()

In [None]:
male_expectancy.head()

In [None]:
# `df` is only created further down the notebook, so on a fresh kernel this
# cell raised NameError. Inspect the two per-gender frames that exist here.
male_expectancy.shape,female_expectancy.shape

In [None]:
# Stack the male and female tables into one long table with a fresh
# 0..n-1 index.
gender_exp_df=pd.concat([male_expectancy,female_expectancy],ignore_index=True)

In [None]:
gender_exp_df.head()

In [None]:
# Life-expectancy values per gender; each Series keeps its row labels from
# gender_exp_df.
is_male=gender_exp_df['gender']=="Male"
is_female=gender_exp_df['gender']=="Female"
male=gender_exp_df.loc[is_male,'life_expectancy']
female=gender_exp_df.loc[is_female,'life_expectancy']

In [None]:
# Scatter each area's male value against its female value, read from the wide
# table where both sit on the same row. The long-format `male` and `female`
# Series carry disjoint row labels (male rows come before female rows in
# gender_exp_df), so no male value shares a label with a female value and
# pairing them by index produces no complete (x, y) points.
sns.relplot(data=expectancy_df,x="male_life_expectancy",y="female_life_expectancy");

In [None]:
gender_exp_df

In [None]:
# Working copy of the long table, used by the plots and summaries below.
df = gender_exp_df.copy()

In [None]:
# Overlaid histograms of male and female life expectancy with their means.
male_life = df.loc[df["gender"] == "Male", "life_expectancy"].dropna()
female_life = df.loc[df["gender"] == "Female", "life_expectancy"].dropna()

# Series.mean() skips NaN, so these equal the means over the full columns.
m_mean = male_life.mean()
f_mean = female_life.mean()

# One shared set of 30 bin edges: with bins computed separately per gender
# the bar widths differ and the overlaid heights cannot be compared.
# Dropping NaN first keeps the bin range finite.
shared_bins = np.histogram_bin_edges(df["life_expectancy"].dropna(), bins=30)

fig, ax = plt.subplots()
ax.hist(male_life, bins=shared_bins, label='Male', alpha=0.5)
ax.hist(female_life, bins=shared_bins, label='Female', alpha=0.5)
ax.axvline(x=m_mean, label='male_avg', color='blue')
ax.axvline(x=f_mean, label='female_avg', color='#AF0B1E')
ax.set(
    title="Distribution of life expectancy by gender",
    xlabel="Life expectancy",
    ylabel="Count",
)
ax.legend();

In [None]:
sns.kdeplot(data=df, x='life_expectancy', hue='gender');

In [None]:
# Mean life expectancy for every (city, gender) pair, as a flat table.
grouped = df.groupby(['city', 'gender'], as_index=False)[['life_expectancy']].mean()
grouped.head()

In [None]:
# Per-city means of the value columns (everything from the third column on),
# flagged where the female mean exceeds the male mean.
city_means = expectancy_df.iloc[:, 2:].groupby('city').mean()
compare = city_means.assign(
    female_higher=city_means.female_life_expectancy > city_means.male_life_expectancy
)
n_female_higher = compare.female_higher.sum()
n_cities = compare.shape[0]
print('Females have higher life expectancy than males in {} of the {} cities in the dataset'.format(n_female_higher,n_cities))

In [None]:
# Cities where the female mean is not higher than the male mean.
compare.loc[~compare.female_higher]

In [None]:
# Midpoint of the overall female and male means, rounded to two decimals.
# NOTE(review): this is an unweighted average of the rows in the table, not a
# population-weighted figure. Confirm that is what "England average" means.
gender_means=[f_mean,m_mean]
england_life_expectancy=round(np.mean(gender_means),2)
england_life_expectancy

In [None]:
# Mean life expectancy per city (both genders pooled), with a flag for cities
# above the overall figure.
city_exp=(
    gender_exp_df
    .groupby("city",as_index=False)["life_expectancy"]
    .mean()
    .assign(above_average=lambda means: means["life_expectancy"]>england_life_expectancy)
)


In [None]:
# Count the cities flagged as above the overall figure.
n_above=city_exp.query("above_average==True")['above_average'].count()
print('{} cities have life expectancy above the average life expectancy in England'.format(n_above))

In [None]:
# Count the cities not flagged as above the overall figure.
n_not_above=city_exp.query("above_average==False")['above_average'].count()
print('{} cities have life expectancy below the average life expectancy in England'.format(n_not_above))

In [None]:
city_exp.above_average.value_counts()

In [None]:
city_exp.query("above_average==False")

In [None]:
import folium

In [None]:
pip intall folium