# Import libraries

In [1]:
# standard library (environment-variable lookup for the data path)

import os

# library/package for data manipulation

import pandas as pd

# Import dataset

In [2]:
# Read-in dataset
# The CSV location can be overridden with the CENSUS_RELIGION_CSV environment
# variable so the notebook is runnable on other machines; when the variable is
# not set, the original local path is used unchanged.

DATA_PATH = os.environ.get(
    'CENSUS_RELIGION_CSV',
    '/Users/loucap/Documents/GitWork/Census_data/Data/religion_cen.csv',
)
df = pd.read_csv(DATA_PATH)

In [3]:
# Inspect the data type pandas assigned to each column of the loaded frame

df.dtypes

Lower tier local authorities Code    object
Lower tier local authorities         object
Religion (10 categories) Code         int64
Religion (10 categories)             object
Observation                           int64
dtype: object

In [4]:
# Preview the first 10 rows of the raw data with df.head(10)

df.head(10)

Unnamed: 0,Lower tier local authorities Code,Lower tier local authorities,Religion (10 categories) Code,Religion (10 categories),Observation
0,E06000001,Hartlepool,-8,Does not apply,0
1,E06000001,Hartlepool,1,No religion,36995
2,E06000001,Hartlepool,2,Christian,48495
3,E06000001,Hartlepool,3,Buddhist,180
4,E06000001,Hartlepool,4,Hindu,222
5,E06000001,Hartlepool,5,Jewish,27
6,E06000001,Hartlepool,6,Muslim,1213
7,E06000001,Hartlepool,7,Sikh,166
8,E06000001,Hartlepool,8,Other religion,285
9,E06000001,Hartlepool,9,Not answered,4755


# Clean dataset

In [5]:
# Clean up column names: swap the verbose census headers for short identifiers

df = df.rename(
    columns={
        "Lower tier local authorities Code": "LA_code",
        "Lower tier local authorities": "LA_name",
        "Religion (10 categories) Code": "Religion_code",
        "Religion (10 categories)": "Religion_categories",
    }
)

In [6]:
# Check that the rename worked: preview the first rows and their column headers

df.head()

Unnamed: 0,LA_code,LA_name,Religion_code,Religion_categories,Observation
0,E06000001,Hartlepool,-8,Does not apply,0
1,E06000001,Hartlepool,1,No religion,36995
2,E06000001,Hartlepool,2,Christian,48495
3,E06000001,Hartlepool,3,Buddhist,180
4,E06000001,Hartlepool,4,Hindu,222


In [7]:
# Subset dataframe to remove redundant category.
# .copy() makes the filtered result an independent frame, so adding a column to
# it afterwards does not trigger pandas' SettingWithCopyWarning.

df = df[df['Religion_categories'] != 'Does not apply'].copy()

In [8]:
# Confirm the 'Does not apply' rows are gone: show the first 10 remaining rows
df.head(10)

Unnamed: 0,LA_code,LA_name,Religion_code,Religion_categories,Observation
1,E06000001,Hartlepool,1,No religion,36995
2,E06000001,Hartlepool,2,Christian,48495
3,E06000001,Hartlepool,3,Buddhist,180
4,E06000001,Hartlepool,4,Hindu,222
5,E06000001,Hartlepool,5,Jewish,27
6,E06000001,Hartlepool,6,Muslim,1213
7,E06000001,Hartlepool,7,Sikh,166
8,E06000001,Hartlepool,8,Other religion,285
9,E06000001,Hartlepool,9,Not answered,4755
11,E06000002,Middlesbrough,1,No religion,52415


# Pre-processing data

In [9]:
# Calculate percentages: each row's Observation as a share of the total
# Observation for its local authority, rounded to 2 decimal places.
# transform('sum') broadcasts the per-authority total back onto every row, so
# the division is vectorised instead of running a Python lambda per group.
# assign() returns a new frame rather than writing a column into a filtered
# slice, which avoids SettingWithCopyWarning.
# NOTE(review): grouping by LA_name assumes each name identifies exactly one
# authority - LA_code may be the safer key; confirm before switching.

df = df.assign(
    Percentage=(
        df['Observation']
        / df.groupby('LA_name')['Observation'].transform('sum')
        * 100
    ).round(2)
)

In [10]:
# Display the frame to check the new Percentage column (the rendered output is truncated for long frames)
df

Unnamed: 0,LA_code,LA_name,Religion_code,Religion_categories,Observation,Percentage
1,E06000001,Hartlepool,1,No religion,36995,40.06
2,E06000001,Hartlepool,2,Christian,48495,52.52
3,E06000001,Hartlepool,3,Buddhist,180,0.19
4,E06000001,Hartlepool,4,Hindu,222,0.24
5,E06000001,Hartlepool,5,Jewish,27,0.03
...,...,...,...,...,...,...
3305,W06000024,Merthyr Tydfil,5,Jewish,5,0.01
3306,W06000024,Merthyr Tydfil,6,Muslim,289,0.49
3307,W06000024,Merthyr Tydfil,7,Sikh,82,0.14
3308,W06000024,Merthyr Tydfil,8,Other religion,229,0.39


# Data Analysis

In [11]:
# Keep only the rows for Christian respondents and renumber the index from 0

Christianity_Frame = (
    df.loc[df['Religion_categories'] == 'Christian']
    .reset_index(drop=True)
)

In [12]:
# Top 5 local authorities for Christians: rank by Percentage (highest first),
# keep the first five rows and renumber them 0-4

top5_christian = (
    Christianity_Frame
    .sort_values(by='Percentage', ascending=False)
    .head(5)
    .reset_index(drop=True)
)

In [13]:
# Show the five authorities with the highest Christian percentage
top5_christian

Unnamed: 0,LA_code,LA_name,Religion_code,Religion_categories,Observation,Percentage
0,E08000011,Knowsley,2,Christian,102929,66.61
1,E07000124,Ribble Valley,2,Christian,40855,66.37
2,E07000029,Copeland,2,Christian,43645,65.07
3,E08000014,Sefton,2,Christian,179806,64.39
4,E08000010,Wigan,2,Christian,206870,62.82


In [17]:
# Keep only the rows for Muslim respondents and renumber the index from 0

Muslim_Frame = (
    df.loc[df['Religion_categories'] == 'Muslim']
    .reset_index(drop=True)
)

In [16]:
# Display the Muslim-only frame.
# NOTE(review): the saved output beneath this cell shows index labels 6, 16, 26, ...
# although Muslim_Frame is built with reset_index(drop=True), and the execution
# counts around here run 17 then 16 - the stored output looks stale. Re-run the
# notebook with Restart Kernel -> Run All before relying on it.
Muslim_Frame

Unnamed: 0,LA_code,LA_name,Religion_code,Religion_categories,Observation,Percentage
6,E06000001,Hartlepool,6,Muslim,1213,1.31
16,E06000002,Middlesbrough,6,Muslim,14703,10.22
26,E06000003,Redcar and Cleveland,6,Muslim,984,0.72
36,E06000004,Stockton-on-Tees,6,Muslim,6675,3.40
46,E06000005,Darlington,6,Muslim,1849,1.72
...,...,...,...,...,...,...
3266,W06000020,Torfaen,6,Muslim,363,0.39
3276,W06000021,Monmouthshire,6,Muslim,435,0.47
3286,W06000022,Newport,6,Muslim,11280,7.07
3296,W06000023,Powys,6,Muslim,462,0.35
