Author: Niamh Hogan

# Suicide Mortality in Ireland: Demographic Trends and EU Comparison (2012–2022)

In [1]:
# Imports

import pandas as pd
import matplotlib.pyplot as plt


## <b>Data Cleansing</b>

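In this section, the raw datasets are loaded and prepared for analysis in six steps: reading in the datasets, dropping unnecessary columns, dropping unnecessary rows, checking data types, setting indexes, and sorting columns.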

<b>Step 1: Read in datasets</b>

In [2]:
# Load the Irish year / age group / sex dataset (CSO) from the data folder
irish_age_sex_csv = './data/irishdata_year_age_sex_cso.csv'
irish_age_sex_df = pd.read_csv(irish_age_sex_csv)

# Preview the first three rows to confirm the file parsed as expected
irish_age_sex_df.head(n=3)

Unnamed: 0,Statistic Label,Year,Sex,Cause of Death,Age Group at Death,UNIT,VALUE
0,Revised Deaths Occurring,2007,Both sexes,X60-X84 Intentional self-harm,Under 1 year,Number,
1,Revised Deaths Occurring,2007,Both sexes,X60-X84 Intentional self-harm,1 - 4 years,Number,
2,Revised Deaths Occurring,2007,Both sexes,X60-X84 Intentional self-harm,5 - 9 years,Number,


In [3]:
# Load the Irish year / county / sex dataset (CSO) from the data folder
irish_counties_csv = './data/irishdata_year_counties_sex_cso.csv'
irish_counties_df = pd.read_csv(irish_counties_csv)

# Preview the first three rows to confirm the file parsed as expected
irish_counties_df.head(n=3)

Unnamed: 0,Statistic Label,Year,Sex,County,Cause of Death,UNIT,VALUE
0,Deaths Occuring,2015,Both sexes,Ireland,Intentional self-harm (X60-X84),Number,500.0
1,Deaths Occuring,2015,Both sexes,Carlow County Council,Intentional self-harm (X60-X84),Number,7.0
2,Deaths Occuring,2015,Both sexes,Dublin City Council,Intentional self-harm (X60-X84),Number,54.0


In [4]:
# Load the WHO EU deaths dataset from the data folder.
# NOTE(review): skiprows=30 presumably skips a non-tabular preamble at the top
# of the WHO export -- confirm against the raw file.
# low_memory=False makes pandas parse the file in one pass rather than in
# chunks, which avoids mixed-dtype inference within a column.
eu_deaths_csv = './data/who_eu_deaths.csv'
eu_deaths_df = pd.read_csv(eu_deaths_csv, low_memory=False, skiprows=30)

# Preview the first three rows to confirm the file parsed as expected
eu_deaths_df.head(n=3)

Unnamed: 0,COUNTRY,COUNTRY_GRP,AGE_GRP_LIST,SEX,SUBNATIONAL_MDB,YEAR,VALUE
0,ALB,,TOTAL,FEMALE,,1987.0,25.0
1,ALB,,TOTAL,FEMALE,,1988.0,22.0
2,ALB,,TOTAL,FEMALE,,1989.0,15.0


In [5]:
# Load the EU population dataset (eu_pop_2012_2022.csv) from the data folder
eu_pop_csv = './data/eu_pop_2012_2022.csv'
eu_pop_df = pd.read_csv(eu_pop_csv)

# Preview the first three rows to confirm the file parsed as expected
eu_pop_df.head(n=3)

Unnamed: 0,Time,geo,Value,age,sex,unit
0,2012,AT,8408121,TOTAL,T,NR
1,2012,BE,11075889,TOTAL,T,NR
2,2012,BG,7327224,TOTAL,T,NR


<b>Step 2: Drop Unnecessary Columns</b>

In [6]:
# Columns of irish_age_sex_df that are not needed for the analysis
drop_col_list1 = ["Statistic Label", "Cause of Death", "UNIT"]

# Rebind to the trimmed frame instead of mutating in place, and ignore columns
# that are already gone, so re-running this cell does not raise a KeyError.
irish_age_sex_df = irish_age_sex_df.drop(columns=drop_col_list1, errors="ignore")

# sanity check
print(irish_age_sex_df.head(3))

   Year         Sex Age Group at Death  VALUE
0  2007  Both sexes       Under 1 year    NaN
1  2007  Both sexes        1 - 4 years    NaN
2  2007  Both sexes        5 - 9 years    NaN


In [7]:
# Columns of irish_counties_df that are not needed for the analysis
drop_col_list2 = ["Statistic Label", "Cause of Death", "UNIT"]

# Rebind to the trimmed frame instead of mutating in place, and ignore columns
# that are already gone, so re-running this cell does not raise a KeyError.
irish_counties_df = irish_counties_df.drop(columns=drop_col_list2, errors="ignore")

# sanity check
print(irish_counties_df.head(3))

   Year         Sex                 County  VALUE
0  2015  Both sexes                Ireland  500.0
1  2015  Both sexes  Carlow County Council    7.0
2  2015  Both sexes    Dublin City Council   54.0


In [8]:
# Columns of eu_deaths_df that are not needed for the analysis
drop_col_list3 = ["COUNTRY_GRP", "AGE_GRP_LIST", "SUBNATIONAL_MDB"]

# Rebind to the trimmed frame instead of mutating in place, and ignore columns
# that are already gone, so re-running this cell does not raise a KeyError.
eu_deaths_df = eu_deaths_df.drop(columns=drop_col_list3, errors="ignore")

# sanity check
print(eu_deaths_df.head(3))

  COUNTRY     SEX    YEAR  VALUE
0     ALB  FEMALE  1987.0   25.0
1     ALB  FEMALE  1988.0   22.0
2     ALB  FEMALE  1989.0   15.0


In [9]:
# Columns of eu_pop_df that are not needed for the analysis
drop_col_list4 = ["age", "sex", "unit"]

# Rebind to the trimmed frame instead of mutating in place, and ignore columns
# that are already gone, so re-running this cell does not raise a KeyError.
eu_pop_df = eu_pop_df.drop(columns=drop_col_list4, errors="ignore")

# sanity check
print(eu_pop_df.head(3))

   Time geo     Value
0  2012  AT   8408121
1  2012  BE  11075889
2  2012  BG   7327224


<b>Step 3: Drop Unnecessary Rows</b>

<b>Step 4: Check data types</b>

<b>Step 5: Set Indexes</b>

<b>Step 6: Sort Columns</b>