In [1]:
# Global Suicide Rate Analysis

# Importing dependencies
import pandas as pd


In [2]:
# Load the master suicide-rates CSV into a DataFrame.
master_data_df = pd.read_csv(
    "./source_data/master.csv",
)

# Print the number of rows, then preview the first rows as the cell output.
print(master_data_df.shape[0])
master_data_df.head()

27820


Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers


In [3]:
# Clean up the raw data: drop exact duplicate rows and normalise the headers.
# Every column label is stripped of surrounding whitespace instead of renaming
# one hard-coded padded label, so ' gdp_for_year ($) ' still becomes
# 'gdp_for_year ($)' while a header padded differently can no longer slip
# through unnoticed (DataFrame.rename ignores mapping keys that match no
# column by default). Written as a single chain rather than inplace=True.
master_data_df = (
    master_data_df
    .drop_duplicates()
    .rename(columns=str.strip)
)

# Row count after de-duplication, followed by the non-null count per column.
print(len(master_data_df))
print(master_data_df.count())


27820
country               27820
year                  27820
sex                   27820
age                   27820
suicides_no           27820
population            27820
suicides/100k pop     27820
country-year          27820
HDI for year           8364
gdp_for_year ($)      27820
gdp_per_capita ($)    27820
generation            27820
dtype: int64


In [4]:
# Keep only the ten-year window 2006-2015: rows whose year is greater than
# 2005 and less than 2016.
year_col = master_data_df["year"]
in_window = year_col.gt(2005) & year_col.lt(2016)
ten_yr_df = master_data_df.loc[in_window]

# Persist the filtered rows (the DataFrame index is written as well).
ten_yr_df.to_csv("./source_data/ten_yr_data.csv")

# Row count, then the non-null count per column.
print(ten_yr_df.shape[0])
print(ten_yr_df.count())

9840
country               9840
year                  9840
sex                   9840
age                   9840
suicides_no           9840
population            9840
suicides/100k pop     9840
country-year          9840
HDI for year          4740
gdp_for_year ($)      9840
gdp_per_capita ($)    9840
generation            9840
dtype: int64


In [5]:
# Rank countries by their total number of suicides over the ten-year window.
country_grp = ten_yr_df.groupby(["country"])

# Sum suicides per country, turn the country index back into a column, and
# order the rows from the highest total to the lowest.
country_grp_df = (
    country_grp["suicides_no"]
    .sum()
    .to_frame("total_suicides_no")
    .reset_index()
    .sort_values(by="total_suicides_no", ascending=False)
)

# The CSV holds the full ranked table (every country), not just the top ten.
country_grp_df.to_csv("./source_data/top_country_grp_data.csv")

# Number of countries, then the ten countries with the highest totals.
print(country_grp_df.shape[0])
country_grp_df.head(10)

95


Unnamed: 0,country,total_suicides_no
92,United States,387385
71,Russian Federation,334082
45,Japan,278985
69,Republic of Korea,138480
34,Germany,98697
15,Brazil,97856
32,France,90413
89,Ukraine,81876
65,Poland,59658
55,Mexico,52100


In [7]:
# Per-country, per-year totals across both sexes and all age groups
# (all countries in the ten-year data).
country_yr_grp = ten_yr_df.groupby(["country", "year"])

yearly_population = country_yr_grp["population"].sum()
yearly_suicides = country_yr_grp["suicides_no"].sum()

# Place the two totals side by side; the result stays indexed by (country, year).
country_yr_grp_df = pd.concat(
    [
        yearly_population.rename("country_yearly_population"),
        yearly_suicides.rename("country_yearly_suicides"),
    ],
    axis=1,
)

country_yr_grp_df.head(20)


Unnamed: 0_level_0,Unnamed: 1_level_0,country_yearly_population,country_yearly_suicides
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Albania,2006,2780176,0
Albania,2007,2770344,124
Albania,2008,2757059,160
Albania,2009,2745735,0
Albania,2010,2736025,96
Antigua and Barbuda,2006,81973,0
Antigua and Barbuda,2007,83131,1
Antigua and Barbuda,2008,84273,0
Antigua and Barbuda,2009,85405,0
Antigua and Barbuda,2012,88703,0


In [8]:
# Restrict the ten-year data to the top 10 countries. This takes the first ten
# rows of country_grp_df, so it relies on that frame already being sorted in
# descending order of total_suicides_no.
top_ten_totals = country_grp_df.head(10)

# The first inner merge keeps only rows of the top-10 countries (and attaches
# total_suicides_no); the second attaches the per-country yearly population and
# suicide totals, matching 'country' and 'year' against the index levels of
# country_yr_grp_df.
top_country_ten_yr_df = (
    ten_yr_df
    .merge(top_ten_totals, how="inner", on="country")
    .merge(country_yr_grp_df, how="inner", on=["country", "year"])
)

top_country_ten_yr_df.to_csv("./source_data/top_country_ten_yr_data.csv")

# Row count and number of distinct countries, then non-null counts per column.
print(top_country_ten_yr_df.shape[0], top_country_ten_yr_df["country"].nunique())
print(top_country_ten_yr_df.count())

top_country_ten_yr_df.head(10)

1176 10
country                      1176
year                         1176
sex                          1176
age                          1176
suicides_no                  1176
population                   1176
suicides/100k pop            1176
country-year                 1176
HDI for year                  468
gdp_for_year ($)             1176
gdp_per_capita ($)           1176
generation                   1176
total_suicides_no            1176
country_yearly_population    1176
country_yearly_suicides      1176
dtype: int64


Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation,total_suicides_no,country_yearly_population,country_yearly_suicides
0,Brazil,2006,male,75+ years,275,1575798,17.45,Brazil2006,,1107640297890,6403,Silent,97856,172974722,8618
1,Brazil,2006,male,55-74 years,1112,9247477,12.02,Brazil2006,,1107640297890,6403,Silent,97856,172974722,8618
2,Brazil,2006,male,35-54 years,2536,22709135,11.17,Brazil2006,,1107640297890,6403,Boomers,97856,172974722,8618
3,Brazil,2006,male,25-34 years,1537,15717099,9.78,Brazil2006,,1107640297890,6403,Generation X,97856,172974722,8618
4,Brazil,2006,male,15-24 years,1293,18063460,7.16,Brazil2006,,1107640297890,6403,Millenials,97856,172974722,8618
5,Brazil,2006,female,35-54 years,708,24105759,2.94,Brazil2006,,1107640297890,6403,Boomers,97856,172974722,8618
6,Brazil,2006,female,55-74 years,265,10786642,2.46,Brazil2006,,1107640297890,6403,Silent,97856,172974722,8618
7,Brazil,2006,female,15-24 years,387,17631557,2.19,Brazil2006,,1107640297890,6403,Millenials,97856,172974722,8618
8,Brazil,2006,female,25-34 years,333,15862415,2.1,Brazil2006,,1107640297890,6403,Generation X,97856,172974722,8618
9,Brazil,2006,female,75+ years,47,2466737,1.91,Brazil2006,,1107640297890,6403,Silent,97856,172974722,8618
