In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly as po
import plotly.express as px
import plotly.graph_objs as go

from counlist import countrylist


# Location of the WHO suicide statistics CSV.
# NOTE(review): absolute, machine-specific path - adjust it when running elsewhere.
SUICIDE_CSV_PATH = "C:\\DATA_SCIENCE\\PROYECTO\\documentation\\who_suicide_statistics.csv"

suicide = pd.read_csv(SUICIDE_CSV_PATH)

# Build the working frame in a single chain:
#   1. keep only the countries listed in `countrylist`,
#   2. rename the 'sex' column to 'gender',
#   3. keep only rows from the year 2000 onwards.
# The trailing .copy() makes `suic` an independent frame, so the column
# assignments that follow do not raise SettingWithCopyWarning.
suic = (
    suicide.loc[suicide["country"].isin(countrylist)]
    .rename(columns={'sex': 'gender'})
    .loc[lambda frame: frame['year'] >= 2000]
    .copy()
)

# Normalising age format: drop the trailing " years" label (e.g. "5-14 years" -> "5-14").
# An anchored suffix replacement is used instead of str.rstrip('years '), because
# rstrip treats its argument as a set of characters, not as a literal suffix.
suic['age'] = suic['age'].str.replace(r'\s*years\s*$', '', regex=True)


In [3]:
# Suicides per 100,000 inhabitants for every row, rounded to two decimals.
suic["suic_100k"] = suic["suicides_no"].div(suic["population"]).mul(100000).round(2)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
suic.describe()

Unnamed: 0,year,suicides_no,population,suic_100k
count,8172.0,8076.0,8172.0,8076.0
mean,2007.609398,307.532194,2365180.0,14.067033
std,4.66025,1008.114139,3857089.0,18.128683
min,2000.0,0.0,5879.0,0.0
25%,2004.0,10.0,294758.8,2.15
50%,2008.0,59.0,678924.0,7.955
75%,2012.0,204.25,2783655.0,18.8225
max,2016.0,21262.0,28461860.0,144.71


In [5]:
# Aggregation spec reused by the group-by summaries: average the per-100k rate
# (gives a view of suicide across the whole range of ages).
aggregation_functions = dict(suic_100k='mean')

In [6]:
# Mean rate per 100k for each age band, highest rate first, rounded to two decimals.
suic_ages_mean = (
    suic.groupby('age')
    .agg(aggregation_functions)
    .sort_values(by="suic_100k", ascending=False)
    .round(2)
)
suic_ages_mean

Unnamed: 0_level_0,suic_100k
age,Unnamed: 1_level_1
75+,26.02
55-74,18.45
35-54,17.42
25-34,12.7
15-24,9.28
5-14,0.52


In [7]:
# Mean rate per 100k for each country, lowest rate first, rounded to two decimals.
suic_countries_mean = (
    suic.groupby(by='country')
    .agg(aggregation_functions)
    .sort_values(by="suic_100k")
    .round(2)
)
suic_countries_mean

Unnamed: 0_level_0,suic_100k
country,Unnamed: 1_level_1
South Africa,1.04
Turkey,2.37
Cyprus,3.48
Greece,3.77
Mexico,5.18
Brazil,6.0
Malta,6.15
Colombia,6.23
United Kingdom,6.77
Costa Rica,7.2


In [8]:
# Both selections come from the same descending ranking:
# its first five rows are the highest rates, its last five the lowest.
most_per_100k, least_per_100k = (
    suic_countries_mean.sort_values("suic_100k", ascending=False).round(2).head(),
    suic_countries_mean.sort_values("suic_100k", ascending=False).round(2).tail(),
)

In [9]:
# Plain-text dump of the top five and bottom five countries by mean rate.
print(most_per_100k, least_per_100k, sep="\n")

                    suic_100k
country                      
Lithuania               37.57
Russian Federation      31.34
Hungary                 27.30
Latvia                  25.94
Slovenia                25.94
              suic_100k
country                
Mexico             5.18
Greece             3.77
Cyprus             3.48
Turkey             2.37
South Africa       1.04


In [10]:
# The five individual rows (one country/year/gender/age combination each)
# with the highest raw suicide counts.
most_of_all = suic.sort_values("suicides_no", ascending=False).head()

# The five countries with the smallest summed suicide counts (last rows of a
# descending ranking of per-country totals).
# numeric_only=True restricts the sum to numeric columns; without it, pandas >= 2.0
# would concatenate the string columns ('gender', 'age') instead of skipping them.
# NOTE(review): the summed 'year' and 'suic_100k' columns in this result are not
# meaningful quantities; only 'suicides_no' and 'population' are interpretable totals.
least_of_all = (
    suic.groupby("country")
    .sum(numeric_only=True)
    .sort_values("suicides_no", ascending=False)
    .tail()
)

In [11]:
# Plain-text dump of the highest-count rows and the lowest-total countries.
print(most_of_all, least_of_all, sep="\n")

                  country  year gender    age  suicides_no  population  \
33212  Russian Federation  2001   male  35-54      21262.0  21476420.0   
33200  Russian Federation  2000   male  35-54      21063.0  21378098.0   
33224  Russian Federation  2002   male  35-54      20119.0  21320535.0   
33236  Russian Federation  2003   male  35-54      18681.0  21007346.0   
33248  Russian Federation  2004   male  35-54      17465.0  20843896.0   

       suic_100k  
33212      99.00  
33200      98.53  
33224      94.36  
33236      88.93  
33248      83.79  
              year  suicides_no  population  suic_100k
country                                               
Costa Rica  361260       4291.0  59909934.0    1296.14
Luxembourg  385440        944.0   7403051.0    2622.15
Iceland     409632        636.0   4885109.0    2537.12
Cyprus      385620        411.0  12194257.0     584.69
Malta       385440        401.0   6217803.0    1181.22


In [12]:
# Per (country, year) averages of the raw count and the per-100k rate
# (pivot_table aggregates with the mean by default), rounded to two decimals.
suic_pivot = suic.pivot_table(
    index=['country', 'year'],
    values=['suicides_no', 'suic_100k'],
).round(2)

In [13]:
# Yearly rows for Spain, taken as a cross-section of the (country, year) pivot.
sp_su = suic_pivot.xs("Spain", level="country")
sp_su

Unnamed: 0_level_0,suic_100k,suicides_no
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,10.18,282.83
2001,9.37,265.75
2002,9.64,281.0
2003,9.71,289.83
2004,9.39,292.33
2005,8.97,283.25
2006,8.32,270.58
2007,8.25,272.0
2008,8.21,288.08
2009,8.17,285.75


In [14]:
# Yearly rows for Lithuania, taken as a cross-section of the (country, year) pivot.
lt_su = suic_pivot.xs("Lithuania", level="country")
lt_su

Unnamed: 0_level_0,suic_100k,suicides_no
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,47.65,135.92
2001,47.42,127.75
2002,46.3,129.25
2003,44.21,121.25
2004,41.95,115.0
2005,40.48,109.92
2006,33.38,87.42
2007,33.9,85.42
2008,35.84,92.58
2009,36.42,94.83


In [15]:
# Yearly rows for the Russian Federation, taken as a cross-section of the (country, year) pivot.
ru_su = suic_pivot.xs("Russian Federation", level="country")
ru_su

Unnamed: 0_level_0,suic_100k,suicides_no
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,42.04,4718.25
2001,42.05,4746.5
2002,40.71,4585.33
2003,39.08,4287.08
2004,37.3,4091.33
2005,35.03,3816.83
2006,33.44,3551.17
2007,32.25,3429.08
2008,30.57,3184.25
2009,29.4,3117.33


In [16]:
# Yearly rows for Turkey, taken as a cross-section of the (country, year) pivot.
tu_su = suic_pivot.xs("Turkey", level="country")
tu_su

Unnamed: 0_level_0,suic_100k,suicides_no
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2009,1.77,87.5
2010,2.64,127.0
2011,1.92,95.67
2012,2.37,120.83
2013,2.95,150.83
2014,2.57,134.75
2015,2.37,127.67


In [17]:
# Yearly rows for South Africa, taken as a cross-section of the (country, year) pivot.
sa_su = suic_pivot.xs("South Africa", level="country")
sa_su

Unnamed: 0_level_0,suic_100k,suicides_no
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,0.8,22.25
2001,1.04,32.0
2002,0.58,19.67
2003,0.77,21.33
2004,1.05,31.92
2005,1.27,38.17
2006,1.49,42.83
2007,1.01,34.75
2008,1.18,36.75
2009,0.89,31.0


In [18]:
# Per-gender averages of every numeric column, rounded to two decimals.
# numeric_only=True is required on pandas >= 2.0, where mean() no longer silently
# drops the string columns ('country', 'age') and raises a TypeError instead.
# NOTE(review): despite its name, this frame is grouped by gender, not by age;
# the variable name is kept so that any later cells referring to it keep working.
suic_age_group = suic.groupby(['gender']).mean(numeric_only=True).round(2)
suic_age_group

Unnamed: 0_level_0,year,suicides_no,population,suic_100k
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,2007.61,137.55,2429421.94,5.68
male,2007.61,477.51,2300937.23,22.45
