## Libraries

In [6]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import plotly.offline as plo 
import plotly.graph_objects as go

## Data Gathering

In [4]:
# Load the dataset from master.csv (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='master.csv')

In [5]:
# Preview the first ten rows of the frame.
df.head(n=10)

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers
5,Albania,1987,female,75+ years,1,35600,2.81,Albania1987,,2156624900,796,G.I. Generation
6,Albania,1987,female,35-54 years,6,278800,2.15,Albania1987,,2156624900,796,Silent
7,Albania,1987,female,25-34 years,4,257200,1.56,Albania1987,,2156624900,796,Boomers
8,Albania,1987,male,55-74 years,1,137500,0.73,Albania1987,,2156624900,796,G.I. Generation
9,Albania,1987,female,5-14 years,0,311000,0.0,Albania1987,,2156624900,796,Generation X


In [7]:
# Print the column names, non-null counts, dtypes and memory usage of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27820 entries, 0 to 27819
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   country             27820 non-null  object 
 1   year                27820 non-null  int64  
 2   sex                 27820 non-null  object 
 3   age                 27820 non-null  object 
 4   suicides_no         27820 non-null  int64  
 5   population          27820 non-null  int64  
 6   suicides/100k pop   27820 non-null  float64
 7   country-year        27820 non-null  object 
 8   HDI for year        8364 non-null   float64
 9    gdp_for_year ($)   27820 non-null  object 
 10  gdp_per_capita ($)  27820 non-null  int64  
 11  generation          27820 non-null  object 
dtypes: float64(2), int64(4), object(6)
memory usage: 2.5+ MB


In [8]:
# Count the missing values in each column.
df.isnull().sum()

country                   0
year                      0
sex                       0
age                       0
suicides_no               0
population                0
suicides/100k pop         0
country-year              0
HDI for year          19456
 gdp_for_year ($)         0
gdp_per_capita ($)        0
generation                0
dtype: int64

In [11]:
# Distinct country names, in order of first appearance in the data.
pd.unique(df['country'])

array(['Albania', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Barbados', 'Belarus', 'Belgium', 'Belize',
       'Bosnia and Herzegovina', 'Brazil', 'Bulgaria', 'Cabo Verde',
       'Canada', 'Chile', 'Colombia', 'Costa Rica', 'Croatia', 'Cuba',
       'Cyprus', 'Czech Republic', 'Denmark', 'Dominica', 'Ecuador',
       'El Salvador', 'Estonia', 'Fiji', 'Finland', 'France', 'Georgia',
       'Germany', 'Greece', 'Grenada', 'Guatemala', 'Guyana', 'Hungary',
       'Iceland', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan',
       'Kazakhstan', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Latvia',
       'Lithuania', 'Luxembourg', 'Macau', 'Maldives', 'Malta',
       'Mauritius', 'Mexico', 'Mongolia', 'Montenegro', 'Netherlands',
       'New Zealand', 'Nicaragua', 'Norway', 'Oman', 'Panama', 'Paraguay',
       'Philippines', 'Poland', 'Portugal', 'Puerto Rico', 'Qatar',
       'Republic of Korea', 'Romania', '

In [16]:
# Total suicides and total population per country, summed over every row
# belonging to that country.
df.groupby('country')[['suicides_no', 'population']].sum()

Unnamed: 0_level_0,suicides_no,population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Albania,1970,62325467
Antigua and Barbuda,11,1990228
Argentina,82219,1035985431
Armenia,1905,77348173
Aruba,101,1259677
...,...,...
United Arab Emirates,622,36502275
United Kingdom,136805,1738767780
United States,1034013,8054027201
Uruguay,13138,84068943


In [18]:
# Total suicides and total population per age bracket, summed over every row
# belonging to that bracket.
df.groupby('age')[['suicides_no', 'population']].sum()

Unnamed: 0_level_0,suicides_no,population
age,Unnamed: 1_level_1,Unnamed: 2_level_1
15-24 years,808542,8642946896
25-34 years,1123912,8438103587
35-54 years,2452141,14375888123
5-14 years,52264,8398693237
55-74 years,1658443,8803245340
75+ years,653118,2663281253


## Data Visualization

In [34]:
# Sum suicides and population per country across all rows of that country.
data_country = df.groupby(by='country').agg({
    "suicides_no": "sum",
    "population": "sum"
})
# One bar per country: total suicides divided by total population.
# This is a raw ratio (not scaled per 100k); population is summed over every
# row, so the denominator counts each person once per recorded year.
data = go.Bar(
    x=data_country.index,
    y=data_country['suicides_no']/data_country['population'],
    name="Suicide per population",
    marker = dict(color='red')
)
layout = go.Layout(
    title = 'Suicide per Population COUNTRY wise',
    # Axis titles so the figure can be read on its own.
    xaxis = dict(title='Country'),
    yaxis = dict(title='Suicides / population'),
    template='plotly_dark'
)
fig = go.Figure(data=data, layout=layout)
# fig.show() picks the renderer for the current environment; the legacy
# plotly.offline.iplot needs init_notebook_mode() first in a classic notebook
# and can otherwise render an empty output.
fig.show()

In [37]:
# Sum suicides and population per age bracket across all rows of that bracket.
data_age = df.groupby(by='age').agg({
    "suicides_no": "sum",
    "population": "sum"
})
# groupby sorts the labels as strings, which puts '5-14 years' after
# '35-54 years'. Reorder the rows by the leading number of each label so the
# bars run from the youngest bracket to the oldest.
age_order = data_age.index.str.extract(r'(\d+)', expand=False).astype(int).argsort()
data_age = data_age.iloc[age_order]
# One bar per age bracket: total suicides divided by total population
# (a raw ratio, not scaled per 100k).
data = go.Bar(
    x=data_age.index,
    y=data_age['suicides_no']/data_age['population'],
    name="Suicide per population",
    marker = dict(color='brown')
)
layout = go.Layout(
    title = 'Suicide per Population AGE wise',
    # Axis titles so the figure can be read on its own.
    xaxis = dict(title='Age group'),
    yaxis = dict(title='Suicides / population'),
    template='plotly_dark'
)
fig = go.Figure(data=data, layout=layout)
# fig.show() picks the renderer for the current environment; the legacy
# plotly.offline.iplot needs init_notebook_mode() first in a classic notebook
# and can otherwise render an empty output.
fig.show()