In [None]:
#Import Libarary that needed. 
import numpy as np #library to precess arrays.
import pandas as pd #library to data analyst
import matplotlib.pyplot as plt #library to visualization
import seaborn as sns #Ploting data
import warnings #library to ignore every warning that would be appeared.
warnings.filterwarnings('ignore') #I choose ignore warning than the other kind of warnings
import geopandas #Ploting map data

In [None]:
# Load the suicide-rates dataset from CSV and preview its first five rows.
data = pd.read_csv('../input/suicide-rates-overview-1985-to-2016/master.csv')
# A bare trailing expression lets the notebook render the preview as a rich
# table instead of the plain text produced by print().
data.head()

In [None]:
# Print a concise summary of the dataset: index range, every column with its
# dtype and non-null count, and the memory usage.
data.info()

In [None]:
# Show the dataset's dimensions as a (rows, columns) tuple.
data.shape

In [None]:
# Count the missing values in every column. Per the original notes, the gaps
# are in the 'HDI for year' column, which is not used later, so they are left as-is.
data.isna().sum()

# **Plot Countries Map**

In [None]:
# List the distinct countries present in the dataset.
data['country'].unique()

In [None]:
# Generate ISO alpha-3 country codes from country names.
import pycountry


def alpha3code(column):
    '''
    Translate country names into 3-letter country codes using pycountry.

    Arguments
    =========
    column: iterable of country names (e.g. a pandas Series)

    Returns
    =======
    list of str, one entry per input element and in the same order: the
    country's `.alpha_3` code, or the literal string 'None' when pycountry has
    no country with exactly that name (or the value is not a string at all).
    '''
    # Each distinct name is looked up only once; the column repeats the same
    # few countries many times.
    resolved = {}
    codes = []
    for country in column:
        if country not in resolved:
            match = None
            # Non-string values (e.g. NaN) can never match a country name.
            if isinstance(country, str):
                try:
                    match = pycountry.countries.get(name=country)
                except LookupError:
                    # Some pycountry releases raise KeyError (a LookupError)
                    # for unknown names instead of returning None.
                    match = None
            # .alpha_3 is the 3-letter code (.alpha_2 would be the 2-letter one).
            resolved[country] = 'None' if match is None else match.alpha_3
        codes.append(resolved[country])
    return codes
# Store the alpha-3 code of every row's country in a new 'CODE' column,
# then preview the updated frame.
data['CODE'] = alpha3code(data['country'])
data.head()

In [None]:
# Join country geometries with the suicide data so it can be drawn on a map.
# 'naturalearth_lowres' is a dataset bundled with geopandas, so it can be read directly.
# NOTE(review): `geopandas.datasets` is deprecated in recent geopandas releases
# (reportedly removed in 1.0) — confirm the installed version still provides it.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
# Rename the ISO alpha-3 column by name (not by position) so it matches the
# 'CODE' key in `data`; this keeps working if the column order ever changes.
world = world.rename(columns={'iso_a3': 'CODE'})
# Attach the suicide records to each country's geometry.
merge = pd.merge(world, data, on='CODE')
# Finally merge with the location table, which holds each country's latitude and longitude.
location = pd.read_csv('https://raw.githubusercontent.com/melanieshi0120/COVID-19_global_time_series_panel_data/master/data/countries_latitude_longitude.csv')
merge = (
    merge.merge(location, on='name')
    .sort_values(by='country', ascending=False)
    .reset_index()
)

In [None]:
# World map coloured by the 'population' column, with values binned by the
# "std_mean" classification scheme. Per the original note, lighter areas
# correspond to larger populations.
ax = merge.plot(column='population', scheme="std_mean", figsize=(25, 20))
ax.set_title('Population Countries ', fontsize=25)
plt.show()

# **Top 15 Countries by Suicides**

In [None]:
# Per-country summary: summed suicide rate, total number of suicides, and the
# largest yearly GDP (a rough indicator of how rich the country is).
# NOTE(review): ' gdp_for_year ($) ' appears to be stored as text with thousands
# separators (confirm with data.info()); taking 'max' of such strings would
# compare them alphabetically, so the values are converted to numbers first.
gdp_numeric = pd.to_numeric(
    data[' gdp_for_year ($) '].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)
data_country = (
    data.assign(**{' gdp_for_year ($) ': gdp_numeric})  # `data` itself is left untouched
    .groupby('country', as_index=False)
    .agg(
        suicides_per_a_hundred_thousand_pop=('suicides/100k pop', 'sum'),
        suicides_no=('suicides_no', 'sum'),
        gdp_for_year_dollar=(' gdp_for_year ($) ', 'max'),
    )
    .sort_values(by=['suicides_per_a_hundred_thousand_pop', 'suicides_no'], ascending=False)
    .reset_index(drop=True)
)
# Number the rows from 1 so the index doubles as the country's rank.
data_country.index = np.arange(1, len(data_country) + 1)

In [None]:
# Show the 15 highest-ranked countries, matching the "Top 15" section heading.
data_country.head(15)

# **Plot Suicides Year by Year**

In [None]:
# Total number of suicides for each year.
# Convert 'year' to datetime only when it has not been converted yet, so that
# re-running this cell does not fail on already-converted values.
if not pd.api.types.is_datetime64_any_dtype(data["year"]):
    data["year"] = pd.to_datetime(data["year"].astype(str), format="%Y")
# Sum `suicides_no` per year (counting would only give the number of rows).
# The result column keeps the name 'count' because the plotting cell reads it
# under that name.
data_year = (
    data.groupby('year')['suicides_no'].sum()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Line chart of the yearly values held in `data_year`.
# Create the figure and axes explicitly and hand the axes to pandas; otherwise
# DataFrame.plot opens its own figure and the requested size is never used.
fig, ax = plt.subplots(figsize=(10, 10))
# `data_year` is ordered by value, so sort by year to draw the line chronologically.
data_year.sort_values('year').plot(x="year", y="count", ax=ax)
ax.set_title('Suicide based on Year', size=15)
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=25)  # tilt the x tick labels by 25 degrees
plt.show()

# **Plot Based on Categorical Data**

In [None]:
def categorical_summarized(dataframe, x=None, y=None, hue=None, palette='Set1', verbose=True):
    '''
    Helper function that gives a quick summary of a given column of categorical data
    Arguments
    =========
    dataframe: pandas dataframe
    x: str. horizontal axis to plot the labels of categorical data, y would be the count
    y: str. vertical axis to plot the labels of categorical data, x would be the count
    hue: str. if you want to compare it another variable (usually the target variable)
    palette: array-like. Colour of the plot
    verbose: bool. if True, also print the value counts of the column
    Returns
    =======
    None. Prints quick stats of the column (x if given, otherwise y) and shows the count plot
    Raises
    ======
    ValueError: if neither x nor y is given
    '''
    if x is None and y is None:
        raise ValueError('categorical_summarized needs a column name for either x or y')
    # x takes precedence when both are supplied.
    column_interested = y if x is None else x
    series = dataframe[column_interested]
    print(series.describe())
    print('mode: ', series.mode())
    if verbose:
        print('='*80)
        print(series.value_counts())

    ax = sns.countplot(x=x, y=y, hue=hue, data=dataframe, palette=palette)
    # Rotate after drawing so the setting applies to the category tick labels
    # that the plot creates.
    ax.tick_params(axis='x', labelrotation=25)
    # A legend only makes sense when a hue variable supplies labelled groups.
    if hue is not None:
        ax.legend(bbox_to_anchor=(1.05, 1))
    plt.show()

In [None]:
# Summary statistics and count plot for 'sex', split by 'generation'.
categorical_summarized(dataframe=data, x='sex', hue='generation')

In [None]:
# Summary statistics and count plot for 'age', split by 'generation'.
categorical_summarized(dataframe=data, x='age', hue='generation')

# **Plot based on Quantitative Columns**

In [None]:
def quantitative_summarized(dataframe, x=None, y=None, hue=None, palette='Set1', ax=None, verbose=True, swarm=False):
    '''
    Print quick statistics for a quantitative column and draw its box plot
    Arguments
    =========
    dataframe: pandas dataframe
    x: str. column placed on the horizontal axis (the categorical grouping, usually the target variable)
    y: str. column placed on the vertical axis; this is the column that gets summarised
    hue: str. optional extra categorical column used to split each group
    palette: array-like. Colour of the plot
    ax: axes object handed through to the seaborn plotting calls
    verbose: bool. when True the value counts of y are printed as well
    swarm: bool. when True a swarm plot is drawn over the box plot
    Returns
    =======
    None. The statistics are printed and the figure is shown
    '''
    values = dataframe[y]
    print(values.describe())
    print('mode: ', values.mode())
    if verbose:
        print('=' * 80)
        print(values.value_counts())

    # Both layers receive exactly the same arguments.
    shared = dict(x=x, y=y, hue=hue, data=dataframe, palette=palette, ax=ax)
    layers = [sns.boxplot]
    if swarm:
        layers.append(sns.swarmplot)
    for draw in layers:
        draw(**shared)

    plt.show()

In [None]:
# Multivariate analysis: distribution of gdp_per_capita ($) within each age group.
# quantitative_summarized summarises the column passed as `y`, so the
# quantitative column goes on y and the categorical 'age' groups on x.
quantitative_summarized(dataframe=data, x='age', y='gdp_per_capita ($)', verbose=False, swarm=False)