In [3]:
import pandas as pd

In [4]:
# Read the Anxiety2021 CSV, then keep only the columns used later on.
anxiety_df = pd.read_csv(r"C:\Users\missi\Downloads\Anxiety2021\Anxiety2021.csv")

# "val" is renamed so it stays distinguishable once the disorder tables are joined.
# NOTE(review): the previewed values are fractions (e.g. 0.03) even though the
# column is called "*_percent" -- confirm the unit before reporting.
anxiety = (
    anxiety_df
    .loc[:, ["location", "sex", "age", "val"]]
    .rename(columns={"val": "anxiety_percent"})
)
anxiety.head()

Unnamed: 0,location,sex,age,anxiety_percent
0,Taiwan (Province of China),Male,15-49 years,0.032339
1,Taiwan (Province of China),Female,15-49 years,0.052631
2,People's Republic of China,Male,15-49 years,0.030089
3,People's Republic of China,Female,15-49 years,0.04999
4,Democratic People's Republic of Korea,Male,15-49 years,0.031223


In [7]:
# Read the Bipolar2021 CSV, then keep only the columns used later on.
bipolar_df = pd.read_csv(r"C:\Users\missi\Downloads\Bipolar2021\Bipolar2021.csv")

# "val" is renamed so it stays distinguishable once the disorder tables are joined.
bipolar = (
    bipolar_df
    .loc[:, ["location", "sex", "val"]]
    .rename(columns={"val": "bipolar_percent"})
)
bipolar.head()

Unnamed: 0,location,sex,bipolar_percent
0,Democratic People's Republic of Korea,Male,0.002446
1,Democratic People's Republic of Korea,Female,0.002546
2,People's Republic of China,Male,0.002371
3,People's Republic of China,Female,0.002573
4,Democratic Republic of Timor-Leste,Male,0.003815


In [9]:
# Read the Depression2021 CSV, then keep only the columns used later on.
depress_df = pd.read_csv(r"C:\Users\missi\Downloads\Depression2021\Depression2021.csv")

# "val" is renamed so it stays distinguishable once the disorder tables are joined.
depress = (
    depress_df
    .loc[:, ["location", "sex", "val"]]
    .rename(columns={"val": "depress_percent"})
)
depress.head()

Unnamed: 0,location,sex,depress_percent
0,People's Republic of China,Male,0.025406
1,People's Republic of China,Female,0.041064
2,Democratic People's Republic of Korea,Male,0.023614
3,Democratic People's Republic of Korea,Female,0.040503
4,Republic of Kiribati,Male,0.037915


In [15]:
# Read the Suicide2021 CSV: suicide rate per 100k for each location and sex
# (Male / Female), keeping only the columns used later on.
suicide_df = pd.read_csv(r"C:\Users\missi\Downloads\Suicide2021\Suicide2021.csv")

# "val" is renamed so it stays distinguishable once joined with the disorder tables.
suicide = (
    suicide_df
    .loc[:, ["location", "sex", "val"]]
    .rename(columns={"val": "suicide_rate_per100k"})
)
suicide.head()

Unnamed: 0,location,sex,suicide_rate_per100k
0,Republic of Kiribati,Male,51.437264
1,Republic of Kiribati,Female,7.89114
2,People's Republic of China,Male,7.148372
3,People's Republic of China,Female,4.274235
4,Independent State of Samoa,Male,29.459208


In [30]:
# Load the GDP table. It arrives in wide layout (one column per year) and is
# reshaped below into one row per (location, year).
# NOTE(review): the previewed gdp_usd values (e.g. ~356 for Afghanistan) look
# like per-capita figures rather than total GDP -- confirm the indicator.
gdp_df = pd.read_csv(r"C:\Users\missi\Downloads\GDP\GDP.csv")

# Drop the indicator metadata columns and align the key names with the other
# datasets ("location") so the tables can be joined later.
gdp_long = (
    gdp_df
    .drop(columns=["Indicator Name", "Indicator Code"])
    .rename(columns={"Country Name": "location", "Country Code": "country_code"})
)

# Wide -> long: every remaining column label becomes a value in "year".
gdp = gdp_long.melt(
    id_vars=["location", "country_code"],
    var_name="year",
    value_name="gdp_usd",
)

# Column labels that are not numeric are coerced to NaN here; they are removed
# by the year filter below.
gdp["year"] = pd.to_numeric(gdp["year"], errors="coerce")

# Keep only the year 2021.
gdp_2021 = gdp[gdp["year"] == 2021].copy()

# The coercion above can leave "year" as float64 (displayed as 2021.0). After
# the filter no NaN can remain, so store the year as a plain integer.
gdp_2021["year"] = gdp_2021["year"].astype("int64")

# Restrict to locations present in the anxiety table so the names line up for
# the join. This relies on `anxiety` having been built by an earlier cell.
valid_locations = set(anxiety["location"].unique())
gdp_2021 = gdp_2021[gdp_2021["location"].isin(valid_locations)]

# NOTE(review): locations without a 2021 value keep gdp_usd = NaN; nothing in
# this cell drops them.
gdp_2021 = gdp_2021[["location", "country_code", "year", "gdp_usd"]]
gdp_2021.head()

Unnamed: 0,location,country_code,year,gdp_usd
16228,Islamic Republic of Afghanistan,AFG,2021.0,356.496214
16230,Republic of Angola,AGO,2021.0,1925.874661
16231,Republic of Albania,ALB,2021.0,6413.283286
16232,Principality of Andorra,AND,2021.0,42425.699676
16234,United Arab Emirates,ARE,2021.0,43360.021101


In [32]:
# Combine the five tables into one frame with a single chain of inner joins.
# The disorder and suicide tables are matched on (location, sex); the GDP table
# has no sex column, so it is matched on location only.
# Inner joins mean that a (location, sex) pair missing from any one table is
# dropped from the result.
merged = (
    anxiety
    .merge(bipolar, on=["location", "sex"], how="inner")    # anxiety + bipolar
    .merge(depress, on=["location", "sex"], how="inner")    # + depression
    .merge(suicide, on=["location", "sex"], how="inner")    # + suicide rate
    .merge(gdp_2021, on=["location"], how="inner")          # + GDP
)

merged.head(5)





Unnamed: 0,location,sex,age,anxiety_percent,bipolar_percent,depress_percent,suicide_rate_per100k,country_code,year,gdp_usd
0,People's Republic of China,Male,15-49 years,0.030089,0.002371,0.025406,7.148372,CHN,2021.0,12617.505104
1,People's Republic of China,Female,15-49 years,0.04999,0.002573,0.041064,4.274235,CHN,2021.0,12617.505104
2,Democratic People's Republic of Korea,Male,15-49 years,0.031223,0.002446,0.023614,10.637906,PRK,2021.0,
3,Democratic People's Republic of Korea,Female,15-49 years,0.052123,0.002546,0.040503,7.770534,PRK,2021.0,
4,Socialist Republic of Viet Nam,Male,15-49 years,0.02269,0.004234,0.026537,11.649648,VNM,2021.0,3704.193559


In [34]:
# Save the combined table to the working directory; the row index is not written.
merged.to_csv(path_or_buf="intermediate_suicide_2021.csv", index=False)