# Examine alt data - may change proposal

In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import statsmodels.api as sm
import statsmodels.formula.api as smf 

### Read in data

In [2]:
happiness = pd.read_csv('../data/hapiscore_whr.csv') # happiness score
fish = pd.read_csv('../data/fisfod_cons_pc.csv') # fish consumption per capita
forest_area = pd.read_csv('../data/forest_area_sq_km.csv') # sq km
planted_forest_area = pd.read_csv('../data/planted_forest_area_ha.csv') # ha
surface_area = pd.read_csv('../data/surface_area_sq_km.csv') # sq km
oil_per_cap = pd.read_csv('../data/oil_consumption_per_cap.csv') # kg per capita
sugar = pd.read_csv('../data/sugar_per_person_g_per_day.csv') # g per day
continents = pd.read_csv('../data/continents.csv') 
le = pd.read_csv('../data/le.csv', header=2) # life expectancy at birth
disorders = pd.read_csv('../data/depressive_anxiety_IHME.csv') # depressive and anxiety disorders

In [3]:
# Dict for keeping names consistent across sets
name_changes = (
    {'Hong Kong SAR, China':'Hong Kong',
     'Macao SAR, China':'Macao',
     'Korea, Rep.':'South Korea',
     'Korea, Dem. Rep.':'North Korea',
     'Czechia':'Czech Republic',
     'Slovak Republic':'Slovakia',
     'Kyrgyz Republic':'Kyrgyzstan',
     'Moldova, Republic of':'Moldova',
     'Tanzania, United Republic of':'Tanzania',
     'United Republic of Tanzania':'Tanzania',
     'UK':'United Kingdom',
     'USA':'United States',
     'United States of America':'United States',
     'Russian Federation':'Russia',
     'Congo, Dem. Rep.':'Democratic Republic of the Congo',
     'Congo, Rep.':'Congo',
     'Lao PDR':'Laos',
     "Lao People's Democratic Republic":'Laos',
     'Bahamas, The':'Bahamas',
     'Brunei Darussalam':'Brunei',
     'Congo (Democratic Republic of the)': 'Democratic Republic of the Congo',
     "Cote d'Ivoire": "Ivory Coast",
     'Gambia, The': 'Gambia',
     'The Gambia': 'Gambia',
     'Iran, Islamic Rep.': 'Iran',
     'Iran (Islamic Republic of)':'Iran',
     "Korea, Dem. People's Rep.": 'North Korea',
     "Democratic People's Republic of Korea": 'North Korea',
     'Micronesia, Fed. Sts.': 'Micronesia',
     'Turkey':'Turkiye',
     'Turks and Caicos Islands':'Turks and Caicos',
     'Syrian Arab Republic':'Syria',
     'Venezuela, RB':'Venezuela',
     'Venezuela (Bolivarian Republic of)':'Venezuela',
     'Viet Nam':'Vietnam',
     'Yemen, Rep.':'Yemen',
     'Curacao':'Curaçao',
     'Burma (Myanmar)': 'Myanmar',
     'Dem. Rep. Congo': 'Democratic Republic of the Congo',
     'Korea, North': 'North Korea',
     'Korea, South': 'South Korea',
     "Republic of Korea": 'South Korea',
     'Sint Maarten (Dutch part)': 'Sint Maarten',
     'St. Martin (French part)': 'Sint Maarten',
     'Swaziland': 'Eswatini',
     'Bahamas, The': 'Bahamas',
     'The Bahamas': 'Bahamas',
     'Egypt, Arab Rep.': 'Egypt',
     "Bolivia (Plurinational State of)": 'Bolivia',
     "Republic of Moldova": 'Moldova',
     "American Samoa": 'Samoa',
     
     }
)

## Happiness

In [4]:
# melt happiness from wide to long
happiness_long = happiness.melt(id_vars=['country'], var_name='year', value_name='happiness_score')


In [5]:

fig = px.scatter(happiness_long, x='year', y='happiness_score', color='country', template='plotly_dark')

# toggle visibility off by default
for trace in fig.data:
    trace.visible = 'legendonly'
    
fig.show()

## Fish consumption

In [6]:
fish_long = fish.melt(id_vars=['country'], var_name='year', value_name='pcp_fish_consumption')

fig = px.scatter(fish_long, x='year', y='pcp_fish_consumption', color='country', template='plotly_dark')

# toggle visibility off by default
for trace in fig.data:
    trace.visible = 'legendonly'
    
fig.show()

In [7]:
# see if countries are the same in each df
print(f'Count of countries in each:\n\n {fish_long["country"].nunique(), happiness_long["country"].nunique()}\n\n')

# see which countries are different - Countries in fish_long but not in happiness_long
print(f'in fish_long, not in happiness_long:\n\n {np.setdiff1d(fish_long["country"].unique(), happiness_long["country"].unique())}\n\n')

# see which countries are different - Countries in happiness_long but not in fish_long
print(f'in happiness_long, not in fish_long:\n\n {np.setdiff1d(happiness_long["country"].unique(), fish_long["country"].unique())}\n\n')



Count of countries in each:

 (170, 164)


in fish_long, not in happiness_long:

 ['Antigua and Barbuda' 'Bahamas' 'Barbados' 'Brunei' 'Cape Verde'
 'Dominica' 'Fiji' 'Grenada' 'Guinea-Bissau' 'Kiribati' 'North Korea'
 'Samoa' 'Sao Tome and Principe' 'Solomon Islands' 'St. Kitts and Nevis'
 'St. Lucia' 'St. Vincent and the Grenadines' 'Timor-Leste' 'Vanuatu']


in happiness_long, not in fish_long:

 ['Bahrain' 'Bhutan' 'Burundi' 'Comoros' 'Congo, Dem. Rep.' 'Kosovo'
 'Libya' 'Palestine' 'Qatar' 'Singapore' 'Somalia' 'South Sudan' 'Syria']




## Fish-Happiness regression model

In [33]:
# merge happiness_long and fish_long on country and year
happy_fish_merged = pd.merge(happiness_long, fish_long, on=['country', 'year'], how='inner').dropna()
happy_fish_merged['country'].nunique()


151

In [9]:
lm = smf.ols('happiness_score ~ np.log(pcp_fish_consumption)', data=happy_fish_merged).fit()
lm.summary()

0,1,2,3
Dep. Variable:,happiness_score,R-squared:,0.14
Model:,OLS,Adj. R-squared:,0.14
Method:,Least Squares,F-statistic:,262.5
Date:,"Fri, 31 Jan 2025",Prob (F-statistic):,8.559999999999999e-55
Time:,13:47:24,Log-Likelihood:,-6049.7
No. Observations:,1609,AIC:,12100.0
Df Residuals:,1607,BIC:,12110.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,45.4684,0.619,73.458,0.000,44.254,46.682
np.log(pcp_fish_consumption),3.8727,0.239,16.201,0.000,3.404,4.342

0,1,2,3
Omnibus:,145.97,Durbin-Watson:,1.678
Prob(Omnibus):,0.0,Jarque-Bera (JB):,45.959
Skew:,-0.063,Prob(JB):,1.05e-10
Kurtosis:,2.182,Cond. No.,6.96


In [10]:
lm.rsquared

0.14039609666610864

In [11]:
# plot happiness_score against pct_fish_consumption
fig = px.scatter(happy_fish_merged,
                 x='pcp_fish_consumption',
                 y='happiness_score',
                #  log_x=True,
                #  color='country',
                 hover_data = ['country', 'year', 'happiness_score', 'pcp_fish_consumption'],
                 template='plotly_dark',
                 trendline='ols',
                 trendline_options=dict(log_x=True),
                 trendline_color_override='red'
                 )
# fig.update_xaxes(range=[0, 100])
# fig.update_yaxes(range=[0, 100])

# for trace in fig.data:
#     trace.visible = 'legendonly'

# Add annotation
fig.add_annotation(
    x=80, y=63,
    text=f"R^2 = {round(lm.rsquared, 4)}",
    showarrow=True,
    arrowhead=1
)


fig.show()


## Forest area

In [12]:
forest_area_long = forest_area.melt(id_vars=['country'], var_name='year', value_name='forest_area')
forest_area_long['year'] = pd.to_numeric(forest_area_long['year'])
forest_area_long = forest_area_long[forest_area_long['year'] >= 2005].dropna()
# Convert forest_area values with k and M modifiers
def convert_value(x):
    if isinstance(x, str):
        if 'k' in x.lower():
            return float(x.lower().replace('k', '')) * 1000
        elif 'm' in x.lower():
            return float(x.lower().replace('m', '')) * 1000000
        else:
            return float(x)
    return x

forest_area_long['forest_area'] = forest_area_long['forest_area'].apply(convert_value)
forest_area_long

fig = px.scatter(forest_area_long, x='year', y='forest_area', color='country', template='plotly_dark', title='Forest Area (km^2)')

# toggle visibility off by default
for trace in fig.data:
    trace.visible = 'legendonly'
    
fig.show()

In [13]:
planted_forest_area_long = planted_forest_area.melt(id_vars=['country'], var_name='year', value_name='planted_forest_area')
planted_forest_area_long['planted_forest_area'] = planted_forest_area_long['planted_forest_area'].apply(convert_value)*.01
planted_forest_area_long['year'] = pd.to_numeric(planted_forest_area_long['year'])
planted_forest_area_long = planted_forest_area_long[planted_forest_area_long['year'] >= 2005].dropna()

fig = px.scatter(planted_forest_area_long, x='year', y='planted_forest_area', color='country', template='plotly_dark', title='Planted Forest Area (km^2)')

# toggle visibility off by default
# for trace in fig.data:
#     trace.visible = 'legendonly'
    
fig.show()

In [14]:
forest_area_long['year'] = pd.to_numeric(forest_area_long['year'])
planted_forest_area_long['year'] = pd.to_numeric(planted_forest_area_long['year'])

# see if countries are the same in each df
print(f'Count of countries in each:\n\n {forest_area_long["country"].nunique(), planted_forest_area_long["country"].nunique()}\n\n')

# see which countries are different - Countries in fish_long but not in happiness_long
print(f'in fish_long, not in forest_area_long:\n\n {np.setdiff1d(forest_area_long["country"].unique(), planted_forest_area_long["country"].unique())}\n\n')

# see which countries are different - Countries in happiness_long but not in fish_long
print(f'in planted_forest_area_long, not in forest_area_long:\n\n {np.setdiff1d(planted_forest_area_long["country"].unique(), forest_area_long["country"].unique())}\n\n')



Count of countries in each:

 (214, 220)


in fish_long, not in forest_area_long:

 ['Antigua and Barbuda' 'Bermuda' 'British Virgin Islands'
 'Channel Islands' 'Isle of Man' 'Kiribati' 'North Macedonia' 'Palau'
 'Palestine' 'St. Kitts and Nevis' 'UAE' 'Vanuatu']


in planted_forest_area_long, not in forest_area_long:

 ['Cook Is' 'Falkland Is (Malvinas)' 'French Guiana' 'Guadeloupe'
 'Holy See' 'Martinique' 'Mayotte' 'Montserrat' 'Netherlands Antilles'
 'Niue' 'Norfolk Island' 'Reunion' 'Serbia and Montenegro' 'St. Helena'
 'St.-Pierre-et-Miquelon' 'Tokelau' 'Wallis et Futuna' 'Western Sahara']




In [15]:

# merge forest_area_long and planted_forest_area_long on country and year
forest_planted_merged = pd.merge(forest_area_long, planted_forest_area_long, on=['country', 'year'], how='inner').dropna()
forest_planted_merged

# plot forest_area against planted_forest_area
fig = px.scatter(forest_planted_merged, x='planted_forest_area', y='forest_area', color='country', template='plotly_dark', title='Forest Area vs Planted Forest Area')

# toggle visibility off by default
# for trace in fig.data:
#     trace.visible = 'legendonly'
    
fig.show()

## Forest area, happiness, and fish consumption

In [16]:
# convert year to numeric 
forest_planted_merged['year'] = pd.to_numeric(forest_planted_merged['year'])
happy_fish_merged['year'] = pd.to_numeric(happy_fish_merged['year'])


# merge forest_planted_merged and happy_fish_merged on country and year
forest_happy_fish_merged = pd.merge(forest_planted_merged, happy_fish_merged, on=['country', 'year'], how='inner').dropna()
forest_happy_fish_merged

# plot forest_area against happiness_score
fig = px.scatter(forest_happy_fish_merged, x='forest_area', y='happiness_score', color='country', template='plotly_dark', title='Forest Area vs Happiness Score', hover_data = ['country', 'year', 'happiness_score', 'forest_area', 'planted_forest_area'])

fig.show()

In [17]:
# planted forest area vs happiness score
fig = px.scatter(forest_happy_fish_merged,
                 x='planted_forest_area',
                 y='happiness_score',
                 color='country',
                 template='plotly_dark',
                 title='Planted Forest Area vs Happiness Score',
                 hover_data = ['country', 'year', 'happiness_score', 'forest_area', 'planted_forest_area'],
                 trendline='ols',
                #  trendline_options=dict(log_x=True),
                 trendline_color_override='red')

# toggle visibility off by default
for trace in fig.data:
    trace.visible = 'legendonly'

fig.show()

In [18]:
# forest area vs fish consumption
fig = px.scatter(forest_happy_fish_merged,
                 x='pcp_fish_consumption',
                 y='forest_area', 
                #  color='country',
                 hover_data = ['country', 'year', 'happiness_score', 'forest_area', 'planted_forest_area'],
                 template='plotly_dark',
                 title='Forest Area vs Fish Consumption',
                 trendline='ols',
                 trendline_color_override='red',
                 opacity=0.5,
                #  color_discrete_sequence=['#00CC96']
                 )

# # toggle visibility off by default
# for trace in fig.data:
#     trace.visible = 'legendonly'

fig.show()


something's up with Gabon

In [19]:
# planted forest area vs fish consumption
fig = px.scatter(forest_happy_fish_merged,
                 x='pcp_fish_consumption',
                 y='planted_forest_area',
                #  color='country',
                 template='plotly_dark',
                 hover_data = ['country', 'year', 'happiness_score', 'forest_area', 'planted_forest_area'],
                 title='Planted Forest Area vs Fish Consumption',
                 trendline='ols',
                 trendline_color_override='red',
                 opacity=0.5,
                #  color_discrete_sequence=['#00CC96']
                 )

#toggle visibility off by default
# for trace in fig.data:
#     trace.visible = 'legendonly'

fig.show()


## Oil consumption

In [20]:
# oil_per_cap_long = oil_per_cap.melt(id_vars=['country'], var_name='year', value_name='oil_per_cap')
# oil_per_cap_long['year'] = pd.to_numeric(oil_per_cap_long['year'])

# fig = px.scatter(oil_per_cap_long, x='year', y='oil_per_cap', color='country', template='plotly_dark', title='Oil Consumption per Capita')

# # toggle visibility off by default
# for trace in fig.data:
#     trace.visible = 'legendonly'
    
# fig.show()

In [21]:
# investigate oil country data
# oil_per_cap_long['country'].nunique(), oil_per_cap_long['country'].nunique()


(79, 79)

## Sugar consumption

In [22]:
sugar_long = sugar.melt(id_vars=['country'], var_name='year', value_name='sugar_per_cap')
sugar_long['year'] = pd.to_numeric(sugar_long['year'])

fig = px.scatter(sugar_long, x='year', y='sugar_per_cap', color='country', template='plotly_dark', title='Sugar Consumption per Capita')

# toggle visibility off by default
for trace in fig.data:
    trace.visible = 'legendonly'
    
fig.show()

In [23]:
# #merge sugar_long and oil_per_cap_long on country and year
# sugar_oil_merged = pd.merge(sugar_long, oil_per_cap_long, on=['country', 'year'], how='inner').dropna()
# sugar_oil_merged

# #plot sugar_per_cap against oil_per_cap
# fig = px.scatter(sugar_oil_merged,
#                  x='sugar_per_cap',
#                  y='oil_per_cap',
#                  color='country',
#                  template='plotly_dark',
#                  title='Sugar Consumption vs Oil Consumption',
#                  opacity=0.75,
#                  trendline='ols',
#                  trendline_color_override='red')

# # toggle visibility off by default
# for trace in fig.data:
#     trace.visible = 'legendonly'
    
# fig.show()

In [24]:
#merge sugar_oil_merged and forest_happy_fish_merged on country and year
happy_forest_sugar_fish_merged = pd.merge(sugar_long, forest_happy_fish_merged, on=['country', 'year'], how='inner').dropna()
happy_forest_sugar_fish_merged

#plot sugar_per_cap against happiness_score
fig = px.scatter(happy_forest_sugar_fish_merged,
                 x='sugar_per_cap',
                 y='happiness_score',
                 color='country',
                 template='plotly_dark',
                 title='Sugar Consumption vs Happiness Score',
                 opacity=0.75,
                 trendline='ols',
                 trendline_color_override='red')

# toggle visibility off by default
for trace in fig.data:
    trace.visible = 'legendonly'
    
fig.show()

## Creating master file to export

In [25]:

life_expectancy = le.drop(columns=['Country Code', 'Indicator Name', 'Indicator Code'])
life_expectancy = pd.melt(life_expectancy,
                          id_vars=['Country Name'],
                          var_name='year',
                          value_name='life_expectancy'
                          ).dropna()
life_expectancy.rename(columns={'Country': 'country',
                                'Year':'year'})
life_expectancy = life_expectancy.rename(columns={'Country Name': 'country'})
continents = continents.rename(columns={'Country': 'country', 'Continent': 'continent'})
display(life_expectancy, continents)

# Apply name changes to both dataframes
life_expectancy['country'] = life_expectancy['country'].replace(name_changes)
continents['country'] = continents['country'].replace(name_changes)


# see if countries are the same in each df
print(f'Count of countries in each:\n\n {life_expectancy["country"].nunique(), continents["country"].nunique()}\n\n')

# see which countries are different - Countries in life_expectancy but not in happy_forest_sugar_fish_merged
print(f'in life_expectancy, not in continents:\n\n {np.setdiff1d(life_expectancy["country"].unique(), continents["country"].unique())}\n\n')

# see which countries are different - Countries in continents but not in life_expectancy
print(f'in continents, not in life_expectancy:\n\n {np.setdiff1d(continents["country"].unique(), life_expectancy["country"].unique())}\n\n')

Unnamed: 0,country,year,life_expectancy
0,Aruba,1960,64.152000
1,Africa Eastern and Southern,1960,44.085552
2,Afghanistan,1960,32.535000
3,Africa Western and Central,1960,37.845152
4,Angola,1960,38.211000
...,...,...,...
16753,Kosovo,2022,79.524000
16754,"Yemen, Rep.",2022,63.720000
16755,South Africa,2022,61.480000
16756,Zambia,2022,61.803000


Unnamed: 0,continent,country
0,Asia,Afghanistan
1,Europe,Albania
2,Africa,Algeria
3,Europe,Andorra
4,Africa,Angola
...,...,...
211,Asia,Vietnam
212,Asia,West Bank and Gaza
213,Asia,Yemen
214,Africa,Zambia


Count of countries in each:

 (259, 210)


in life_expectancy, not in continents:

 ['Africa Eastern and Southern' 'Africa Western and Central' 'Arab World'
 'British Virgin Islands' 'Caribbean small states'
 'Central Europe and the Baltics' 'Channel Islands'
 'Early-demographic dividend' 'East Asia & Pacific'
 'East Asia & Pacific (IDA & IBRD countries)'
 'East Asia & Pacific (excluding high income)' 'Euro area'
 'Europe & Central Asia' 'Europe & Central Asia (IDA & IBRD countries)'
 'Europe & Central Asia (excluding high income)' 'European Union'
 'Faroe Islands' 'Fragile and conflict affected situations'
 'French Polynesia' 'Gibraltar' 'Greenland' 'Guam'
 'Heavily indebted poor countries (HIPC)' 'High income' 'IBRD only'
 'IDA & IBRD total' 'IDA blend' 'IDA only' 'IDA total' 'Isle of Man'
 'Late-demographic dividend' 'Latin America & Caribbean'
 'Latin America & Caribbean (excluding high income)'
 'Latin America & the Caribbean (IDA & IBRD countries)'
 'Least developed countries: UN

In [26]:
# merge life_expectancy and continents on country
life_expectancy = pd.merge(life_expectancy, continents, on=['country'], how='inner').dropna()
life_expectancy['year'] = pd.to_numeric(life_expectancy['year'])

In [27]:
life_exp_happy_forest_sugar_fish_cont_merged = pd.merge(life_expectancy, happy_forest_sugar_fish_merged, on=['country', 'year'], how='inner').dropna()
life_exp_happy_forest_sugar_fish_cont_merged
# Apply name changes to both dataframes
# life_expectancy['country'] = life_expectancy['country'].replace(name_changes)
# happy_forest_sugar_fish_merged['country'] = happy_forest_sugar_fish_merged['country'].replace(name_changes)


# # see if countries are the same in each df
# print(f'Count of countries in each:\n\n {gdp_le["country"].nunique(), happy_forest_sugar_fish_merged["country"].nunique()}\n\n')

# # see which countries are different - Countries in gdp_le but not in happy_forest_sugar_fish_merged
# print(f'in gdp_le, not in happy_forest_sugar_fish_merged:\n\n {np.setdiff1d(gdp_le["country"].unique(), happy_forest_sugar_fish_merged["country"].unique())}\n\n')

# # see which countries are different - Countries in happy_forest_sugar_fish_merged but not in gdp_le
# print(f'in happy_forest_sugar_fish_merged, not in gdp_le:\n\n {np.setdiff1d(happy_forest_sugar_fish_merged["country"].unique(), gdp_le["country"].unique())}\n\n')

Unnamed: 0,country,year,life_expectancy,continent,sugar_per_cap,forest_area,planted_forest_area,happiness_score,pcp_fish_consumption
0,Australia,2005,80.841463,Oceania,128.0,1310000.0,18300.0,73.4,25.30
1,Belgium,2005,78.980488,Europe,146.0,6790.0,4070.0,72.6,24.40
2,Brazil,2005,71.753000,South America,115.0,5310000.0,54900.0,66.4,6.04
3,Canada,2005,80.112683,North America,147.0,3480000.0,117000.0,74.2,23.60
4,Czech Republic,2005,75.924390,Europe,103.0,26500.0,25800.0,64.4,10.00
...,...,...,...,...,...,...,...,...,...
1501,Vietnam,2018,73.976000,Asia,33.7,145000.0,42400.0,53.0,37.30
1502,Yemen,2018,64.575000,Asia,82.4,5490.0,0.0,30.6,3.10
1503,South Africa,2018,65.674000,Africa,113.0,171000.0,31400.0,48.8,6.35
1504,Zambia,2018,62.342000,Africa,31.4,452000.0,525.0,40.4,11.70


In [28]:
life_exp_happy_forest_sugar_fish_cont_merged= life_exp_happy_forest_sugar_fish_cont_merged.rename(
    columns={'sugar_per_cap':'sugar_g_per_person_per_day',
             'planted_forest_area':'planted_forest_area_sq_km',
             'forest_area':'forest_area_sq_km',
             'pcp_fish_consumption':'fish_kg_per_person_per_year'}
             )

In [29]:
life_exp_happy_forest_sugar_fish_cont_merged.to_csv('../data/life_exp_happy_forest_sugar_fish_cont_merged.csv', index=False)

In [30]:
disorders_long = disorders[['location', 'cause', 'year', 'val']].rename(columns={'location':'country', 'cause':'disorder', 'val':'pct_new_per_pop'}).replace(name_changes)
disorders_long

Unnamed: 0,country,disorder,year,pct_new_per_pop
0,Sri Lanka,Depressive disorders,1990,0.006642
1,Sri Lanka,Depressive disorders,1991,0.006697
2,Sri Lanka,Depressive disorders,1992,0.006811
3,Sri Lanka,Depressive disorders,1993,0.006952
4,Sri Lanka,Depressive disorders,1994,0.007061
...,...,...,...,...
13051,Bermuda,Depressive disorders,2017,0.010246
13052,Bermuda,Depressive disorders,2018,0.010313
13053,Bermuda,Depressive disorders,2019,0.010380
13054,Bermuda,Depressive disorders,2020,0.011628


In [31]:
# see if countries are the same in each df
print(f'Count of countries in each:\n\n {disorders_long["country"].nunique(), life_exp_happy_forest_sugar_fish_cont_merged["country"].nunique()}\n\n')

# see which countries are different - Countries in life_expectancy but not in happy_forest_sugar_fish_merged
print(f'in disorders_long, not in life_exp_happy_forest_sugar_fish_cont_merged:\n\n {np.setdiff1d(disorders_long["country"].unique(), life_exp_happy_forest_sugar_fish_cont_merged["country"].unique())}\n\n')

# see which countries are different - Countries in life_exp_happy_forest_sugar_fish_cont_merged but not in disorders_long
print(f'in life_exp_happy_forest_sugar_fish_cont_merged, not in disorders_long:\n\n {np.setdiff1d(life_exp_happy_forest_sugar_fish_cont_merged["country"].unique(), disorders_long["country"].unique())}\n\n')

Count of countries in each:

 (203, 139)


in disorders_long, not in life_exp_happy_forest_sugar_fish_cont_merged:

 ['Andorra' 'Antigua and Barbuda' 'Bahamas' 'Bahrain' 'Barbados' 'Bermuda'
 'Bhutan' 'Brunei' 'Burundi' 'Cabo Verde' 'Comoros' 'Congo' 'Cook Islands'
 "Côte d'Ivoire" 'Democratic Republic of the Congo' 'Dominica'
 'Equatorial Guinea' 'Eritrea' 'Fiji' 'Greenland' 'Grenada' 'Guam'
 'Guinea-Bissau' 'Kiribati' 'Kyrgyzstan' 'Laos' 'Libya' 'Marshall Islands'
 'Micronesia (Federated States of)' 'Monaco' 'Nauru' 'Niue' 'North Korea'
 'North Macedonia' 'Northern Mariana Islands' 'Palau' 'Palestine'
 'Papua New Guinea' 'Puerto Rico' 'Qatar' 'Saint Kitts and Nevis'
 'Saint Lucia' 'Saint Vincent and the Grenadines' 'Samoa' 'San Marino'
 'Sao Tome and Principe' 'Seychelles' 'Singapore' 'Slovakia'
 'Solomon Islands' 'Somalia' 'South Sudan' 'Syria'
 'Taiwan (Province of China)' 'Timor-Leste' 'Tokelau' 'Tonga' 'Tuvalu'
 'Türkiye' 'United Arab Emirates' 'United Kingdom' 'United States'
 '

Unnamed: 0,country,disorder,year,pct_new_per_pop
0,Sri Lanka,Depressive disorders,1990,0.006642
1,Sri Lanka,Depressive disorders,1991,0.006697
2,Sri Lanka,Depressive disorders,1992,0.006811
3,Sri Lanka,Depressive disorders,1993,0.006952
4,Sri Lanka,Depressive disorders,1994,0.007061
...,...,...,...,...
13051,Bermuda,Depressive disorders,2017,0.010246
13052,Bermuda,Depressive disorders,2018,0.010313
13053,Bermuda,Depressive disorders,2019,0.010380
13054,Bermuda,Depressive disorders,2020,0.011628
