# Examine alternative data (may change the proposal)

In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px 

### Read in data

In [2]:
# Load every indicator table from the shared data folder. Paths are relative,
# so the notebook must be run from its own directory.

# Tables explored in the sections below (both are reshaped from wide to long
# with `country` as the identifier column).
happiness = pd.read_csv('../data/hapiscore_whr.csv')
fish = pd.read_csv('../data/fisfod_cons_pc.csv')

# Remaining candidate indicators; loaded here but not used in the cells
# visible in this part of the notebook.
# NOTE(review): units are presumably the ones spelled in the file names
# (sq km, hectares, per capita) - confirm against the data source.
oil_per_cap = pd.read_csv('../data/oil_consumption_per_cap.csv')
forest_area = pd.read_csv('../data/forest_area_sq_km.csv')
planted_forest_area = pd.read_csv('../data/planted_forest_area_ha.csv')
surface_area = pd.read_csv('../data/surface_area_sq_km.csv')


## Happiness

In [3]:
# Reshape happiness from wide to long: `country` stays as the identifier and
# every other column becomes one (year, happiness_score) row per country.
# The `year` values are taken from the original column labels.
happiness_long = pd.melt(
    happiness,
    id_vars=['country'],
    var_name='year',
    value_name='happiness_score',
)


In [4]:

# Happiness score by year, one coloured trace per country.
fig = px.scatter(
    happiness_long,
    x='year',
    y='happiness_score',
    color='country',
    template='plotly_dark',
)

# Start with every trace hidden; a country is shown by clicking its legend entry.
fig.update_traces(visible='legendonly')

fig.show()

## Fish consumption

In [5]:
# Reshape fish consumption from wide to long, keyed by `country`.
# NOTE(review): the value column is named `pct_fish_consumption`, but the
# source file name ('fisfod_cons_pc.csv') suggests a per-capita quantity
# rather than a percentage - confirm the units before interpreting.
fish_long = pd.melt(
    fish,
    id_vars=['country'],
    var_name='year',
    value_name='pct_fish_consumption',
)

# Fish consumption by year, one coloured trace per country.
fig = px.scatter(
    fish_long,
    x='year',
    y='pct_fish_consumption',
    color='country',
    template='plotly_dark',
)

# Start with every trace hidden; a country is shown by clicking its legend entry.
fig.update_traces(visible='legendonly')

fig.show()

In [6]:
# Compare country coverage between the fish and happiness tables.
#
# The column lookups are computed up front instead of inside the f-strings:
# reusing the enclosing quote character inside a replacement field
# (f'...{df['country']}...') is only accepted from Python 3.12 (PEP 701) and
# is a SyntaxError on older interpreters. The printed text is unchanged.
fish_countries = fish_long['country'].unique()
happiness_countries = happiness_long['country'].unique()
country_counts = (fish_long['country'].nunique(), happiness_long['country'].nunique())

# see if countries are the same in each df
print(f'Count of countries in each:\n\n {country_counts}\n\n')

# see which countries are different - Countries in fish_long but not in happiness_long
only_in_fish = np.setdiff1d(fish_countries, happiness_countries)
print(f'in fish_long, not in happiness_long:\n\n {only_in_fish}\n\n')

# see which countries are different - Countries in happiness_long but not in fish_long
only_in_happiness = np.setdiff1d(happiness_countries, fish_countries)
print(f'in happiness_long, not in fish_long:\n\n {only_in_happiness}\n\n')



Count of countries in each:

 (170, 164)


in fish_long, not in happiness_long:

 ['Antigua and Barbuda' 'Bahamas' 'Barbados' 'Brunei' 'Cape Verde'
 'Dominica' 'Fiji' 'Grenada' 'Guinea-Bissau' 'Kiribati' 'North Korea'
 'Samoa' 'Sao Tome and Principe' 'Solomon Islands' 'St. Kitts and Nevis'
 'St. Lucia' 'St. Vincent and the Grenadines' 'Timor-Leste' 'Vanuatu']


in happiness_long, not in fish_long:

 ['Bahrain' 'Bhutan' 'Burundi' 'Comoros' 'Congo, Dem. Rep.' 'Kosovo'
 'Libya' 'Palestine' 'Qatar' 'Singapore' 'Somalia' 'South Sudan' 'Syria']




In [7]:
# merge happiness_long and fish_long on country and year
# Inner join: only (country, year) pairs present in BOTH tables are kept, so
# countries that appear in just one table are dropped from the result.
happy_fish_merged = pd.merge(happiness_long, fish_long, on=['country', 'year'], how='inner')

# plot happiness_score against pct_fish_consumption
fig = px.scatter(happy_fish_merged,
                 x='pct_fish_consumption',
                 y='happiness_score',
                 color='country',
                 hover_data = ['country', 'year', 'happiness_score', 'pct_fish_consumption'],
                 template='plotly_dark'
                 )
# Both axes are pinned to a fixed 0-100 window.
# NOTE(review): any point with a value above 100 on either axis falls outside
# the initial view - confirm that both measures really stay within 0-100.
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[0, 100])

fig.show()