In [1]:
import pandas as pd
import numpy as np
import pymongo
from pymongo import MongoClient

In [2]:
# Read the latitude/longitude lookup table from disk.
lat_lon_file = pd.read_csv(filepath_or_buffer="data/world_lat_lon.csv")
# Wrap the parsed table in a DataFrame bound to the name the later cells use.
geo_df = pd.DataFrame(data=lat_lon_file)
# Preview the first five rows.
geo_df.head(n=5)

Unnamed: 0,country_code,latitude,longitude,country,usa_state_code,usa_state_latitude,usa_state_longitude,usa_state
0,AD,42.546245,1.601554,Andorra,AK,63.588753,-154.493062,Alaska
1,AE,23.424076,53.847818,United Arab Emirates,AL,32.318231,-86.902298,Alabama
2,AF,33.93911,67.709953,Afghanistan,AR,35.20105,-91.831833,Arkansas
3,AG,17.060816,-61.796428,Antigua and Barbuda,AZ,34.048928,-111.093731,Arizona
4,AI,18.220554,-63.068615,Anguilla,CA,36.778261,-119.417932,California


In [3]:
# Capitalise the country/coordinate column names so "Country" can serve as
# the join key against the happiness data.
geo_df = geo_df.rename(
    columns={
        "country": "Country",
        "latitude": "Latitude",
        "longitude": "Longitude",
    }
)

# Discard the country code and the US-state columns, then keep the three
# remaining columns of interest in a fixed order.
clean_geo_df = (
    geo_df
    .drop(
        columns=[
            "country_code",
            "usa_state_code",
            "usa_state_latitude",
            "usa_state_longitude",
            "usa_state",
        ]
    )
    .loc[:, ["Country", "Latitude", "Longitude"]]
)
clean_geo_df.head()

Unnamed: 0,Country,Latitude,Longitude
0,Andorra,42.546245,1.601554
1,United Arab Emirates,23.424076,53.847818
2,Afghanistan,33.93911,67.709953
3,Antigua and Barbuda,17.060816,-61.796428
4,Anguilla,18.220554,-63.068615


In [7]:
# Load the 2020 happiness data.
happy_file = pd.read_csv("data/happy_2020.csv")

# Rename the indicator columns to underscore-separated names, then drop every
# column that is not one of the retained indicators.
happy_df = (
    pd.DataFrame(happy_file)
    .rename(
        columns={
            "Country name": "Country",
            "Social support": "Social_support",
            "Healthy life expectancy": "Healthy_life_expectancy",
            "Freedom to make life choices": "Freedom_to_make_life_choices",
            "Perceptions of corruption": "Perceptions_of_corruption",
        }
    )
    .drop(
        columns=[
            "Regional indicator",
            "Ladder score",
            "Logged GDP per capita",
            "Standard error of ladder score",
            "upperwhisker",
            "lowerwhisker",
            "Ladder score in Dystopia",
            "Explained by: Log GDP per capita",
            "Explained by: Social support",
            "Explained by: Healthy life expectancy",
            "Explained by: Freedom to make life choices",
            "Explained by: Generosity",
            "Explained by: Perceptions of corruption",
            "Dystopia + residual",
        ]
    )
)

# Restrict the table to the eight countries being compared.
happy_country_df = happy_df[
    happy_df["Country"].isin(
        [
            "China",
            "France",
            "India",
            "Israel",
            "Pakistan",
            "Russia",
            "United Kingdom",
            "United States",
        ]
    )
]
happy_country_df

Unnamed: 0,Country,Social_support,Healthy_life_expectancy,Freedom_to_make_life_choices,Generosity,Perceptions_of_corruption
12,United Kingdom,0.936683,72.301605,0.834744,0.263732,0.435916
13,Israel,0.913571,73.200256,0.747581,0.102594,0.780855
17,United States,0.914219,68.2995,0.84262,0.149892,0.699715
22,France,0.937104,73.801933,0.825468,-0.130642,0.583521
65,Pakistan,0.689062,58.253136,0.734834,0.0449,0.745705
72,Russia,0.903151,64.100456,0.729893,-0.151154,0.864803
93,China,0.798761,69.289192,0.898518,-0.181426,0.753971
143,India,0.592201,60.215187,0.881445,0.057552,0.772043


In [8]:
# Attach coordinates to the happiness indicators by joining on "Country".
happy_geo_merge_df = pd.merge(
    clean_geo_df,
    happy_country_df,
    on="Country",
    suffixes=("_geo", "_happy"),
)

# Life expectancy becomes a string with two decimals and thousands separators.
happy_geo_merge_df["Healthy_life_expectancy"] = (
    happy_geo_merge_df["Healthy_life_expectancy"]
    .astype(float)
    .map("{:,.2f}".format)
)

# The remaining indicators are scaled by 100 and rendered as percentage
# strings (for example 0.7988 -> "79.88%"). From here on these columns hold
# text, not numbers.
for pct_col in (
    'Freedom_to_make_life_choices',
    'Perceptions_of_corruption',
    'Generosity',
    'Social_support',
):
    happy_geo_merge_df[pct_col] = pd.Series(
        ["{0:.2f}%".format(val * 100) for val in happy_geo_merge_df[pct_col]],
        index=happy_geo_merge_df.index,
    )

happy_geo_merge_df.head()

Unnamed: 0,Country,Latitude,Longitude,Social_support,Healthy_life_expectancy,Freedom_to_make_life_choices,Generosity,Perceptions_of_corruption
0,China,35.86166,104.195397,79.88%,69.29,89.85%,-18.14%,75.40%
1,France,46.227638,2.213749,93.71%,73.8,82.55%,-13.06%,58.35%
2,United Kingdom,55.378051,-3.435973,93.67%,72.3,83.47%,26.37%,43.59%
3,Israel,31.046051,34.851612,91.36%,73.2,74.76%,10.26%,78.09%
4,India,20.593684,78.96288,59.22%,60.22,88.14%,5.76%,77.20%


In [9]:
# Write the merged, formatted table to disk as CSV, including the row index.
happy_geo_merge_df.to_csv(path_or_buf="data/happy_geo", index=True)

# Database Setup

In [10]:
# Connection string for the MongoDB server, and the client built from it.
conn = 'mongodb://localhost:27017'
client = MongoClient(conn)

# Handle to the "happyDB" database.
db = client["happyDB"]

# Handle to the "happyGeo" collection that the merged rows are written to.
happyGeo = db["happyGeo"]

In [11]:
# Insert one document into the happyGeo collection per row of the merged frame.
# NOTE: nothing here clears the collection first, so re-running this cell
# appends the same documents again.
for _, row in happy_geo_merge_df.iterrows():
    # Build the document for this row. Coordinates are stored as floats; the
    # indicator columns are stored as the strings they already hold in the frame.
    post = {
        "Country": str(row["Country"]),
        "Latitude": float(row["Latitude"]),
        # Longitude must be read from the Longitude column (it was previously
        # copied from Latitude, which stored the wrong coordinate).
        "Longitude": float(row["Longitude"]),
        # Social_support is part of the frame and of the CSV/JSON exports, so
        # it is stored here too to keep the three outputs consistent.
        "Social_support": str(row["Social_support"]),
        "Healthy_life_expectancy": str(row["Healthy_life_expectancy"]),
        "Freedom_to_make_life_choices": str(row["Freedom_to_make_life_choices"]),
        "Generosity": str(row["Generosity"]),
        "Perceptions_of_corruption": str(row["Perceptions_of_corruption"]),
    }
    # Write the document to the collection.
    happyGeo.insert_one(post)

# Data to JSON For Visualizations

In [12]:
# Write the merged, formatted table to a JSON file for the visualizations.
happy_geo_merge_df.to_json(path_or_buf="data/happy_json.json")