# Data for Interactive Dash Maps

In [1]:
import sys

# Root of the capstone repository; it contains the local `functions` package used by this notebook.
PROJECT_ROOT = '/Users/lauradellantonio/neuefische/Capstone/capstone'

# Remove any earlier copies first so re-running this cell never piles up duplicate
# entries, then insert at index 1 exactly as before (position 0 is left untouched).
sys.path[:] = [entry for entry in sys.path if entry != PROJECT_ROOT]
sys.path.insert(1, PROJECT_ROOT)

In [2]:
import pandas as pd

import functions.functions_data
import functions.functions_mask_wearing
import functions.functions_corona_stats
import functions.functions_HDI
import functions.functions_mask_req

import plotly.express as px  # (version 4.7.0)
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import dash  # (version 1.12.0) pip install dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

from flask import Flask
import os

pd.set_option('display.max_columns', None) # To display all columns

import warnings
# Suppresses every warning for the rest of the session (no category or module filter is given).
# NOTE(review): consider narrowing this to specific categories so genuine problems stay visible.
warnings.filterwarnings(action='ignore')

### Reading in data

In [3]:
# Read the survey data from the country-level "smooth" folder and stack it into one dataframe
survey_dir = "/Users/lauradellantonio/neuefische/Capstone/capstone/data/CMU_Global_data/Full_Survey_Data/country/smooth/"
dfs_country = functions.functions_data.get_data(survey_dir, "country")
countries = pd.concat(dfs_country, ignore_index=True)

Read in data completed.


In [4]:
# Load the real COVID numbers and merge them into the survey dataframe
covid_csv = "/Users/lauradellantonio/neuefische/Capstone/capstone/data/Corona_stats/owid-covid-data.csv"
covid_cases = pd.read_csv(covid_csv)
countries = functions.functions_corona_stats.merge_corona_stats(countries, covid_cases)

Merging corona stats completed.


In [5]:
# The time animation in the plot needs the date as a string, not a datetime object.
# Disabled step that sorts out the date column within the dataframe:
#countries = functions.functions_data.insert_month(countries)

In [6]:
# Dealing with the NaN in the mask wearing column.
# The helper prints the NaN count before and after its update (see the cell output).
# NOTE(review): `countries` is fed back into itself, so re-running this cell re-applies
# the helper to already-processed data — confirm that is harmless.
countries = functions.functions_mask_wearing.deal_with_NaNs_masks(countries)

NaNs before update: 163383
NaNs after update: 0
Updated.


In [7]:
# Build the lookup dictionaries for the hdi and the hdi-levels from the HDRO workbook
hdi_dir = "/Users/lauradellantonio/neuefische/Capstone/capstone/data"
hdi_workbook = "hdro_statistical_data_tables_1_15_d1_d5.xlsx"
dict_hdi, dict_hdi_levels = functions.functions_HDI.get_hdi(hdi_dir, hdi_workbook)

# Add the hdi and hdi-level columns to the countries dataframe
countries = functions.functions_HDI.create_hdi_columns(countries, dict_hdi, dict_hdi_levels)

Creating dictionaries for hdi and hdi-levels completed.
Creating hdi list completed.
Creating hdi-level list completed.


In [8]:
# Adding the mask wearing requirements to the dataframe
# mask_wearing_requirements = pd.read_csv("/Users/lauradellantonio/neuefische/Capstone/capstone/data/data-nbhtq.csv")
# df = functions.functions_mask_req.merge_mask_requirements(countries,mask_wearing_requirements)

In [14]:
# Work on a separate copy so the clean-up in the following cells leaves `countries` unchanged.
df = countries.copy()

In [15]:
# Negative smoothed per-million values are replaced with 0; NaN entries are left as they are
# because the `< 0` comparison is False for them.
for per_million_col in ("new_cases_smoothed_per_million", "new_deaths_smoothed_per_million"):
    is_negative = df[per_million_col] < 0
    df.loc[is_negative, per_million_col] = 0

### Adjusting data for app.py

In [24]:
# Keep only the rows whose age_bucket is "overall", as an independent copy of `df`
is_overall_age = df["age_bucket"] == "overall"
dff = df.loc[is_overall_age].copy()

In [25]:
# Average every selected indicator per (country, iso code, gender, date) combination
group_keys = ["country_agg", "iso_code", "gender", "date"]
indicator_cols = [
    "total_cases_per_million",
    "total_deaths_per_million",
    "median_age",
    "hdi",
    "rolling_total_responses",
    "smoothed_pct_cli_weighted",
    "smoothed_pct_worked_outside_home_weighted",
    "smoothed_pct_grocery_outside_home_weighted",
    "smoothed_pct_ate_outside_home_weighted",
    "smoothed_pct_spent_time_with_non_hh_weighted",
    "smoothed_pct_attended_public_event_weighted",
    "smoothed_pct_used_public_transit_weighted",
    "smoothed_pct_direct_contact_with_non_hh_weighted",
    "smoothed_pct_no_public_weighted",
]
dff2 = dff.groupby(group_keys)[indicator_cols].mean()

In [26]:
#["rolling_total_responses",
#            "smoothed_pct_cli_weighted", "smoothed_pct_worked_outside_home_weighted", 
#              "smoothed_pct_grocery_outside_home_weighted", "smoothed_pct_ate_outside_home_weighted", 
#              "smoothed_pct_spent_time_with_non_hh_weighted", "smoothed_pct_attended_public_event_weighted", 
#              "smoothed_pct_used_public_transit_weighted", "smoothed_pct_direct_contact_with_non_hh_weighted", 
#              "smoothed_pct_no_public_weighted", "smoothed_pct_wear_mask_all_time_weighted", 
#            "smoothed_pct_wear_mask_most_time_weighted", "smoothed_pct_wear_mask_half_time_weighted", 
#            "smoothed_pct_wear_mask_some_time_weighted", "smoothed_pct_wear_mask_none_time_weighted" ]

In [27]:
# Reshape to long format: stack() folds the indicator columns into one extra index level,
# and reset_index() turns every index level back into a regular column.
# NOTE(review): dff2 is overwritten with its own reshaped version, so this cell must be
# run exactly once after the groupby cell.
dff2 = dff2.stack().reset_index()

In [28]:
# Give the auto-generated columns readable names: "level_4" is the unnamed stacked level
# that follows the four group keys (it holds the indicator name), and 0 is the value column.
dff2 = dff2.rename(columns={"level_4": "data_cat", 0: "amount"})

In [29]:
# Write the long-format table into the dash_data folder as a gzip-compressed CSV.
# The dataframe index is written as well (to_csv default).
# NOTE(review): ".gzip" is not a suffix pandas infers compression from, so the reader
# presumably has to pass compression="gzip" explicitly — verify in app.py.
dff2.to_csv("/Users/lauradellantonio/neuefische/Capstone/capstone/dash_data/countries_dash.csv.gzip", compression="gzip")

# Map testing

In [None]:
# Separate copy of the cleaned frame for the map experiment, so `df` itself is not modified.
df_map = df.copy()

In [None]:
# Keep only the rows where both age_bucket and gender are "overall"
is_overall_age = df_map["age_bucket"] == "overall"
is_overall_gender = df_map["gender"] == "overall"
df_map = df_map[is_overall_age & is_overall_gender]

In [None]:
# Mean total_deaths_per_million per (country, iso code, date). The double brackets keep the
# result a one-column DataFrame so it can be stacked in the next cell.
# NOTE(review): df_map is overwritten by its own aggregate, so this cell cannot be re-run
# without first re-running the filter cells above.
df_map = df_map.groupby(["country_agg", "iso_code", "date"])[["total_deaths_per_million"]].mean()

In [None]:
# Reshape to long format: stack() moves the value column into an extra index level and
# reset_index() turns all index levels into regular columns.
df_map = df_map.stack().reset_index()

In [None]:
# "level_3" is the unnamed stacked level after the three group keys (it holds the indicator
# name) and 0 is the value column; rename both to the names used by the plot below.
df_map = df_map.rename(columns={"level_3": "data_cat", 0: "amount"})

In [None]:
# Display the reshaped frame to check the columns before plotting.
df_map

In [None]:
# Animated world map: one frame per date, countries located by iso_code and shaded by "amount"
map_colors = ["white", "#00c5ff", "#00287f", "#00151f"]
fig = px.choropleth(
    data_frame=df_map,
    locations="iso_code",
    color="amount",
    hover_name="country_agg",
    animation_frame="date",
    projection="natural earth",
    color_continuous_scale=map_colors,
)
# Open the figure with the "browser" renderer instead of rendering it inline
fig.show("browser")

### Adjusting data for app.py - more complicated

In [None]:
# Subset of `countries` for the single date 2020-07-16.
# NOTE(review): the following cell rebuilds df1 from the full `countries` frame, so this
# subset is overwritten before it is ever used — confirm whether the groupby was meant
# to run on this filtered frame instead.
df1 = countries[countries["date"]=="2020-07-16"]

In [None]:
# Average the behaviour and mask-wearing indicators of the full `countries` frame
# per (country, GID_0, gender, age bucket, date) combination
detail_keys = ["country_agg", "GID_0", "gender", "age_bucket", "date"]
detail_cols = [
    "rolling_total_responses",
    "smoothed_pct_cli_weighted",
    "smoothed_pct_worked_outside_home_weighted",
    "smoothed_pct_grocery_outside_home_weighted",
    "smoothed_pct_ate_outside_home_weighted",
    "smoothed_pct_spent_time_with_non_hh_weighted",
    "smoothed_pct_attended_public_event_weighted",
    "smoothed_pct_used_public_transit_weighted",
    "smoothed_pct_direct_contact_with_non_hh_weighted",
    "smoothed_pct_no_public_weighted",
    "smoothed_pct_wear_mask_all_time_weighted",
    "smoothed_pct_wear_mask_most_time_weighted",
    "smoothed_pct_wear_mask_half_time_weighted",
    "smoothed_pct_wear_mask_some_time_weighted",
    "smoothed_pct_wear_mask_none_time_weighted",
]
df1 = countries.groupby(detail_keys)[detail_cols].mean()

In [None]:
# Reshape the aggregate to long format: stack() folds the indicator columns into one extra
# index level and reset_index() turns all index levels into regular columns.
df2 = df1.stack().reset_index()

In [None]:
# "level_5" is the unnamed stacked level after the five group keys (it holds the indicator
# name) and 0 is the value column.
df2 = df2.rename(columns={"level_5": "data_cat", 0: "amount"})

In [None]:
# Write the detailed long-format table as a gzip-compressed CSV (index included by default).
# NOTE(review): this path is relative to the current working directory, unlike the absolute
# path used for countries_dash.csv.gzip above — the dash_data folder must exist there.
df2.to_csv("dash_data/countries_dash2.csv.gzip", compression="gzip")