In [18]:
import pandas as pd 
import os 
import sys 
import dash 
import plotly.express as px

## Preprocessing data 

In [218]:
def import_data(dir_name):
    col_to_rename = {"Country/Region":"Country_Region",
                     "Province/State":"Province_State",
                     "Last Update":"Last_Update",
                     "Long_":"Longitude",
                     "Lat":"Latitude"
                    }
    df_cols = ['Province_State', 'Country_Region', 'Last_Update', 'Confirmed','Deaths', 'Recovered','Active']
    df = pd.DataFrame(columns = df_cols)
    for file_name in os.listdir(dir_name):
        if file_name.endswith(".csv"):
            tmp = pd.read_csv(os.path.join(dir_name, file_name)).rename(columns=col_to_rename)
            tmp["Date"] = file_name[-14:-4]
            df = pd.concat(
                [df, tmp], 
                axis=0, 
                join='outer'
                          )
            
    return df

def preprocess_data(df):
    
    df.Deaths = df.Deaths.fillna(0)
    df.Confirmed = df.Confirmed.fillna(0)
    df.Recovered = df.Recovered.fillna(0)
    df.Active = df.Active.fillna(0)
    df.Province_State = df.Province_State.fillna("")  

    df.Last_Update = pd.to_datetime(pd.to_datetime(df.Last_Update).map(lambda x: x.strftime('%Y-%m-%d'))) 
    df = df.drop_duplicates(subset=["Admin2","Province_State","Country_Region","Date"], keep="first")
    
    return df


def clean_country_names(df):
    country_names_mistakes = {
                            "Mainland China":"China",
                            "Viet Nam":"Vietnam", 
                            "Taiwan*":"Taiwan", 
                            "Hong Kong SAR":"Hong Kong,", 
                            "Gambia, The":"Gambia",
                            "Guinea-Bissau":"Guinea", 
                            "Czechia":"Czech Republic",
                            "Bahamas The":"Bahamas",
                            "Korea, South":"South Korea"
                             }
    df.Country_Region = df.Country_Region.replace(country_names_mistakes)
        
    return df
        
    
def get_per_country_data(df):
    return df.groupby(["Country_Region","Date"])["Confirmed","Deaths","Recovered"].sum().reset_index()

def get_specific_countries(df_per_country, list_of_countries):
    return df_per_country[df_per_country.Country_Region.isin(list_of_countries)]


In [219]:
df = import_data("..\csse_covid_19_data\csse_covid_19_daily_reports")
df = clean_country_names(preprocess_data(df))

df = get_per_country_data(df)

ParserError: Error tokenizing data. C error: out of memory

## World spread of Covid 19 

In [47]:
df_per_country = df_per_country.sort_values(by="Date")
#df_per_country.Last_Update = df_per_country.Last_Update.map(lambda x: x.strftime('%Y-%m-%d'))

fig = px.choropleth(df_per_country, 
                    locations="Country_Region", 
                    locationmode = "country names",
                    color="Confirmed", 
                    color_continuous_scale="Peach",
                    hover_name="Country_Region", 
                    animation_frame="Date"
                   )

fig.update_layout(
    title_text = 'Spread of Coronavirus',
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
    ))
    
fig.show()

## US Covid 19 

In [210]:
import json 


def get_states_data(df_usa):
    
    df_usa = df[df.Country_Region == "US"]
    
    city_to_code = pd.read_csv("https://raw.githubusercontent.com/scpike/us-state-county-zip/master/geo-data.csv")
    city_to_code = city_to_code.loc[:,["city","state_abbr"]].set_index("city").to_dict()["state_abbr"]
    with open("../additional_data/us_code.json") as json_file:
        code_to_province = json.load(json_file)
        province_to_code = dict([[v,k] for k,v in code_to_province.items()])

    df_usa["Code"] = [el.split(",")[-1].strip() for el in df_usa.Province_State]
    df_usa["Code"] = df_usa["Code"].replace(city_to_code)
    df_usa["Code"] = df_usa["Code"].replace(province_to_code)

    error = []
    for province in df_usa.Code:
        if len(province) > 2:
            error.append(province)
    error = list(set(error))

    df_usa = df_usa[~ df_usa.Code.isin(error)]

    code_province = pd.read_csv("https://raw.githubusercontent.com/scpike/us-state-county-zip/master/geo-data.csv")
    code_province = code_province.loc[:,["city","state_abbr"]].set_index("city").to_dict()["state_abbr"]
    df_usa = df_usa.groupby(["Code","Date"])["Confirmed"].sum().reset_index().sort_values(by="Date")
    
    return df_usa

df_usa = get_states_data(df)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [212]:
fig = px.choropleth(df_usa, 
                    locations=df_usa.Code, 
                    locationmode = "USA-states",
                    color="Confirmed", 
                    color_continuous_scale="Peach",
                    animation_frame="Date"
                   )

fig.update_layout(
    title_text = 'Spread of Coronavirus',
    title_x = 0.5,
    geo_scope='usa',
    geo=dict(
        showframe = False,
        showcoastlines = False,
    ))
    
fig.show()

In [7]:
# pas un truc alarmiste
# micro virus en lui même 
# impact du confinement sur le covid 
# les choses évoluent 
# vue avec d'autres pays UE - MONDE
# homme femmes ..