In [1]:
import pandas as pd
import streamlit as st
import altair as alt

from altair import datum
from typing import Tuple
from vega_datasets import data
from math import ceil, floor, log10

In [2]:
# Altair test
source = data.stocks()
alt.Chart(source).mark_line().encode(
    x='date',
    y='price',
    color='symbol',
).interactive()

NumExpr defaulting to 8 threads.


In [3]:
URL_OPENCOVID19 = "https://raw.githubusercontent.com/victorvicpal/COVID19_es/master/data/final_data/dataCOVID19_es.csv"
url_pop_ccaa = "https://raw.githubusercontent.com/victorvicpal/COVID19_es/master/data/info_data/Poblaci%C3%B3nCCAA.csv"

In [4]:
def get_data(url) -> Tuple[pd.DataFrame]:
    """
    1 - Get data from opencovid19 repository
    2 - Transform raw data into dataframe
    3 - Returns df_covid19_fr (columns = ['date', 'type', 'nombre']) and
                df_covid19_region (columns = ['date', 'maille_nom', 'cas_confirmes', 'deces', 'delta_deces',
                                              'delta_cas_confirmes', 'fatality_rate', 'days_after_5_deaths',
                                               'days_after_50_confirmed']) 
    """
    # 1 - Get data
    data = pd.read_csv(url)
    #data = pd.read_csv(url)
    df_covid19_region = data
    
    df_covid19_region = df_covid19_region.sort_values(by=["CCAA", "fecha"])
    # create a new index based from day after 5 deaths
    df_covid19_region["days_after_5_deaths"] = (
        df_covid19_region[df_covid19_region.muertes > 5]
        .groupby("CCAA")["muertes"]
        .rank(method="first", ascending=True)
    )
    # create a new index based from day after 50 confirmed
    df_covid19_region["days_after_50_confirmed"] = (
        df_covid19_region[df_covid19_region.casos > 50]
        .groupby("CCAA")["casos"]
        .rank(method="first", ascending=True)
    )
    df_covid19_region = df_covid19_region.fillna(value=0)


    return df_covid19_region

In [5]:
df_covid19_region = get_data(URL_OPENCOVID19)
col_idx = [1,2]+list(range(4,9))
df_regions = pd.read_csv(url_pop_ccaa)
#print(df_covid19_es.head())
#print(df_covid19_region.head())
#print(df_regions)

In [6]:
df_covid19_region

Unnamed: 0,CCAA,fecha,casos,IA,UCI,muertes,Hospitalizados,curados,nuevos,days_after_5_deaths,days_after_50_confirmed
0,Andalucía,2020-03-02,12.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Andalucía,2020-03-03,13.0,0.15,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,Andalucía,2020-03-04,13.0,0.15,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Andalucía,2020-03-05,12.0,0.14,1.0,0.0,0.0,0.0,-1.0,0.0,0.0
4,Andalucía,2020-03-06,21.0,0.25,1.0,0.0,0.0,0.0,9.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
603,PaísVasco,2020-03-29,5740.0,231.45,271.0,265.0,3106.0,1503.0,604.0,20.0,21.0
604,PaísVasco,2020-03-30,6057.0,245.81,293.0,297.0,3427.0,1646.0,317.0,21.0,22.0
605,PaísVasco,2020-03-31,6320.0,251.61,307.0,325.0,3594.0,1796.0,263.0,22.0,23.0
606,PaísVasco,2020-04-01,6838.0,265.65,324.0,369.0,3806.0,2165.0,518.0,23.0,24.0


In [7]:
df_regions.head()

Unnamed: 0,CCAA,Densidad,Población
0,Andalucía,96,8426405
1,Aragón,28,1320794
2,Asturias,96,1022293
3,Baleares,238,1187808
4,Canarias,296,2207225


In [8]:
#%% create necessary variables


df_covid19_region = df_covid19_region.merge(df_regions,on='CCAA',how='inner')
#col_idx = [1,2]+list(range(4,9))


col_idx = list(range(0,df_covid19_region.shape[1]))
df_covid19_es = df_covid19_region.iloc[:,col_idx].groupby(['fecha'],as_index=False).sum()
# create a new index based from day after 5 deaths
df_covid19_es["days_after_5_deaths"] = (
    df_covid19_es[df_covid19_es.muertes > 5]
    ["muertes"]
    .rank(method="first", ascending=True)
)
# create a new index based from day after 50 confirmed
df_covid19_es["days_after_50_confirmed"] = (
    df_covid19_es[df_covid19_es.casos > 50]
    ["casos"]
    .rank(method="first", ascending=True)
)
df_covid19_es['CCAA'] = 'Total_pais'
df_covid19_es = df_covid19_es.fillna(value=0)

df_covid19_region = df_covid19_region.append(df_covid19_es)

df_covid19_region["dead_ratio"] = df_covid19_region["muertes"]/df_covid19_region["Población"]*1000
df_covid19_region["cases_ratio"] = df_covid19_region["casos"]/df_covid19_region["Población"]*1000
df_covid19_region["new_cases_ratio"] = df_covid19_region["nuevos"]/df_covid19_region["Población"]*1000

df_covid19_region["fecha_D"] = pd.to_datetime(df_covid19_region["fecha"],format="%Y/%m/%d")

regions = list(df_covid19_region.CCAA.unique())
x_date_var = "fecha_D"


In [9]:
regions

['Andalucía',
 'Aragón',
 'Asturias',
 'Baleares',
 'CValenciana',
 'Canarias',
 'Cantabria',
 'CastillaLaMancha',
 'CastillayLeón',
 'Cataluña',
 'Ceuta',
 'Extremadura',
 'Galicia',
 'LaRioja',
 'Madrid',
 'Melilla',
 'Murcia',
 'Navarra',
 'PaísVasco',
 'Total_pais']

In [10]:
def truncate_10(n,up_down):
    temp = floor(log10(n))
    if up_down == "up":
        round_op = lambda x: ceil(x)
    else:
        round_op = lambda x: floor(x)

    return float(round_op(n/10**temp)*(10**temp))

In [11]:

((truncate_10(0.022,"d")))

0.02

In [12]:
log10(0.03)

-1.5228787452803376

In [16]:
single_nearest = alt.selection_single(on='mouseover', nearest=True,empty='none')
c_diagnosed = (
    alt.Chart(df_covid19_region)
    .mark_line(interpolate="linear")
    .encode(
        alt.X('fecha_D'),
        alt.Y('casos'),                
        color=alt.condition(
            single_nearest,
            alt.Color("CCAA:N", scale=alt.Scale(scheme="category20b")), 
            alt.value('lightgray')),        
        tooltip=['fecha_D', 'casos', "CCAA"],
        #strokeDash="CCAA",
        # strokeDash=alt.condition(
        #     alt.datum.CCAA == "Total_pais", [1,1], [0,0]),
                    
    ).add_selection(
        single_nearest
    ).interactive()
)    
c_diagnosed

In [13]:
#%% get df_covid19_region based on region in multiselection
# unit_testing variables
multiselection = "regions"
viz_option = "graph"
scale = "linear"

# code
df_covid19_region = df_covid19_region[
    df_covid19_region["CCAA"].isin(multiselection)
].sort_values(by=["CCAA", "fecha"], ascending=[True, False])

# intentar pintarlo awquí fuera
# comparar mis datos con los originales
# ver si el script original funciona en interactivo

# c_deaths = (
#     alt.Chart(df_covid19_es).
#     mark_line(point=True).
#     encode(
#             x="days_after_5_deaths",
#             y="muertes",
#         ).interactive()
# )



if st.checkbox("Log Scale"):
    scale = alt.Scale(type="log", domain=[10, 5000], clamp=True)
else:
    scale = alt.Scale(type="linear")
print("I\'m in")
#st.info("""I'm in""")
# make plot on nb of deces by regions
c_deaths = (
    alt.Chart(df_covid19_region)
    .mark_line(point=True)
    .encode(
        alt.X("days_after_5_deaths"),
        alt.Y("muertes", scale=scale),
        alt.Color("CCAA"),
        tooltip=["days_after_5_deaths", "deaths", "CCAA"],
    )
    .interactive()
)
c_deaths2 = (
    alt.Chart(df_covid19_es)
    .mark_line(point=True)
    .encode(
        x=("days_after_5_deaths"),
        y=("muertes"),
    )
    .interactive()
)
alt.Chart(source).mark_line().encode(
    x='date',
    y='price',
    color='symbol',
    ).interactive()
print('Heading out')
# if st.checkbox("Log Scale"):
#     scale2 = alt.Scale(type="log", domain=[10, 5000], clamp=True)
# else:
#     scale2 = alt.Scale(type="linear")        
# c_diagnosed = (
#     alt.Chart(df_covid19_region)
#     .mark_line(point=True)
#     .encode(
#         alt.X("fecha"),
#         alt.Y("casos", scale=scale2),
#         alt.Color("CCAA"),
#         tooltip=["days_after_5_deaths", "casos", "CCAA"],
#     )
#     .interactive()
# )    


pippo = (
    alt.Chart(source).mark_line().encode(
    x='date',
    y='price',
    color='symbol',
    ).interactive()
)


TypeError: only list-like objects are allowed to be passed to isin(), you passed a [str]

In [None]:

alt.Chart(df_covid19_es).mark_line(point=True).encode(
    x=("days_after_5_deaths"),
    y=("muertes"),
).interactive()

In [None]:
df_covid19_region

In [None]:
c_deaths = (
    alt.Chart(df_covid19_region)
    .mark_line(point=True)
    .encode(
        alt.X("fecha"),
        alt.Y("muertes", scale=scale),
        alt.Color("CCAA"),
        tooltip=["fecha", "muertes", "CCAA"],
    )
    .interactive()
)
c_deaths

In [None]:
c_deaths = (
    alt.Chart(df_covid19_region)
    .mark_line(point=True)
    .encode(
        alt.X("fecha_D"),
        alt.Y("muertes", scale=scale),
        alt.Color("CCAA"),
        tooltip=["days_after_5_deaths", "muertes", "CCAA"],
    )
    .interactive()
)
c_deaths

In [None]:
x_date_var

In [None]:
df_covid19_region.dtypes

In [None]:
#x_var = ["days_after_50_confirmed","days_after_5_deaths"]
x_var = [x_date_var,x_date_var]
y_var = ["new_cases_ratio","dead_ratio"]
#y_var = ["casos","muertes"]  

c_heatmap_confirmed = (
    alt.Chart(df_covid19_region)
    .mark_rect()
    .encode(
        alt.X(x_var[0]),
        alt.Y("CCAA:N"),
        alt.Color(y_var[0]+":Q", scale=alt.Scale(scheme="reds")),
        tooltip=[x_var[0], "CCAA", y_var[0]],
    )
    .transform_filter((datum.nuevos >= 0))
    .interactive()
)
c_heatmap_confirmed

In [None]:
regions

In [None]:
alt.Chart(df_covid19_region).mark_bar(binSpacing=1).encode(
    alt.X('fecha_D',type='temporal'),#,timeUnit="week"),
    #alt.X('fecha_D:Q'),
    y="nuevos:Q",    
).properties(
    height=200
).facet(
    row='CCAA:N'
).interactive()

In [None]:
import altair as alt
from altair.expr import datum

from vega_datasets import data
stocks = data.stocks.url

base = alt.Chart(stocks).encode(
    x='date:T',
    y='price:Q',
    color='symbol:N'
).transform_filter(
    datum.symbol == 'GOOG'
)

base.mark_rule() + base.mark_point()

In [None]:
alt.layer(  
  base.mark_point(),
  base.mark_rule()
).interactive()

In [None]:
base = alt.Chart(df_covid19_region).encode(
    alt.X('fecha_D',type='temporal'),
    y="nuevos:Q",
    color=alt.value('black'),
).properties(
    height=200
)
(alt.layer(  
  base.mark_point(),
  base.mark_rule()
).facet(
    #row='CCAA:N',
    column='CCAA:N',
)
 .interactive())

In [None]:
df_covid19_region.columns

In [None]:
import vega
vega.scheme('basic', ['#f00', '#0f0', '#00f', '#ff0', '#f0f', '#0ff']);

In [None]:
# base1 = 
df_covid19_temp = df_covid19_region[df_covid19_region["CCAA"]=="Total_pais"]
df_covid19_temp

In [None]:
# base1 = 
df_covid19_temp = df_covid19_region[df_covid19_region["CCAA"]=="Total_pais"]
df_covid19_temp
base2 = (
    alt.Chart(df_covid19_temp).transform_fold(
    ['muertes', 'curados'],
).mark_area().encode(
        alt.Y('value:Q',stack=True),    
        x='days_after_50_confirmed',
        color='key:N',
))
base2

In [None]:
base2 = (
    alt.Chart(df_covid19_region).transform_fold(
    ['muertes', 'curados'],
).mark_line().encode(
        alt.Y('value:Q',stack=True),    
        x='days_after_50_confirmed',
        color='key:N',
))
base2

In [None]:
base2 = (
    alt.Chart(df_covid19_region).mark_area().encode(
    x='fechas_D:T',
    #alt.Y('value:Q',stack=True),
    y='muertes'
))
base2

In [None]:

(alt.layer(  
  base1,
  base2,
).facet(
    #row='CCAA:N',
    column='CCAA:N',
)
 .interactive())

In [None]:
source = data.iowa_electricity()

alt.Chart(source).mark_area(opacity=0.3).encode(
    x="year:T",
    y=alt.Y("net_generation:Q", stack=None),
    color="source:N"
)


In [None]:
source = data.iowa_electricity()

alt.Chart(source).mark_area().encode(
    x="year:T",
    y="net_generation:Q",
    color="source:N"
)

In [None]:
source