In [3]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np



%matplotlib inline
# Notebook-wide display defaults: 16x10 matplotlib figures and up to 500
# rows shown whenever a data frame is rendered.
mpl.rcParams.update({'figure.figsize': (16, 10)})
pd.set_option('display.max_rows', 500)

import plotly.graph_objects as go

![CRISP_DM](figures/CRISP_DM.png)

# DATA LOAD

In [5]:
# Build the path from its components instead of hard-coding Windows
# back-slashes: in a normal string literal "\d", "\p" and "\c" are invalid
# escape sequences, and such a path only resolves on Windows.
data_path = os.path.join('eps_covid_19', 'data', 'processed', 'covid_small_table.csv')

# The file is ';'-separated; the first column is parsed as dates.
df_analyse = pd.read_csv(data_path, sep=';', parse_dates=[0])

# Show the five most recent rows as a quick sanity check of the load.
df_analyse.sort_values('date', ascending=True).tail()

Unnamed: 0,date,Italy,US,Spain,Germany,Afghanistan
851,2022-05-22,17247552,83282408,12234806,26045528,179716
852,2022-05-23,17257573,83390587,12234806,26109965,179716
853,2022-05-24,17288287,83501457,12238073,26159106,179771
854,2022-05-25,17312432,83718160,12238073,26198811,179835
855,2022-05-26,17333299,83837114,12280345,26200663,179835


In [6]:
def quick_plot(x_in, df_input, y_scale='log', slider=False):
    """Draw every column of a data frame as one line in a plotly figure.

    Intended for a quick visual check of time series. Select a subset of
    columns before calling, e.g. ``df.iloc[:, [0, 6, 7, 8]]``.

    Parameters
    ----------
    x_in : array-like
        x-axis values shared by all traces (date-time objects or numbers).
    df_input : pandas.DataFrame
        Plotting matrix; each column becomes one trace and the column
        name is used as its legend entry.
    y_scale : str, default 'log'
        y-axis type handed to plotly, e.g. 'log' or 'linear'.
    slider : bool, default False
        When truthy, a range slider is shown below the x-axis.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``; nothing is returned.
    """
    fig = go.Figure()

    # One scatter trace per column, all sharing the same x values.
    for column_name in df_input.columns:
        fig.add_trace(go.Scatter(
            x=x_in,
            y=df_input[column_name],
            name=column_name,
            opacity=0.8))

    fig.update_layout(
        autosize=True,
        width=1024,
        height=768,
        font=dict(family="PT Sans, monospace", size=18, color="#7f7f7f"),
    )
    fig.update_yaxes(type=y_scale)
    fig.update_xaxes(
        tickangle=-45,
        nticks=20,
        tickfont=dict(size=14, color="#7f7f7f"),
    )
    if slider:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [9]:
# Plot every column after the leading date column against the dates,
# on a linear y-axis and with the range slider enabled.
quick_plot(
    x_in=df_analyse['date'],
    df_input=df_analyse.iloc[:, 1:],
    y_scale='linear',
    slider=True,
)

In [10]:
threshold = 100  # only values strictly above this are kept for the synchronised timeline

In [11]:
# For each country column (everything after the leading date column) keep
# only the values strictly above `threshold`, in their original order.
# The resulting arrays may differ in length from country to country.
comparison_list = [
    np.array(df_analyse.loc[df_analyse[country] > threshold, country])
    for country in df_analyse.columns[1:]
]

In [17]:
# Build one row per country, then transpose so that each country becomes a
# column and the row index counts positions within the filtered series.
pd_sync_timeline = pd.DataFrame(
    comparison_list,
    index=df_analyse.columns[1:],
).transpose()

In [19]:
# Running 0..n-1 position counter stored under the name 'date'; it is used
# as the shared x-axis when the synchronised timeline is plotted.
pd_sync_timeline['date'] = np.arange(len(pd_sync_timeline))


In [20]:
# Preview the first five rows of the synchronised table.
pd_sync_timeline.head(5)

Unnamed: 0,Italy,US,Spain,Germany,Afghanistan,date
0,155.0,107.0,120.0,117.0,106.0,0
1,229.0,184.0,165.0,150.0,114.0,1
2,322.0,237.0,222.0,188.0,114.0,2
3,453.0,403.0,259.0,240.0,166.0,3
4,655.0,519.0,400.0,349.0,192.0,4


In [21]:
# Plot all columns except the trailing counter column against that counter,
# on a logarithmic y-axis and with the range slider enabled.
quick_plot(
    x_in=pd_sync_timeline['date'],
    df_input=pd_sync_timeline.iloc[:, :-1],
    y_scale='log',
    slider=True,
)