In [None]:
pip install dash

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
"""Importing the required libraries"""

from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd

# import plotly.offline as pyo
import plotly.graph_objs as go

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

"""Building the functions required to scrape the website"""

# ---------------------------------------------------------------------------

def get_country_data(country_line):
    """
    Convert the text of one table row into a list of cleaned cell values.

    The row text is split on newlines (one cell per line). The first cell
    (the one under the "#" header column) and the last cell are discarded.
    Every remaining cell that is a plain number such as "1,234", "+56", "0"
    or "12.5" is converted to a float; any other cell (country name,
    continent, "N/A", empty string, ...) is kept unchanged as a string.

    Parameters:
        country_line : str
            The text of a table row, with one cell per line.

    Returns:
        line : list
            The cleaned cells, starting with the first cell after the
            discarded one. Empty when the row has fewer than three cells.
    """
    import re

    # Optional sign, digits with optional thousands separators, optional
    # decimals. Matching the whole cell (instead of searching for any digit
    # 1-9) means "0" is recognised as a number and text that merely contains
    # a digit is left alone rather than crashing float().
    numeric = re.compile(r"[+-]?\d[\d,]*(?:\.\d+)?")

    cells = country_line.strip().split("\n")[1:-1]
    return [
        float(cell.replace(",", "")) if numeric.fullmatch(cell.strip()) else cell
        for cell in cells
    ]


def get_column_names(tr):
    """
    Build the list of column names from the text of the table's header row.

    The header text is split into one cell per line after stripping
    surrounding newlines, "#" characters and whitespace. Cells 12 and 13 are
    glued into a single name (on the page output seen so far these are
    "Tests/" and "1M pop"), cell 14 is dropped, and finally the first and
    last names are removed.

    Parameters:
        tr : str
            The text of the header row, one header cell per line.

    Returns:
        list
            The column names, excluding the first and the last header cell.

    Raises:
        IndexError: if the header has fewer than 15 cells.
    """
    cells = tr.strip("\n#").strip().split("\n")
    # Positions are fixed by the page layout; touching cell 14 here keeps the
    # "header too short" failure an IndexError.
    first_half, second_half, _spacer = cells[12], cells[13], cells[14]
    names = cells[:12] + [first_half + second_half] + cells[15:]
    return names[1:-1]


def scrape_corona_data():
    """
    Download the worldometers coronavirus page and parse its main table.

    The header row of the table with id "main_table_countries_today" gives
    the column names (via get_column_names); each selected data row is
    cleaned with get_country_data and stored under its first cell.

    Returns:
        countries_data : defaultdict(dict)
            Maps the first cleaned cell of each row (the country name) to a
            dict of {column name: value} for that row.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched
            within the timeout or the server answers with an error status.
        RuntimeError: if the expected table is not present in the page.
    """
    from collections import defaultdict

    # A timeout keeps the app from hanging forever on a stalled connection,
    # and raise_for_status avoids trying to parse an error page.
    response = requests.get("https://www.worldometers.info/coronavirus/", timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "lxml")
    corona_table = soup.find("table", id="main_table_countries_today")
    if corona_table is None or corona_table.tr is None:
        raise RuntimeError(
            "Table 'main_table_countries_today' was not found in the downloaded page."
        )

    header_text = corona_table.tr.text
    print(header_text)
    column_names = get_column_names(header_text)
    print(column_names)

    countries_data = defaultdict(dict)
    # The slice skips the first two and last two matching rows, as the
    # original code did (presumably header/summary rows - verify against the page).
    for tr in corona_table.find_all("tr", {"style": ""})[2:-2]:
        line = get_country_data(tr.text)
        if not line:
            # Nothing usable in this row; there is no key to store it under.
            continue
        countries_data[line[0]] = dict(zip(column_names, line[1:]))
    return countries_data


def replace_nan(data):
    """
    Replace placeholder values ("N/A", "" and " ") with np.nan, in place.

    The replacement is applied to the DataFrame itself rather than to each
    column selection: calling replace(..., inplace=True) on data[col] is
    chained in-place mutation, which pandas has deprecated and which does
    not update the parent frame when Copy-on-Write is enabled.

    Parameters:
        data : dataframe
            The frame to clean. It is modified in place.

    Returns:
        None
    """
    data.replace(["N/A", "", " "], np.nan, inplace=True)


def create_clean_dataframe(countries_data):
    """
    Turn the scraped per-country dict into a DataFrame with one row per country.

    Parameters:
        countries_data : dict object
            Maps each country to a dict of {column name: value}.

    Returns:
        data : dataframe
            Countries as the index, the inner dict keys as columns, and the
            placeholder values handled by replace_nan turned into np.nan.
    """
    # The outer keys become columns on construction; transposing puts the
    # countries on the index instead.
    frame = pd.DataFrame(countries_data).T
    replace_nan(frame)
    return frame


"""Building the plotting functions"""

# ---------------------------------------------------------------------------

def plot_continent_data(data, keyword):
    """
    Build a grouped bar chart of cases, recoveries and deaths summed per continent.

    Parameters:
        data : dataframe
            The whole dataset; must contain a "Continent" column.
        keyword : str
            "New" selects the New* columns; any other value selects the
            Total* columns.

    Returns:
        fig : Figure
            One bar trace per selected column, with continents on the x axis.
    """
    cols = (
        ["NewCases", "NewRecovered", "NewDeaths"]
        if keyword == "New"
        else ["TotalCases", "TotalRecovered", "TotalDeaths"]
    )
    per_continent = data.groupby("Continent")[cols].sum()

    # Colours pair up with the columns in order: cases, recovered, deaths.
    palette = ["#031cfc", "#24b514", "#d11d1d"]
    bars = [
        go.Bar(
            x=per_continent.index.to_list(),
            y=per_continent[col],
            name=col,
            marker=dict(color=color),
        )
        for col, color in zip(cols, palette)
    ]

    return go.Figure(
        data=bars,
        layout=go.Layout(
            title=f"Corona {keyword} Cases/Recovered/Deaths",
            xaxis=dict(title="Continents"),
            yaxis=dict(title="Cases per Continent"),
        ),
    )


def get_continent_sorted_data(data, continent, sortedby="TotalCases", ascending=False):
    """
    Return the rows of one continent, sorted by a column, with a fresh index.

    Parameters:
        data : dataframe
            The whole dataset; must contain a "Continent" column.
        continent : str
            The continent whose rows are wanted.
        sortedby : str, Default="TotalCases"
            The column to sort by.
        ascending : Boolean, Default=False
            True for ascending order, False for descending order.

    Returns:
        dataframe
            The continent's rows in sorted order. The previous index is moved
            into a regular column by reset_index().
    """
    continent_rows = data.groupby("Continent").get_group(continent)
    ordered = continent_rows.sort_values(by=sortedby, ascending=ascending)
    return ordered.reset_index()


def get_top_k_countries(data, k_countries=10, sortedby="TotalCases", ascending=False):
    """
    Return the first k rows of the dataset after sorting it by one column.

    Parameters:
        data : dataframe.
            The whole dataset.
        k_countries : int, Default=10
            How many rows to keep from the top of the sorted frame.
        sortedby : str, Default="TotalCases".
            The column to sort by.
        ascending : Boolean, Default=False
            True for ascending order, False for descending order.

    Returns:
        data : dataframe
            At most k_countries rows, in sorted order, keeping the original index.
    """
    ranked = data.sort_values(by=sortedby, ascending=ascending)
    return ranked.iloc[:k_countries]


def plot_top_k_countries(n_countries, sortby):
    """
    Build a bar chart of the top countries ranked by one column.

    Reads the module-level `data` frame, keeps the n_countries rows with the
    largest `sortby` values (via get_top_k_countries) and plots that column
    against the frame's index.

    Parameters:
        n_countries : int
            How many rows to show.
        sortby : str
            The column used both for ranking and for the bar heights.

    Returns:
        fig : Figure
            A figure holding a single bar trace.
    """
    top_rows = get_top_k_countries(data, n_countries, sortby)
    bar = go.Bar(x=top_rows.index.to_list(), y=top_rows[sortby], name=sortby)

    return go.Figure(
        data=[bar],
        layout=go.Layout(
            title=f"Top Countries orderedby {sortby}",
            xaxis=dict(title="Countries"),
            yaxis=dict(title=f"{sortby}"),
        ),
    )


def plot_boxplots(data, keyword="Deaths/1M pop"):
    """
    Build one box plot per continent for the values of a single column.

    Parameters:
        data : dataframe
            The whole dataset; must contain a "Continent" column.
        keyword : str, Default="Deaths/1M pop"
            The column whose distribution is drawn.

    Returns:
        fig : Figure
            One box trace per continent, in the order produced by
            value_counts() on the "Continent" column.
    """
    by_continent = data.groupby("Continent")
    continent_names = data["Continent"].value_counts().index.to_list()
    boxes = [
        go.Box(y=by_continent.get_group(name)[keyword], name=name)
        for name in continent_names
    ]

    return go.Figure(
        data=boxes,
        layout=go.Layout(
            title=f"Boxplots using {keyword}",
            xaxis=dict(title="Continents"),
            yaxis=dict(title=f"{keyword}"),
        ),
    )



def plot_pie_chart(data, keyword):
    """
    Build a pie chart of a continent's summed cases, deaths, recoveries and tests.

    Parameters:
        data : dataframe
            The whole dataset; must contain a "Continent" column.
        keyword : str
            The continent to summarise.

    Returns:
        fig : Figure
            A figure holding a single pie trace with four slices.
    """
    rows = data[data['Continent'] == keyword]

    # (slice label, source column), in the order the slices are drawn.
    slices = (
        ('Total Cases', 'TotalCases'),
        ('Total Deaths', 'TotalDeaths'),
        ('Total Recovered', 'TotalRecovered'),
        ('Total Tests', 'TotalTests'),
    )
    labels = [label for label, _ in slices]
    values = [rows[column].sum() for _, column in slices]

    return go.Figure(
        data=[go.Pie(labels=labels, values=values)],
        layout=go.Layout(title=f'Total updates of coronavirus in {keyword}'),
    )

def plot_active_vs_critical(data, keyword):
    """
    Compare active cases with serious/critical cases for one continent.

    The figure contains a bar trace with the continent's totals for both
    measures, plus two line traces showing the per-row values of each
    measure across the continent's rows (x axis: the frame's index).

    The bar previously used the values of the continent's last row, which
    is a single country rather than the continent, and failed with an
    IndexError when the continent had no rows. It now uses the column sums.

    Parameters:
        data : dataframe
            The whole dataset; must contain "Continent", "ActiveCases" and
            "Serious,Critical" columns.
        keyword : str
            The name of the continent.

    Returns:
        fig : Figure
            The figure that will be drawn on plotly.
    """
    # Filter the data for the specified continent
    continent_data = data[data['Continent'] == keyword]

    active_cases = continent_data['ActiveCases']
    serious_cases = continent_data['Serious,Critical']

    # Coerce before summing so a stray non-numeric cell counts as missing
    # instead of breaking the addition; sum() skips NaN and gives 0 when empty.
    active_total = pd.to_numeric(active_cases, errors="coerce").sum()
    serious_total = pd.to_numeric(serious_cases, errors="coerce").sum()

    # Continent-level totals
    bar_trace = go.Bar(
        x=['Active Cases', 'Serious/Critical Cases'],
        y=[active_total, serious_total],
        name=keyword
    )

    # Per-row active cases
    active_curve_trace = go.Scatter(
        x=continent_data.index,
        y=active_cases,
        name='Active Cases',
        mode='lines'
    )

    # Per-row serious/critical cases
    critical_curve_trace = go.Scatter(
        x=continent_data.index,
        y=serious_cases,
        name='Serious/Critical Cases',
        mode='lines'
    )

    layout = go.Layout(
        title=f'Active Cases vs Serious/Critical Cases ({keyword})',
        yaxis=dict(title='Count')
    )

    fig = go.Figure(data=[bar_trace, active_curve_trace, critical_curve_trace], layout=layout)
    return fig

import plotly.graph_objs as go

def plot_population(data, continent, keyword):
    """
    Build a two-bar chart: the mean of one column next to the mean population.

    Both means are taken over the rows of the given continent.

    Parameters:
        data : dataframe
            The whole dataset; must contain "Continent" and "Population" columns.
        continent : str
            The name of the continent.
        keyword : str
            The column whose mean is shown in the first bar.

    Returns:
        fig : Figure
            A figure holding a single bar trace with two bars.
    """
    rows = data[data['Continent'] == continent]

    keyword_mean = rows[keyword].mean()
    population_mean = rows['Population'].mean()

    bars = go.Bar(
        x=[keyword, 'Population'],
        y=[keyword_mean, population_mean],
        name=continent,
        # First colour is for the keyword bar, second for the population bar.
        marker=dict(color=['blue', 'green']),
    )

    return go.Figure(
        data=[bars],
        layout=go.Layout(
            title=f'Average {keyword} and Population ({continent})',
            yaxis=dict(title='Count'),
        ),
    )

def init_figure():
    """
    Build the six figures shown when the app first loads.

    Uses the module-level `data` frame and the same selections the layout's
    dropdowns start with.

    Returns:
        tuple
            (continent bar chart, top-countries bar chart, pie chart,
            box plots, active-vs-critical chart, population chart).
    """
    continent_fig = plot_continent_data(data, keyword="New")
    top_countries_fig = plot_top_k_countries(10, "TotalCases")
    pie_fig = plot_pie_chart(data, keyword='Asia')
    box_fig = plot_boxplots(data)
    active_fig = plot_active_vs_critical(data, keyword='South America')
    population_fig = plot_population(data, continent='Africa', keyword='TotalCases')
    return (
        continent_fig,
        top_countries_fig,
        pie_fig,
        box_fig,
        active_fig,
        population_fig,
    )



"""Initial Figures"""
# ---------------------------------------------------------------------------

# Scrape the page once at import time; `data` is the module-level frame that
# plot_top_k_countries, init_figure and every callback below read.
countries_data = scrape_corona_data()
data = create_clean_dataframe(countries_data)

# Pre-built figures handed to the dcc.Graph components in the layout, in the
# order returned by init_figure().
init_continent_fig, init_k_countries_plot, init_pie_fig, init_box_fig, init_active_vs_critical, init_population = init_figure()



"""Building the app"""
# ---------------------------------------------------------------------------

# Initializing the app
app = dash.Dash(__name__)
server = app.server


def _dropdown_options(pairs):
    """Build a fresh list of dropdown option dicts from (label, value) pairs."""
    return [dict(label=label, value=value) for label, value in pairs]


def _section(title, *children):
    """Wrap a centred H2 title (between two line breaks) and the children in a Div."""
    return html.Div([
        html.Br(),
        html.H2(title, style={"text-align": "center"}),
        html.Br(),
        *children,
    ])


# Continents offered by every continent dropdown (label and value are the same).
_CONTINENT_CHOICES = (
    ("Africa", 'Africa'),
    ("Asia", 'Asia'),
    ("Europe", 'Europe'),
    ("North America", 'North America'),
    ("Australia/Oceania", 'Australia/Oceania'),
    ("South America", 'South America'),
)

# Dataset columns offered by the two attribute dropdowns.
_ATTRIBUTE_CHOICES = (
    ("Total Cases", 'TotalCases'),
    ("New Cases", 'NewCases'),
    ("Total Cases per 1M population", 'Tot Cases/1M pop'),
    ("Active Cases", 'ActiveCases'),
    ("Serious, Critical Cases", 'Serious,Critical'),
    ("Total Deaths", 'TotalDeaths'),
    ("New Deaths", 'NewDeaths'),
    ("Deaths per 1M population", 'Deaths/1M pop'),
    ("Total Recovered", 'TotalRecovered'),
    ("New Recovered", 'NewRecovered'),
    ("Total Tests", 'TotalTests'),
    ("Tests per 1M population", 'Tests/1M pop'),
)

# Building the app layout
app.layout = html.Div([
    html.H1("Corona Tracker DashBoard", style={"text-align": "center"}),
    html.Br(),

    _section(
        "Corona Cases/Recovered/Deaths by Continent",
        dcc.Dropdown(
            id="select_keyword",
            options=_dropdown_options([
                ("Today's Data", "New"),
                ("Total Data", "Total"),
            ]),
            multi=False,
            value="New",
            style={"width": "40%"},
        ),
        dcc.Graph(id="continent_corona_bar", figure=init_continent_fig),
    ),

    _section(
        "Visualize Countries by attribute.",
        dcc.Dropdown(
            id="select_attribute",
            options=_dropdown_options(_ATTRIBUTE_CHOICES),
            multi=False,
            value="TotalCases",
            style={"width": "60%", 'display': 'inline-block'},
        ),
        dcc.Dropdown(
            id="select_k_countries",
            options=_dropdown_options([
                ("Top 5", 5),
                ("Top 10", 10),
                ("Top 25", 25),
                ("Top 50", 50),
            ]),
            multi=False,
            value=10,
            style={"width": "30%", 'display': 'inline-block'},
        ),
        dcc.Graph(id="k_countries_sorted", figure=init_k_countries_plot),
    ),

    _section(
        "BoxPlot to explain the distribution of the variables",
        dcc.Dropdown(
            id="select_box_attribute",
            options=_dropdown_options([
                ("Deaths per 1M population", 'Deaths/1M pop'),
                ("Tests per 1M population", 'Tests/1M pop'),
            ]),
            multi=False,
            value="Deaths/1M pop",
            style={"width": "40%"},
        ),
        dcc.Graph(id="continent_box_plot", figure=init_box_fig),
    ),

    _section(
        "Pie Chart to explain the distribution new updates of Coronavirus ",
        dcc.Dropdown(
            id="continent-pie",
            options=_dropdown_options(_CONTINENT_CHOICES),
            multi=False,
            value="Asia",
            style={"width": "40%"},
        ),
        dcc.Graph(id="pie-chart", figure=init_pie_fig),
    ),

    _section(
        "Comparison of Active cases vs Critical cases ",
        dcc.Dropdown(
            id="continent-active",
            options=_dropdown_options(_CONTINENT_CHOICES),
            multi=False,
            value="South America",
            style={"width": "40%"},
        ),
        dcc.Graph(id="plot_curve", figure=init_active_vs_critical),
    ),

    _section(
        "Visualize Population by attribute.",
        dcc.Dropdown(
            id="select-an-attribute",
            options=_dropdown_options(_ATTRIBUTE_CHOICES),
            multi=False,
            value="TotalCases",
            style={"width": "60%", 'display': 'inline-block'},
        ),
        dcc.Dropdown(
            id="select-continent",
            options=_dropdown_options(_CONTINENT_CHOICES),
            multi=False,
            value='Africa',
            style={"width": "30%", 'display': 'inline-block'},
        ),
        dcc.Graph(id="id-pop", figure=init_population),
    ),
])


# Defining the application callbacks

@app.callback(
    Output("continent_corona_bar", "figure"),
    Input("select_keyword", "value"),
)
def update_continent_corona_bar(value):
    """Redraw the per-continent bar chart for the keyword chosen in "select_keyword"."""
    figure = plot_continent_data(data, keyword=value)
    return figure


@app.callback(
    Output("k_countries_sorted", "figure"),
    Input("select_attribute", "value"),
    Input("select_k_countries", "value"),
)
def update_k_countries_sorted(attribute, n_countries):
    """Redraw the top-countries chart for the chosen attribute and number of countries."""
    figure = plot_top_k_countries(n_countries, attribute)
    return figure

@app.callback(
    Output("pie-chart", "figure"),
    Input("continent-pie", "value"),
)
def update_pie_chart(value):
    """Redraw the pie chart for the continent chosen in "continent-pie"."""
    figure = plot_pie_chart(data, keyword=value)
    return figure

@app.callback(
    Output("continent_box_plot", "figure"),
    Input("select_box_attribute", "value"),
)
def update_continent_box_plot(value):
    """Redraw the per-continent box plots for the column chosen in "select_box_attribute"."""
    figure = plot_boxplots(data, keyword=value)
    return figure

@app.callback(
    Output("plot_curve", "figure"),
    Input("continent-active", "value"),
)
def update_active_vs_critical_plot(value):
    """Redraw the active-vs-critical chart for the continent chosen in "continent-active"."""
    figure = plot_active_vs_critical(data, keyword=value)
    return figure

@app.callback(
    Output("id-pop", "figure"),
    Input("select-an-attribute", "value"),
    Input("select-continent", "value"),
)
def update_plot_population(attribut, continent):
    """Redraw the population chart for the chosen attribute and continent."""
    figure = plot_population(data, continent, attribut)
    return figure

if __name__ == "__main__":
    # `data` and the initial figures are already built from a scrape performed
    # when this module is loaded, so the page is not downloaded a second time
    # here; doing so only doubled the start-up network work.
    app.run_server()




The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html



#
Country,Other
TotalCases
NewCases
TotalDeaths
NewDeaths
TotalRecovered
NewRecovered
ActiveCases
Serious,Critical
Tot Cases/1M pop
Deaths/1M pop
TotalTests
Tests/
1M pop

Population
Continent
1 Caseevery X ppl1 Deathevery X ppl1 Testevery X ppl
New Cases/1M pop
New Deaths/1M pop
Active Cases/1M pop

['TotalCases', 'NewCases', 'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered', 'ActiveCases', 'Serious,Critical', 'Tot\xa0Cases/1M pop', 'Deaths/1M pop', 'TotalTests', 'Tests/1M pop', 'Population', 'Continent', '1 Caseevery X ppl1 Deathevery X ppl1 Testevery X ppl', 'New Cases/1M pop', 'New Deaths/1M pop']

#
Country,Other
TotalCases
NewCases
TotalDeaths
NewDeaths
TotalRecovered
NewRecovered
ActiveCases
Serious,Critical
Tot Cases/1M pop
Deaths/1M pop
TotalTests
Tests/
1M pop

Population
Continent
1 Caseevery X ppl1 Deathevery X ppl1 Testevery X ppl
New Cases/1M pop
New Deaths/1M pop
Active Cases/1M pop

['TotalCases', 'NewCases', 'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'Ne

INFO:dash.dash:Dash is running on http://127.0.0.1:8050/



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
INFO:werkzeug:[33mPress CTRL+C to quit[0m
