In [8]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
pd.set_option('display.max_rows', None)
import datetime
from plotly.subplots import make_subplots
import requests
from bs4 import BeautifulSoup

In [9]:
file = pd.read_csv('../data/raw/datasets_494766_1278460_worldometer_data.csv')


In [10]:
figure1 = go.Figure(go.Funnel(
    y = ["Total Cases", "Total Recovered", "Active Cases", "Deaths"],
    x = [file['TotalCases'].sum(),file['TotalRecovered'].sum(),file['ActiveCases'].sum(),file['TotalDeaths'].sum()],
    textposition = "inside",
    textinfo = "value",
    opacity = 0.9, 
    marker = {"color": ["Blue", "Green", "Red", "Black"],"line": {"width": 4, "color": 'Black'}}
                             ))

figure1.update_layout(
    template="plotly_white",
    title={
        'text': " current situation of covid19 over the word ",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
figure1.update_layout( width=700,height=600)

figure1.show()

In [11]:
file_countries = pd.read_csv("../data/raw/full_grouped.csv")


In [12]:
file_countries.head()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,WHO Region
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0,Eastern Mediterranean
1,2020-01-22,Albania,0,0,0,0,0,0,0,Europe
2,2020-01-22,Algeria,0,0,0,0,0,0,0,Africa
3,2020-01-22,Andorra,0,0,0,0,0,0,0,Europe
4,2020-01-22,Angola,0,0,0,0,0,0,0,Africa


Cov-19 Map 

In [13]:
fig = px.choropleth(file_countries, locations=file_countries['Country/Region'],
                    color=file_countries['Confirmed'], locationmode='country names',
                    hover_name=file_countries['Country/Region'],hover_data =['Country/Region','Confirmed'],
                    color_continuous_scale='ylgnbu',template='plotly_white', animation_frame = 'Date')
fig.update_layout(
    title='Confirmed Cases In Each Country over Time',
)
fig.update_layout( width=700,height=600)
fig.show()

In [14]:
file1 = pd.read_csv('../data/raw/cov19_dataSets/covid_19_data.csv')



In [15]:
Data = file1[file1['ObservationDate'] == max(file1['ObservationDate'])].reset_index()


In [16]:
Data["Province/State"]= Data["Province/State"].fillna('Unknown')
Data.head()

Unnamed: 0,index,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,47366,47367,06/22/2020,Unknown,Afghanistan,2020-06-23 04:33:22,29157.0,598.0,8841.0
1,47367,47368,06/22/2020,Unknown,Albania,2020-06-23 04:33:22,1995.0,44.0,1159.0
2,47368,47369,06/22/2020,Unknown,Algeria,2020-06-23 04:33:22,11920.0,852.0,8559.0
3,47369,47370,06/22/2020,Unknown,Andorra,2020-06-23 04:33:22,855.0,52.0,796.0
4,47370,47371,06/22/2020,Unknown,Angola,2020-06-23 04:33:22,186.0,10.0,77.0


In [23]:
file1['Active_case'] = file1['Confirmed'] - file1['Deaths'] - file1['Recovered']
file1.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0,1.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0,14.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0,6.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0,1.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0,0.0


In [24]:
data_over_time= file1.groupby(["ObservationDate"])[["Confirmed","Active_case","Recovered","Deaths"]].sum().reset_index().sort_values("ObservationDate",ascending=True).reset_index(drop=True)



In [28]:
fig = go.Figure()


fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Confirmed'],
                    mode='lines',marker_color='yellow',
                    name='confirmed cases',line=dict( dash='dot')))

fig.update_layout(
    title='confirmed cases over time in the world',
        template='plotly_dark',
      yaxis_title="confirmed cases",
    xaxis_title="Days",

)

fig.show()

In [29]:
fig = go.Figure()


fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Recovered'],
                    mode='lines',marker_color='yellow',
                    name='Recovered cases',line=dict( dash='dot')))

fig.update_layout(
    title='Recovered cases over time in the world',
        template='plotly_dark',
      yaxis_title="Recovered cases",
    xaxis_title="Days",

)

fig.show()

In [30]:
fig = go.Figure()


fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Active_case'],
                    mode='lines',marker_color='yellow',
                    name='Active cases',line=dict( dash='dot')))

fig.update_layout(
    title='Active cases over time in the world',
        template='plotly_dark',
      yaxis_title="Active cases",
    xaxis_title="Days",

)

fig.show()

In [32]:
fig = go.Figure()


fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Deaths'],
                    mode='lines',marker_color='yellow',
                    name='Deaths',line=dict( dash='dot')))

fig.update_layout(
    title='Deaths cases over time in the world',
        template='plotly_dark',
      yaxis_title="Deaths cases",
    xaxis_title="Days",

)

fig.show()

In [33]:
fig = go.Figure(go.Bar(
            x=data_over_time['ObservationDate'],
            y=data_over_time['Confirmed'],
           ))
fig.update_layout(
    title='Confirmed Cases each day',
    template='plotly_white',
     xaxis_title="Confirmed Cases",
    yaxis_title="Days",
)
fig.show()

In [34]:
Data_per_country = Data.groupby(["Country/Region"])["Confirmed","Active_case","Recovered","Deaths"].sum().reset_index().sort_values("Confirmed",ascending=False).reset_index(drop=True)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [210]:
fig = go.Figure(go.Bar(
            x=Data_per_country['Country/Region'][0:8],
            y=Data_per_country['Confirmed'][0:8],
            orientation='v'))
fig.update_layout(
    title='Confirmed Cases In Each Country',
    template='plotly_white',
     xaxis_title="Confirmed Cases",
    yaxis_title="Countries",
)
fig.show()

In [211]:
fig = go.Figure(go.Bar(
            x=Data_per_country['Country/Region'][0:10],
            y=Data_per_country['Active_case'][0:10],
            orientation='v',
            marker_color='#DC3912',))
fig.update_layout(
    title='Active Cases In Each Country',
    template='plotly_white',
    xaxis_title="Active Cases",
    yaxis_title="Countries",
)
fig.show()

In [212]:
Data_per_country.head()

Unnamed: 0,Country/Region,Confirmed,Active_case,Recovered,Deaths
0,US,2312300.0,1551700.0,640198.0,120402.0
1,Brazil,1106470.0,453463.0,601736.0,51271.0
2,Russia,591465.0,239422.0,343847.0,8196.0
3,India,440215.0,178014.0,248190.0,14011.0
4,UK,306761.0,262708.0,1322.0,42731.0


In [213]:
Deaths_per_country = file1.groupby(["Country/Region"])["Deaths"].sum().reset_index().sort_values("Deaths",ascending=False).reset_index(drop=True)


In [214]:
fig = go.Figure(data=[go.Bar(
            x=Data_per_country['Country/Region'][0:10], y=Data_per_country['Deaths'][0:10],
            text=Data_per_country['Deaths'][0:10],
            textposition='auto',
            marker_color='black'

        )])
fig.update_layout(
    title='Most 10 infected Countries',
    xaxis_title="Countries",
    yaxis_title="Deaths",
        template='plotly_white'

)
fig.show()

# Data Analysis For Syria : 

In [80]:
Data_Syria = file1 [(file1['Country/Region'] == 'Syria') ].reset_index(drop=True)
Data_Syria.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,7761,03/22/2020,Unknown,Syria,3/8/20 5:31,1.0,0.0,0.0,1.0
1,8059,03/23/2020,Unknown,Syria,2020-03-23 23:23:20,1.0,0.0,0.0,1.0
2,8356,03/24/2020,Unknown,Syria,2020-03-24 23:41:50,1.0,0.0,0.0,1.0
3,8658,03/25/2020,Unknown,Syria,2020-03-25 23:37:49,5.0,0.0,0.0,5.0
4,8961,03/26/2020,Unknown,Syria,2020-03-26 23:53:24,5.0,0.0,0.0,5.0


In [152]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=Data_Syria['ObservationDate'], y=Data_Syria['Confirmed'],
                    mode='lines',
                    name='Confirmed cases'))

fig.add_trace(go.Scatter(x=Data_Syria['ObservationDate'], y=Data_Syria['Active_case'],
                    mode='lines',
                    name='Active cases',line=dict( dash='dot')))
fig.add_trace(go.Scatter(x=Data_Syria['ObservationDate'], y=Data_Syria['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines',line=dict( dash='dot') ))
fig.add_trace(go.Scatter(x=Data_Syria['ObservationDate'], y=Data_Syria['Recovered'],
                    mode='lines',
                    name='Recovered cases',marker_color='green'))
fig.update_layout(
    title='Evolution of cases over time in Syria',
        template='plotly_white'

)

fig.show()

In [81]:
fig = go.Figure(go.Bar(
            x=Data_Syria['ObservationDate'],
            y=Data_Syria['Confirmed'],
    marker_color='rgb(13,48,100)'
           ))
fig.update_layout(
    title='Confirmed cases In Each Day',
    template='plotly_white',
     xaxis_title="Confirmed cases",
    yaxis_title="Days",
)
fig.show()

In [82]:
fig = go.Figure(go.Bar(
            x=Data_Syria['ObservationDate'],
            y=Data_Syria['Recovered'],
    marker_color='rgb(13,48,100)'
           ))
fig.update_layout(
    title='Recovered cases In Each Day',
    template='plotly_white',
     xaxis_title="Recovered cases",
    yaxis_title="Days",
)
fig.show()

In [83]:
fig = go.Figure(go.Bar(
            x=Data_Syria['ObservationDate'],
            y=Data_Syria['Deaths'],
    marker_color='rgb(13,48,100)'
           ))
fig.update_layout(
    title='Deaths In Each Day',
    template='plotly_white',
     xaxis_title="Deaths",
    yaxis_title="Days",
)
fig.show()

In [84]:
Data_Syria_last = Data_Syria[Data_Syria['ObservationDate'] == max(Data_Syria['ObservationDate'])].reset_index()
Data_Syria_last

Unnamed: 0,index,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,92,47515,06/22/2020,Unknown,Syria,2020-06-23 04:33:22,219.0,7.0,83.0,129.0


In [108]:
colors = ['rgb(2,58,88)','rgb(65,171,93)', 'rgb(127,0,0)']
labels = ["Active cases","Recovered","Deaths"]
values = Data_Syria_last.loc[0, ["Active_case","Recovered","Deaths"]]

fig = go.Figure(data=[go.Pie(labels=labels,
                             values=values)])
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=20,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))
fig.show()

# Data Analysis For Spain : 

In [101]:
Data_Spain = file1 [(file1['Country/Region'] == 'Spain') ].reset_index(drop=True)
Data_Spain.head()


Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,552,02/01/2020,Unknown,Spain,2/1/2020 2:13,1.0,0.0,0.0,1.0
1,619,02/02/2020,Unknown,Spain,2020-02-01T23:43:02,1.0,0.0,0.0,1.0
2,688,02/03/2020,Unknown,Spain,2020-02-01T23:43:02,1.0,0.0,0.0,1.0
3,758,02/04/2020,Unknown,Spain,2020-02-01T23:43:02,1.0,0.0,0.0,1.0
4,828,02/05/2020,Unknown,Spain,2020-02-01T23:43:02,1.0,0.0,0.0,1.0


In [102]:
Data_spain = Data_Spain[Data_Spain['ObservationDate'] == max(Data_Spain['ObservationDate'])].reset_index()
Data_spain_last= Data_spain.groupby(["Country/Region"])["Confirmed","Deaths","Recovered","Active_case"].sum().reset_index().reset_index(drop=True)



Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [135]:
labels = ["Active cases","Recovered","Deaths"]
values = Data_spain_last.loc[0, ["Active_case","Recovered","Deaths"]]
df = px.data.tips()
fig = px.pie(Data_spain_last, values=values, names=labels, color_discrete_sequence=['green','red','grey'], hole=0.5)
fig.update_layout(
    title='Total cases in Spain : '+str(Data_spain_last["Confirmed"][0]),
)
fig.show()

In [168]:
Data_spain_per_state= Data_spain.groupby(["Province/State"])["Confirmed","Deaths","Recovered","Active_case"].sum().reset_index().sort_values("Confirmed",ascending=False).reset_index(drop=True)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [169]:


Data_spain_per_state.head(10)

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered,Active_case
0,Madrid,71248.0,8416.0,40736.0,22096.0
1,Catalonia,60714.0,5666.0,26203.0,28845.0
2,Castilla y Leon,19516.0,2778.0,8716.0,8022.0
3,Castilla - La Mancha,17987.0,3022.0,6392.0,8573.0
4,Pais Vasco,13715.0,1555.0,16160.0,-4000.0
5,Andalusia,12892.0,1426.0,10671.0,795.0
6,C. Valenciana,11486.0,1431.0,9970.0,85.0
7,Galicia,9174.0,619.0,9204.0,-649.0
8,Aragon,5964.0,911.0,3772.0,1281.0
9,Navarra,5402.0,528.0,3905.0,969.0


In [170]:
fig = px.pie(Data_spain_per_state, values=Data_spain_per_state['Confirmed'], names=Data_spain_per_state['Province/State'],
             title='Confirmed cases in Spain',
            hole=.2)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [171]:
fig = px.pie(Data_spain_per_state, values=Data_spain_per_state['Recovered'], names=Data_spain_per_state['Province/State'],
             title='Recoverd cases in Spain',
            hole=.2)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [172]:
fig = go.Figure(data=[go.Bar(
            x=Data_spain_per_state['Province/State'][0:10], y=Data_spain_per_state['Deaths'][0:10],
            text=Data_spain_per_state['Deaths'][0:10],
            textposition='auto',
            marker_color='black'

        )])
fig.update_layout(
    title='Death in Spain',
    xaxis_title="states",
    yaxis_title="Deaths",
        template='plotly_white'

)
fig.show()

# Data analysis for Italy 

In [130]:
Data_Italy = file1 [(file1['Country/Region'] == 'Italy') ].reset_index(drop=True)
Data_Italy.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,481,01/31/2020,Unknown,Italy,1/31/2020 23:59,2.0,0.0,0.0,2.0
1,540,02/01/2020,Unknown,Italy,1/31/2020 8:15,2.0,0.0,0.0,2.0
2,609,02/02/2020,Unknown,Italy,2020-01-31T08:15:53,2.0,0.0,0.0,2.0
3,676,02/03/2020,Unknown,Italy,2020-01-31T08:15:53,2.0,0.0,0.0,2.0
4,744,02/04/2020,Unknown,Italy,2020-01-31T08:15:53,2.0,0.0,0.0,2.0


In [131]:
Data_Italy = Data_Italy[Data_Italy['ObservationDate'] == max(Data_Italy['ObservationDate'])].reset_index()
Data_Italy_last= Data_Italy.groupby(["Country/Region"])["Confirmed","Deaths","Recovered","Active_case"].sum().reset_index().reset_index(drop=True)




Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [179]:
labels = ["Active cases","Recovered","Deaths"]
values = Data_Italy_last.loc[0, ["Active_case","Recovered","Deaths"]]
df = px.data.tips()
fig = px.pie(Data_Italy_last, values=values, names=labels, color_discrete_sequence=['green','grey','red'], hole=0.5)
fig.update_layout(
    title='Total cases in Italy : '+str(Data_Italy_last["Confirmed"][0]),
)
fig.show()

In [136]:
Data_italy_per_state= Data_Italy.groupby(["Province/State"])["Confirmed","Deaths","Recovered","Active_case"].sum().reset_index().sort_values("Confirmed",ascending=False).reset_index(drop=True)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [137]:
Data_italy_per_state.head(10)

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered,Active_case
0,Lombardia,93111.0,16573.0,62900.0,13638.0
1,Piemonte,31248.0,4055.0,25223.0,1970.0
2,Emilia-Romagna,28243.0,4235.0,22854.0,1154.0
3,Veneto,19247.0,2003.0,16659.0,585.0
4,Toscana,10211.0,1099.0,8752.0,360.0
5,Liguria,9935.0,1552.0,8133.0,250.0
6,Lazio,8025.0,830.0,6250.0,945.0
7,Marche,6774.0,994.0,5275.0,505.0
8,Campania,4624.0,431.0,4060.0,133.0
9,Puglia,4527.0,540.0,3769.0,218.0


In [138]:
fig = px.pie(Data_italy_per_state, values=Data_italy_per_state['Confirmed'], names=Data_italy_per_state['Province/State'],
             title='Confirmed cases in italy',
            hole=.2)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [139]:
fig = px.pie(Data_italy_per_state, values=Data_italy_per_state['Recovered'], names=Data_italy_per_state['Province/State'],
             title='Recoverd cases in Italy',
            hole=.2)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [185]:
fig = go.Figure(data=[go.Bar(
            x=Data_italy_per_state['Province/State'][0:10], y=Data_italy_per_state['Deaths'][0:10],
            text=Data_italy_per_state['Deaths'][0:10],
            textposition='auto',
            marker_color='black'

        )])
fig.update_layout(
    title='Death in Italy',
    xaxis_title="states",
    yaxis_title="Deaths",
        template='plotly_white'

)
fig.show()

# Data anlysis for Brazil 

In [175]:
Data_Brazil = file1 [(file1['Country/Region'] == 'Brazil') ].reset_index(drop=True)
Data_Brazil.head(10)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,83,01/23/2020,Unknown,Brazil,1/23/20 17:00,0.0,0.0,0.0,0.0
1,2456,02/26/2020,Unknown,Brazil,2020-02-26T23:53:02,1.0,0.0,0.0,1.0
2,2560,02/27/2020,Unknown,Brazil,2020-02-26T23:53:02,1.0,0.0,0.0,1.0
3,2669,02/28/2020,Unknown,Brazil,2020-02-26T23:53:02,1.0,0.0,0.0,1.0
4,2777,02/29/2020,Unknown,Brazil,2020-02-29T21:03:05,2.0,0.0,0.0,2.0
5,2904,03/01/2020,Unknown,Brazil,2020-02-29T21:03:05,2.0,0.0,0.0,2.0
6,3033,03/02/2020,Unknown,Brazil,2020-02-29T21:03:05,2.0,0.0,0.0,2.0
7,3174,03/03/2020,Unknown,Brazil,2020-02-29T21:03:05,2.0,0.0,0.0,2.0
8,3323,03/04/2020,Unknown,Brazil,2020-03-04T20:33:02,4.0,0.0,0.0,4.0
9,3487,03/05/2020,Unknown,Brazil,2020-03-04T20:33:02,4.0,0.0,0.0,4.0


In [177]:
Data_brazil_last= Data_brazil.groupby(["Country/Region"])["Confirmed","Deaths","Recovered","Active_case"].sum().reset_index().reset_index(drop=True)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [178]:
labels = ["Active cases","Recovered","Deaths"]
values = Data_brazil_last.loc[0, ["Active_case","Recovered","Deaths"]]
df = px.data.tips()
fig = px.pie(Data_brazil_last, values=values, names=labels, color_discrete_sequence=['green','grey','red'], hole=0.5)
fig.update_layout(
    title='Total cases in Brazil : '+str(Data_brazil_last["Confirmed"][0]),
)
fig.show()

In [180]:
Data_brazil_per_state= Data_Brazil.groupby(["Province/State"])["Confirmed","Deaths","Recovered","Active_case"].sum().reset_index().sort_values("Confirmed",ascending=False).reset_index(drop=True)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [181]:
Data_brazil_per_state.head(10)

Unnamed: 0,Province/State,Confirmed,Deaths,Recovered,Active_case
0,Sao Paulo,4709943.0,302775.0,739425.0,3667743.0
1,Unknown,4292124.0,285581.0,5364256.0,-1357713.0
2,Rio de Janeiro,2163499.0,213498.0,987352.0,962649.0
3,Ceara,2065717.0,131016.0,881968.0,1052733.0
4,Para,1735335.0,115208.0,883013.0,737114.0
5,Amazonas,1570130.0,74707.0,693602.0,801821.0
6,Maranhao,1493191.0,39604.0,542208.0,911379.0
7,Pernambuco,1299753.0,107667.0,434080.0,758006.0
8,Bahia,916803.0,28876.0,258945.0,628982.0
9,Espirito Santo,673696.0,27431.0,236166.0,410099.0


In [182]:
fig = px.pie(Data_brazil_per_state, values=Data_brazil_per_state['Confirmed'], names=Data_brazil_per_state['Province/State'],
             title='Confirmed cases in Brazil',
            hole=.2)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [183]:
fig = px.pie(Data_brazil_per_state, values=Data_brazil_per_state['Recovered'], names=Data_brazil_per_state['Province/State'],
             title='Recoverd cases in Brazil',
            hole=.2)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [215]:
fig = go.Figure(data=[go.Bar(
            x=Data_brazil_per_state['Province/State'][0:10], y=Data_brazil_per_state['Deaths'][0:10],
            text=Data_brazil_per_state['Deaths'][0:10],
            textposition='auto',
            marker_color='black'

        )])
fig.update_layout(
    title='Death in Brazil',
    xaxis_title="states",
    yaxis_title="Deaths",
        template='plotly_white'

)
fig.show()