In [None]:
# import packages
import pandas as pd
import numpy as np
import urllib.request as request
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot 
import datetime
from ipywidgets import interact, interactive,fixed,interact_manual
import ipywidgets as widgets
import csv
from sklearn.model_selection import  train_test_split
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import plotly.io as pio
pio.renderers.default = 'notebook'

In [None]:
# Download data: CSV files read directly from the CSSEGISandData/COVID-19 GitHub repository.
_COVID_CSV_URLS = {
    "confirmed": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    "recovered": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
    "deaths": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    # Daily report pinned to one fixed file (12-15-2020.csv).
    "daily_report": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/12-15-2020.csv",
}

# Global time series: one frame each for confirmed, recovered and death counts.
confirmed_df = pd.read_csv(_COVID_CSV_URLS["confirmed"])
recovered_df = pd.read_csv(_COVID_CSV_URLS["recovered"])
death_df = pd.read_csv(_COVID_CSV_URLS["deaths"])

# Single-day report, used further down for the Malaysia summary.
cv19_cases_by_country = pd.read_csv(_COVID_CSV_URLS["daily_report"])

In [None]:
# Second set of names for the three global time-series frames.
# These are plain references (no copy is made), so right after this cell each
# pair of names points at the same DataFrame object.
cv19_confirmed_global, cv19_deaths_global, cv19_recovered_global = (
    confirmed_df,
    death_df,
    recovered_df,
)

In [None]:
# Renaming: lower-case every header, then shorten the two location headers.
_short_location_names = {'province/state': 'state', 'country/region': 'country'}

# Step 1 - lower-case the headers on the existing objects. This assignment
# modifies the frames in place, so the cv19_*_global names that reference the
# same objects see the lower-cased headers as well.
for _frame in (confirmed_df, recovered_df, death_df):
    _frame.columns = [_header.lower() for _header in _frame.columns]

# Step 2 - rename() returns new frames; only confirmed_df / recovered_df /
# death_df are rebound to them, the cv19_*_global names keep the
# 'province/state' and 'country/region' headers.
confirmed_df = confirmed_df.rename(columns=_short_location_names)
recovered_df = recovered_df.rename(columns=_short_location_names)
death_df = death_df.rename(columns=_short_location_names)

In [None]:
# Collapse the province/state rows into a single row per country.
# numeric_only=True keeps the text 'state' column out of the aggregation, so the
# result always has the layout country, lat, long, <date columns...> regardless
# of the pandas version's default for numeric_only.
# Note: lat/long are summed along with the counts here, so for countries with
# several province rows they are not usable as coordinates without correction.
clean_confirmed_data = confirmed_df.groupby('country').sum(numeric_only=True).reset_index()
clean_recovered_data = recovered_df.groupby('country').sum(numeric_only=True).reset_index()
clean_death_data = death_df.groupby('country').sum(numeric_only=True).reset_index()

In [None]:
# Worldwide headline figures: for each series, add up every row per date column
# (columns from position 4 onward) and keep the largest daily total.
_peak_totals = {
    _label: _frame.iloc[:, 4:].sum().max()
    for _label, _frame in (
        ('confirmed', confirmed_df),
        ('deaths', death_df),
        ('recovered', recovered_df),
    )
}
covid_confirmed_count = _peak_totals['confirmed']
covid_deaths_count = _peak_totals['deaths']
covid_recovered_count = _peak_totals['recovered']

# One-row summary table; 'active' is confirmed minus deaths minus recovered.
_active_count = covid_confirmed_count - covid_deaths_count - covid_recovered_count
world_df = pd.DataFrame({
    'confirmed': [covid_confirmed_count],
    'deaths': [covid_deaths_count],
    'recovered': [covid_recovered_count],
    'active': [_active_count],
})

# World map of confirmed cases over time (monthly snapshots from 2/1/20)

In [None]:
# Per-country coordinates: average the province-level lat/long taken from the
# un-aggregated frame. clean_confirmed_data already holds one row per country
# with *summed* lat/long, so averaging that frame again would change nothing.
_country_coords = confirmed_df.groupby('country')[['lat', 'long']].mean()
clean_confirmed_data['lat'] = clean_confirmed_data['country'].map(_country_coords['lat'])
clean_confirmed_data['long'] = clean_confirmed_data['country'].map(_country_coords['long'])

# First-of-month snapshots used as animation frames.
_id_columns = ['country', 'lat', 'long']
_snapshot_dates = ['2/1/20', '3/1/20', '4/1/20', '5/1/20', '6/1/20', '7/1/20', '8/1/20',
                   '9/1/20', '10/1/20', '11/1/20', '12/1/20', '1/1/21']
country_df_month = clean_confirmed_data.loc[:, _id_columns + _snapshot_dates]

# Wide -> long: one row per (country, date). melt() is given explicit lists of
# column names rather than DataFrame slices.
covid_confirmed_agg_long = pd.melt(country_df_month,
                                   id_vars=_id_columns,
                                   value_vars=_snapshot_dates,
                                   var_name='date',
                                   value_name='date_confirmed_cases')

# Bubble map: bubble size is the cumulative confirmed count at each snapshot.
fig = px.scatter_geo(covid_confirmed_agg_long,
                     lat="lat", lon="long", color="country",
                     hover_name="country", size="date_confirmed_cases",
                     size_max=50, animation_frame="date",
                     projection="natural earth",
                     title="COVID-19 worldwide confirmed cases over time")
fig.show()

# Top infected countries

In [None]:
# Per-country snapshot of confirmed / death / recovered totals on one date.
_snapshot_date = '1/12/21'

# Build a brand-new frame instead of adding columns to a slice of
# clean_confirmed_data (which triggers SettingWithCopyWarning).
country_df = pd.DataFrame({
    'country': clean_confirmed_data['country'],
    'Confirmed': clean_confirmed_data[_snapshot_date],
})

# Match deaths and recoveries by country name rather than by row position, so
# the figures stay correct even if the three frames list countries differently.
_deaths_by_country = clean_death_data.set_index('country')[_snapshot_date]
_recovered_by_country = clean_recovered_data.set_index('country')[_snapshot_date]
country_df['Death'] = country_df['country'].map(_deaths_by_country)
country_df['Recovered'] = country_df['country'].map(_recovered_by_country)
def draw_scatter(number,content):
    """Show a bar chart of the countries ranked highest on one metric.

    Parameters
    ----------
    number : int
        How many of the top-ranked rows to display.
    content : str
        Column of ``country_df`` to rank by and to plot on the y axis.
    """
    # Rank descending by the chosen metric, cap at 191 rows, then keep the
    # requested number of leading rows.
    ranked = country_df.sort_values(by=content, ascending=False)
    top_rows = ranked.head(191).head(number)

    bar_chart = px.bar(
        top_rows,
        x='country',
        y=content,
        color="country",
        hover_data=[content],
        height=400,
        title="Top infected Countries",
    )
    bar_chart.show()
# Interactive controls: a 1-30 slider for the number of countries (10 to start)
# and a dropdown for the metric; the trailing ';' hides the cell's return value.
interact(
    draw_scatter,
    number=widgets.IntSlider(value=10, min=1, max=30, step=1),
    content=["Confirmed", "Recovered", "Death"],
);

In [None]:

def plot_cases_for_country(country,Worldwide):
    """Plot confirmed, death and recovered totals over time as three lines.

    Parameters
    ----------
    country : str
        Value matched against the 'country' column; ignored when
        ``Worldwide`` is true.
    Worldwide : bool
        When true, every row is summed; otherwise only the rows of ``country``.

    Reads the module-level ``confirmed_df``, ``death_df`` and ``recovered_df``
    and displays the figure; nothing is returned.
    """
    labels = ['confirmed', 'deaths', 'recovered']
    colors = ['blue', 'red', 'green']
    line_size = [1, 1, 1]

    df_list = [confirmed_df, death_df, recovered_df]

    # The frames start with state, country, lat, long, so date columns begin at
    # position 4 (the same offset the worldwide totals use). Starting at 5
    # silently dropped the first date.
    first_date_col = 4

    fig1 = go.Figure()

    for i, df in enumerate(df_list):
        date_counts = df.iloc[:, first_date_col:]
        if not Worldwide:
            date_counts = date_counts[df['country'] == country]

        x_data = np.array(list(date_counts.columns))
        # Sum over rows (provinces, or the whole world) for each date.
        y_data = np.sum(np.asarray(date_counts), axis=0)

        fig1.add_trace(go.Scatter(x=x_data, y=y_data, mode='lines', name=labels[i],
                                  line=dict(color=colors[i], width=line_size[i]),
                                  connectgaps=True,
                                  text="Total " + str(labels[i]) + ": " + str(y_data[-1])
                                  ))

    # NOTE(review): the values plotted are the series totals per date; the
    # "Daily Increase" wording of the title is kept as-is - confirm intent.
    fig1.update_layout(
        title="Covid-19 Cases Daily Increase Timeseries",
        xaxis_title="Date",
        yaxis_title="Cases",  # the y axis shows case counts, not countries
    )
    fig1.show()

#plot_cases_for_country('China')
#interact(plot_cases_for_country,country='World');

# Covid-19 cases daily increase timeseries

In [None]:
# Offer each country once: the raw 'country' column repeats a country for every
# province/state row, which would fill the dropdown with duplicates.
interact(
    plot_cases_for_country,
    country=sorted(confirmed_df['country'].unique()),
    Worldwide=False,
);

In [None]:
# Rows of the daily report that belong to Malaysia.
_is_malaysia_row = cv19_cases_by_country['Country_Region'] == "Malaysia"
cv19_malaysia_cases = cv19_cases_by_country[_is_malaysia_row]

# Drop the identifier, location and rate columns before totalling.
_columns_to_drop = [
    'FIPS', 'Admin2', 'Province_State', 'Country_Region', 'Last_Update',
    'Lat', 'Long_', 'Combined_Key', 'Incident_Rate', 'Case_Fatality_Ratio',
]
cv19_malaysia_data = cv19_malaysia_cases.drop(columns=_columns_to_drop)

# Column totals laid out as a single-row frame.
cv19_malaysia_summary = pd.DataFrame(cv19_malaysia_data.sum()).T

In [None]:
# Malaysia rows of the confirmed time series. cv19_confirmed_global is indexed
# with the lower-cased but un-shortened headers ('country/region', ...).
_is_malaysia_confirmed = cv19_confirmed_global['country/region'] == "Malaysia"
cv19_confirmed_malaysia = cv19_confirmed_global[_is_malaysia_confirmed]

# Keep only the date columns, then total them into one Series indexed by date.
_location_columns = ['province/state', 'country/region', 'lat', 'long']
cv19_confirmed_malaysia_ts = cv19_confirmed_malaysia.drop(columns=_location_columns)
cv19_confirmed_malaysia_ts_summary = cv19_confirmed_malaysia_ts.sum()

# Total Confirmed Coronavirus Cases in Malaysia

In [None]:
# Line-and-marker chart of Malaysia's confirmed totals, one point per date.
_malaysia_confirmed_trace = go.Scatter(
    x=cv19_confirmed_malaysia_ts_summary.index,
    y=cv19_confirmed_malaysia_ts_summary.values,
    mode='lines+markers',
)
fig_1 = go.Figure(data=_malaysia_confirmed_trace)
fig_1.update_layout(
    title='Total Confirmed Coronavirus Cases in Malaysia',
    yaxis_title='Confirmed Cases',
    xaxis_tickangle=315,
)
fig_1.show()

In [None]:
# Qualitative colour sequence (plotly express "Dark24"); later cells pick trace
# colours from it by index.
color_arr = px.colors.qualitative.Dark24

In [None]:
def draw_plot(ts_array, ts_label, colors, mode_size, line_size, x_axis_title, y_axis_title, yaxis_type, title, additional_annotations, tickangle = 0):
    """Build a multi-line plotly figure from a list of time series.

    Parameters
    ----------
    ts_array : sequence
        Series to plot; each item's ``.index`` is used for x and ``.values`` for y.
    ts_label : sequence of str
        Legend name for each series (same order as ``ts_array``).
    colors : sequence
        Line colour for each series.
    mode_size : sequence
        Accepted for interface compatibility; not used by this function.
    line_size : sequence
        Line width for each series.
    x_axis_title, y_axis_title : str
        Axis titles; an empty value leaves the axis untitled.
    yaxis_type : str
        Value for the y axis ``type`` setting; an empty value leaves it unset.
    title : str
        Figure title, drawn as an annotation above the plot area.
    additional_annotations : list of dict, dict, or None
        Extra annotation(s) added after the title.
    tickangle : number, default 0
        X-axis tick label angle; 0 leaves it unset.

    Returns
    -------
    The assembled ``go.Figure`` (not shown; the caller decides when to display it).
    """
    #initialize figure
    fig = go.Figure()
    #add all traces
    for index, ts in enumerate(ts_array):
        fig.add_trace(go.Scatter(x=ts.index, y=ts.values, name=ts_label[index], line=dict(color=colors[index], width=line_size[index]), connectgaps=True))

    #base x_axis prop
    x_axis_dict = dict(showline=True, showgrid=True, showticklabels=True, linecolor='rgb(204, 204, 204)', linewidth=2, ticks='outside', tickfont=dict(family='Arial', size=12, color='rgb(82, 82, 82)'))
    #setting x_axis params
    if x_axis_title:
        x_axis_dict['title'] = x_axis_title
    # Any non-zero angle is applied, so negative angles are honoured as well.
    if tickangle != 0:
        x_axis_dict['tickangle'] = tickangle

    #base y_axis prop
    y_axis_dict = dict(showline=True, showgrid=True, showticklabels=True, linecolor='rgb(204, 204, 204)', linewidth=2)
    #setting y_axis params
    # Plain truthiness test: the previous ``yaxis_type := ''`` overwrote the
    # argument with '' and therefore never applied the requested axis type.
    if yaxis_type:
        y_axis_dict['type'] = yaxis_type
    if y_axis_title:
        y_axis_dict['title'] = y_axis_title

    #updating layout
    fig.update_layout(xaxis = x_axis_dict, yaxis=y_axis_dict, autosize=True, margin=dict(autoexpand=True, l=100, r=20, t=110), showlegend=True)

    #base annotation for any graph
    annotations = []
    #Title
    annotations.append(dict(xref='paper', yref='paper', x=0.0, y=1.05, xanchor='left', yanchor='bottom', text=title, font=dict(family='Arial', size=16, color='rgb(37, 37, 37)'), showarrow=False))
    #adding annotations in params
    # Flatten the extras into the list; appending the list itself would nest a
    # list inside the annotations instead of adding its entries.
    if additional_annotations:
        if isinstance(additional_annotations, dict):
            annotations.append(additional_annotations)
        else:
            annotations.extend(additional_annotations)

    fig.update_layout(annotations=annotations)

    return fig

In [None]:
# Non-date columns removed before totalling the Malaysia rows.
_malaysia_location_columns = ['province/state', 'country/region', 'lat', 'long']

# Deaths per date for Malaysia.
_malaysia_death_rows = cv19_deaths_global[cv19_deaths_global["country/region"] == "Malaysia"]
cv19_deaths_malaysia_ts_summary = _malaysia_death_rows.drop(columns=_malaysia_location_columns).sum()

# Recoveries per date for Malaysia.
_malaysia_recovered_rows = cv19_recovered_global[cv19_recovered_global["country/region"] == "Malaysia"]
cv19_recovered_malaysia_ts_summary = _malaysia_recovered_rows.drop(columns=_malaysia_location_columns).sum()

# Active = confirmed - deaths - recovered, computed position by position on the
# raw values and labelled with the confirmed series' dates.
_active_values = (
    cv19_confirmed_malaysia_ts_summary.values
    - cv19_deaths_malaysia_ts_summary.values
    - cv19_recovered_malaysia_ts_summary.values
)
cv19_active_malaysia_ts_summary = pd.Series(
    data=np.array(_active_values),
    index=cv19_confirmed_malaysia_ts_summary.index,
)

# Covid-19 Malaysia Timeseries

In [None]:
# One entry per plotted line: (legend label, series, colour).
_malaysia_lines = [
    ('Confirmed', cv19_confirmed_malaysia_ts_summary, color_arr[5]),
    ('Active', cv19_active_malaysia_ts_summary, color_arr[0]),
    ('Recovered', cv19_recovered_malaysia_ts_summary, color_arr[2]),
    ('Deaths', cv19_deaths_malaysia_ts_summary, color_arr[3]),
]
ts_array = [_series for _, _series, _ in _malaysia_lines]
labels = [_label for _label, _, _ in _malaysia_lines]
colors = [_colour for _, _, _colour in _malaysia_lines]
mode_size = [8] * 4
line_size = [2] * 4

# NOTE(review): tickangle=500 is well beyond a full turn - confirm the intended angle.
fig_2 = draw_plot(
    ts_array=ts_array,
    ts_label=labels,
    colors=colors,
    mode_size=mode_size,
    line_size=line_size,
    x_axis_title='Date',
    y_axis_title='Cases',
    yaxis_type='',
    title='Covid-19 Malaysia Timeseries',
    additional_annotations=[],
    tickangle=500,
)

fig_2.show()

# Prediction for Future Data

In [None]:
# Widen pandas' display limits so wide frames are not truncated when shown.
pd.set_option('display.max_columns',1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth',1000)
# Source CSV layout: Province/State (province), Country/Region (country), Lat and
# Long (coordinates), followed by one column per date (the original note
# mentions 93 columns in total).
# Module-level assignments are already global, so no ``global`` statement is needed.
confirmed_global_df=clean_confirmed_data
cols = confirmed_global_df.keys()  # column names
# Date columns, selected by name: everything that is not a location column.
# A fixed position (cols[4]) does not fit the per-country frame, whose leading
# columns are country, lat, long, and would skip the first date.
dates = confirmed_global_df.columns.drop(['country', 'state', 'lat', 'long'], errors='ignore')
# Default window, as offsets in days from the first date column.
start_date=0
end_date=30

In [None]:
def print_file(days_in_future,country,end_date):
    """Fit two simple models to a country's confirmed counts and plot their forecasts.

    Parameters
    ----------
    days_in_future : int
        Number of days to predict beyond the observed window.
    country : str
        Value matched against the 'country' column of ``confirmed_global_df``.
    end_date : int
        Exclusive end of the observed window, as a day offset from the first
        date column; must be lower than 350.

    Reads the module-level ``confirmed_global_df`` and ``start_date``. Shows a
    figure with a logistic-regression line and a degree-3 polynomial-regression
    line; prints a message and returns early when the inputs cannot be used.
    """
    if end_date >= 350:
        print("Please enter a number lower than 350.")
        return

    # Select date columns by name instead of a fixed "+4" offset, so the window
    # starts at the first date whatever location columns the frame carries.
    location_cols = {'country', 'state', 'lat', 'long'}
    date_cols = [c for c in confirmed_global_df.columns if c not in location_cols]
    confirmed_Province_df = confirmed_global_df.loc[confirmed_global_df["country"] == country, date_cols]
    if confirmed_Province_df.empty:
        print("No data found for the selected country.")
        return

    # Observed confirmed counts for days start_date .. end_date-1.
    confirmed_Province_array = np.array(confirmed_Province_df.iloc[:, start_date:end_date]).reshape(-1, 1)
    observed_days = len(confirmed_Province_array)
    if observed_days < 2:
        print("Please use at least 2 days of data.")
        return

    days_in_future = max(int(days_in_future), 0)
    # Day ordinals: observed window, and observed window plus forecast horizon.
    dates_array = np.arange(observed_days).reshape(-1, 1)
    future_forcast = np.arange(observed_days + days_in_future).reshape(-1, 1)

    # Axis labels. The first date column is 1/22/20; an explicit year is used
    # because parsing '1/22' alone defaults to 1900, which has no Feb 29 and
    # shifts every later label by one day. The year is kept in the label so
    # windows longer than a year do not repeat labels.
    first_day = datetime.date(2020, 1, 22) + datetime.timedelta(days=start_date)
    future_forcast_dates = [
        (first_day + datetime.timedelta(days=i)).strftime('%m/%d/%y')
        for i in range(len(future_forcast))
    ]

    # Keep the first 90% of the window (in order) for fitting; the held-out 10%
    # is not used below.
    X_train_confirmed, _, y_train_confirmed, _ = train_test_split(
        dates_array, confirmed_Province_array, test_size=0.1, shuffle=False)
    y_train = y_train_confirmed.ravel()

    # NOTE(review): LogisticRegression is a classifier, so each distinct count is
    # handled as a class label - confirm this is the intended model.
    if np.unique(y_train).size < 2:
        # A classifier cannot be fitted on a single distinct value (e.g. a
        # window of all zeros); predict that constant instead of failing.
        Logistic_pred = np.full(len(future_forcast), y_train[0])
    else:
        Logistic = LogisticRegression(C = 5, penalty = 'l2',max_iter=50000)
        Logistic.fit(X_train_confirmed, y_train)
        Logistic_pred = Logistic.predict(future_forcast)

    # Polynomial regression (degree 3): fit the feature expansion on the
    # training days, then only transform the forecast days.
    poly_reg = PolynomialFeatures(degree=3)
    x_poly = poly_reg.fit_transform(X_train_confirmed)
    linear_reg = LinearRegression()
    linear_reg.fit(x_poly, y_train)
    poly_pred = linear_reg.predict(poly_reg.transform(future_forcast))

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=future_forcast_dates, y=Logistic_pred, mode='lines+markers',
                             name='Logistic regression',
                             line=dict(color='firebrick', width=4, dash='dash')))
    fig.add_trace(go.Scatter(x=future_forcast_dates, y=poly_pred, mode='lines+markers',
                             name='Polynomial regression (degree 3)',
                             line=dict(color='royalblue', width=4)))
    fig.update_layout(title='Confirmed Coronavirus Cases Over Time', yaxis_title='confirmed Cases', xaxis_title='Time in Days')
    fig.show()
# Let each description take its natural width instead of being truncated.
style = {'description_width': 'initial'}
# BoundedIntText is the text input that supports min/max; the plain IntText
# does not have those settings, so the 10-350 limits were not applied.
interact(
    print_file,
    days_in_future=widgets.IntSlider(description='Forecast days', value=1, disabled=False, style=style),
    country=confirmed_global_df["country"],
    end_date=widgets.BoundedIntText(description='Used Days from 01/22/2020', style=style,
                                    min=10, max=350, step=1, value=80),
);