# PRCP-1023-JohnsHopkinsCovid19

# Introduction

# Dataset Description

# Importing Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
# pip install plotly

In [3]:
# pip install "jupyterlab>=3" "ipywidgets>=7.6"

In [4]:
# pip install jupyter-dash

In [5]:
# pip install plotly_express==0.4.0

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
# Data Visualization Libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotly_express as px
import plotly.graph_objects as go
import plotly.subplots as sp
%matplotlib inline

# Loading files

In [8]:
# Read the three time-series tables from CSV files in the working directory.
confirmed, deaths, recovered = (
    pd.read_csv(csv_name)
    for csv_name in ("confirmed.csv", "deaths.csv", "recovered.csv")
)

FileNotFoundError: [Errno 2] No such file or directory: 'confirmed.csv'

In [None]:
confirmed.head()

# Exploratory Data Analysis

### Confirmed Cases

In [None]:
confirmed.head()

In [None]:
confirmed.shape

In [None]:
confirmed.columns

In [None]:
confirmed.dtypes

In [None]:
confirmed.info()

In [None]:
confirmed.describe()

### Death Cases

In [None]:
deaths.head()

In [None]:
deaths.shape

In [None]:
deaths.columns

In [None]:
deaths.dtypes

In [None]:
deaths.info()

In [None]:
deaths.describe()

### Recovered

In [None]:
recovered.head()

In [None]:
recovered.shape

In [None]:
recovered.columns

In [None]:
recovered.dtypes

In [None]:
recovered.info()

In [None]:
recovered.describe()

## Insights 

## Data Cleaning

In [None]:
confirmed.columns[4:]

### Merging Confirmed,Deaths and Recovered

In [None]:
# Every column after the four identifier columns is treated as a date column.
dates = confirmed.columns[4:]
id_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']


def _to_long(wide, value_name):
    """Melt one wide table into one row per location and date."""
    return wide.melt(
        id_vars=id_columns,
        value_vars=dates,
        var_name='Date',
        value_name=value_name,
    )


confirmed_new = _to_long(confirmed, 'confirmed')
deaths_new = _to_long(deaths, 'deaths')
recovered_new = _to_long(recovered, 'recovered')

In [None]:
confirmed_new

In [None]:
# Drop the recovered rows for Canada: its recovered data is counted
# country-wise rather than Province/State-wise, so it would not line up
# with the confirmed/deaths rows when merging.

not_canada = recovered_new['Country/Region'].ne('Canada')
recovered_new = recovered_new[not_canada]

In [None]:
# Left-join deaths, then recovered, onto the confirmed table.
# Rows are matched on location and date.
merge_keys = ['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
full = (
    confirmed_new
    .merge(right=deaths_new, how='left', on=merge_keys)
    .merge(right=recovered_new, how='left', on=merge_keys)
)

## Data Wrangling

- Convert the `Date` column from string to datetime
- Handle NaN values by replacing them with zero
- Coronavirus cases reported from 3 cruise ships should be treated differently
- The `Date` values are all strings in m/dd/yy format. To convert them from string to datetime, let’s use `pd.to_datetime()`

In [None]:

# The Date values are m/d/yy strings; parse them with an explicit format so
# pandas does not have to guess the day/month order.

full['Date'] = pd.to_datetime(full['Date'], format='%m/%d/%y')

In [None]:
full

In [None]:
full.isna().sum()

In [None]:
# Missing recovered counts are treated as zero.
full['recovered'] = full['recovered'].where(full['recovered'].notna(), 0)

In [None]:
full.isna().sum()

In [None]:
full.head()

In [None]:
full.dtypes

In [None]:
# Extract the cruise-ship rows, identified by name in either location column.
# na=False makes a missing Province/State count as "not a ship", so the mask
# is strictly boolean and can be negated safely with ~.

province = full['Province/State']
country = full['Country/Region']
ship_rows = (
    province.str.contains('Grand Princess', na=False)
    | province.str.contains('Diamond Princess', na=False)
    | country.str.contains('Diamond Princess', na=False)
    | country.str.contains('MS Zaandam', na=False)
)
full_ship = full[ship_rows]

In [None]:
full_ship

In [None]:
# Remove the ship rows from the main table. The explicit copy makes `full`
# an independent frame, so adding columns to it later does not write into a
# slice of the previous frame.

full = full[~ship_rows].copy()

In [None]:
full

## Data Aggregation

In [None]:
# Active = confirmed minus deaths minus recovered, row by row.
full['Active'] = full['confirmed'].sub(full['deaths']).sub(full['recovered'])

In [None]:
full

In [None]:
# Sum the province-level rows into one row per (date, country).
# Several columns must be selected with a list; a bare tuple of names is
# rejected by current pandas.
full_new = (
    full.groupby(['Date', 'Country/Region'])[['confirmed', 'deaths', 'recovered', 'Active']]
    .sum()
    .reset_index()
)

In [None]:
full_new

In [None]:
# Add day-wise New cases, New deaths and New recovered by subtracting the
# previous day's cumulative value within each country.

value_cols = ['confirmed', 'deaths', 'recovered']
# Sorting by country first puts consecutive dates of one country next to each
# other, so a plain diff() gives the day-over-day change. Several columns are
# selected with a list; a bare tuple of names is rejected by current pandas.
temp = full_new.groupby(['Country/Region', 'Date'])[value_cols].sum().diff().reset_index()
# The first row of each country was diffed against the previous country's
# last row; blank those values out.
mask = temp['Country/Region'] != temp['Country/Region'].shift(1)
temp.loc[mask, value_cols] = np.nan
# renaming columns
temp.columns = ['Country/Region', 'Date', 'New cases', 'New deaths', 'New recovered']
# merging new values
full_new = pd.merge(full_new, temp, on=['Country/Region', 'Date'])
# filling na with 0
full_new = full_new.fillna(0)
# fixing data types
cols = ['New cases', 'New deaths', 'New recovered']
full_new[cols] = full_new[cols].astype('int')
# A negative daily value comes from a downward revision of the cumulative
# count; report it as zero new cases.
full_new['New cases'] = full_new['New cases'].clip(lower=0)
# full_new is the final table. Be aware that it is country-wise data.


In [None]:
full_new

## Visualization

In [None]:
# `confirmed` is cumulative, and px.pie sums the values per country. Use only
# the most recent date so each slice is the country's total rather than the
# sum of its running totals over every day.
latest = full_new[full_new['Date'] == full_new['Date'].max()]
fig = px.pie(latest, values='confirmed',names='Country/Region',title="Confirmed Cases ")
fig.update_traces(textposition="inside")
fig.update_layout(uniformtext_minsize=12,uniformtext_mode="hide")
fig.show()

In [None]:
# `deaths` is cumulative, and px.pie sums the values per country. Use only
# the most recent date so each slice is the country's total rather than the
# sum of its running totals over every day.
latest = full_new[full_new['Date'] == full_new['Date'].max()]
fig = px.pie(latest, values='deaths',names='Country/Region',title="Mortality Cases ")
fig.update_traces(textposition="inside")
fig.update_layout(uniformtext_minsize=12,uniformtext_mode="hide")
fig.show()

In [None]:
# `recovered` is cumulative, and px.pie sums the values per country. Use only
# the most recent date so each slice is the country's total rather than the
# sum of its running totals over every day.
latest = full_new[full_new['Date'] == full_new['Date'].max()]
fig = px.pie(latest, values='recovered',names='Country/Region',title="Recovered Cases ")
fig.update_traces(textposition="inside")
fig.update_layout(uniformtext_minsize=12,uniformtext_mode="hide")
fig.show()

In [None]:
# full_new has one row per (date, country). Colour by country so each country
# gets its own line instead of all of them being joined into a single trace.
plot = px.line(full_new, x='Date', y='confirmed', color='Country/Region',
               title='Confirmed Cases')
plot.show()

In [None]:
# full_new has one row per (date, country). Colour by country so each country
# gets its own line instead of all of them being joined into a single trace.
plot = px.line(full_new, x='Date', y='deaths', color='Country/Region',
               title='Mortality Case')
plot.show()

In [None]:
# full_new has one row per (date, country). Colour by country so each country
# gets its own line instead of all of them being joined into a single trace.
plot = px.line(full_new, x='Date', y='recovered', color='Country/Region',
               title='Recovered Case')
plot.show()

In [None]:
# full_new has one row per (date, country). Colour by country so each country
# gets its own line instead of all of them being joined into a single trace.
plot = px.line(full_new, x='Date', y='New cases', color='Country/Region',
               title='New Cases')
plot.show()

In [None]:
# full_new has one row per (date, country). Colour by country so each country
# gets its own line instead of all of them being joined into a single trace.
plot = px.line(full_new, x='Date', y='Active', color='Country/Region',
               title='Active Cases')
plot.show()

In [None]:
# Checking with selected country i.e India
!pip install altair
India = full_new[full_new['Country/Region'] == 'India']

In [None]:
import altair as alt

# Shared bar-chart template for the India data: one bar per calendar day on
# the x axis. Each chart below adds its own y encoding.
base = (
    alt.Chart(India)
    .mark_bar()
    .encode(x='monthdate(Date):O')
    .properties(width=500)
)


In [None]:
# Total confirmed (default colour) next to total deaths (red).
red = alt.value('#f54242')
confirmed_chart = base.encode(y='confirmed').properties(title='Total Confirmed')
deaths_chart = base.encode(y='deaths', color=red).properties(title='Total Deaths')
confirmed_chart | deaths_chart

In [None]:
# Total recovered (default colour) next to active cases (red).
red = alt.value('#f54242')
recovered_chart = base.encode(y='recovered').properties(title='Total Recovered')
active_chart = base.encode(y='Active', color=red).properties(title='Active')
recovered_chart | active_chart

In [None]:
# Daily new cases (default colour) next to daily new deaths (red).
red = alt.value('#f54242')
new_cases_chart = base.encode(y='New cases').properties(title='New Cases Confirmed')
new_deaths_chart = base.encode(y='New deaths', color=red).properties(title='New Deaths')
new_cases_chart | new_deaths_chart

In [None]:
# Insights from selected countries: keep only the rows of these nine countries.

countries = ['US', 'India', 'China', 'Brazil', 'Germany', 'Turkey', 'Italy', 'United Kingdom', 'Russia']
in_selection = full_new['Country/Region'].isin(countries)
selected_countries = full_new[in_selection]

In [None]:
selected_countries

In [None]:
# Circle chart of day-wise new cases: one row per country, one column per
# calendar day, circle area proportional to that day's new cases.

new_cases_size = alt.Size(
    'New cases:Q',
    scale=alt.Scale(range=[0, 1000]),
    legend=alt.Legend(title='Daily new cases'),
)
alt.Chart(selected_countries).mark_circle().encode(
    x='monthdate(Date):O',
    y='Country/Region',
    color='Country/Region',
    size=new_cases_size,
).properties(width=800, height=300)

In [None]:
# Circle chart of day-wise new deaths: one row per country, one column per
# calendar day, circle area proportional to that day's new deaths.

new_deaths_size = alt.Size(
    'New deaths:Q',
    scale=alt.Scale(range=[0, 1000]),
    legend=alt.Legend(title='Death Cases'),
)
alt.Chart(selected_countries).mark_circle().encode(
    x='monthdate(Date):O',
    y='Country/Region',
    color='Country/Region',
    size=new_deaths_size,
).properties(width=800, height=300)

In [None]:
# Circle chart of day-wise new recovered cases: one row per country, one
# column per calendar day, circle area proportional to that day's value.


alt.Chart(selected_countries).mark_circle().encode(
    x='monthdate(Date):O',
    y='Country/Region',
    color='Country/Region',
    size=alt.Size('New recovered:Q',
        scale=alt.Scale(range=[0, 1000]),
        # The size channel shows 'New recovered', so label it accordingly.
        legend=alt.Legend(title='Daily new recovered')
    ) 
).properties(
    width=800,
    height=300
)

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots

from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Collapse the country-level table into one worldwide total per date.
datewise = full_new.groupby(["Date"])[["confirmed", "recovered", "deaths"]].sum()
# Elapsed time since the earliest date in the index.
first_date = datewise.index.min()
datewise["Days Since"] = datewise.index - first_date

In [None]:
# Growth of Confirmed, Recovered and Death cases over time

import plotly.graph_objects as go

fig = go.Figure()
# One trace per series, added in the same order as the legend entries.
for column, label in (
    ("confirmed", 'Confirmed Cases'),
    ("recovered", 'Recovered Cases'),
    ("deaths", 'Death Cases'),
):
    fig.add_trace(go.Scatter(x=datewise.index, y=datewise[column],
                             mode='lines+markers', name=label))
fig.update_layout(
    title="Growth of different types of cases",
    xaxis_title="Date",
    yaxis_title="Number of Cases",
    legend=dict(x=0, y=1, traceorder="normal"),
)
fig.show()

## Mortality and Recovered Cases

In [None]:
#Calculating the Mortality Rate and Recovery Rate (as % of confirmed cases)
datewise["Mortality Rate"]=(datewise["deaths"]/datewise["confirmed"])*100
datewise["Recovery Rate"]=(datewise["recovered"]/datewise["confirmed"])*100
datewise["Active Cases"]=datewise["confirmed"]-datewise["recovered"]-datewise["deaths"]
datewise["Closed Cases"]=datewise["recovered"]+datewise["deaths"]

print("Average Mortality Rate",datewise["Mortality Rate"].mean())
print("Median Mortality Rate",datewise["Mortality Rate"].median())
print("Average Recovery Rate",datewise["Recovery Rate"].mean())
print("Median Recovery Rate",datewise["Recovery Rate"].median())

#Plotting Mortality and Recovery Rate, one subplot per rate
fig = make_subplots(rows=2, cols=1,
                   subplot_titles=("Recovery Rate", "Mortality Rate"))
fig.add_trace(
    go.Scatter(x=datewise.index, y=datewise["Recovery Rate"],name="Recovery Rate"),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(x=datewise.index, y=datewise["Mortality Rate"],name="Mortality Rate"),
    row=2, col=1
)
fig.update_layout(height=1000,legend=dict(x=-0.1,y=1.2,traceorder="normal"))
fig.update_xaxes(title_text="Date", row=1, col=1)
fig.update_yaxes(title_text="Recovery Rate", row=1, col=1)
# The grid is 2 rows x 1 column, so the mortality subplot is (row=2, col=1).
fig.update_xaxes(title_text="Date", row=2, col=1)
fig.update_yaxes(title_text="Mortality Rate", row=2, col=1)
fig.show()

## Data Preprocessing 

# Prediction using Machine Learning Models

In [None]:
import matplotlib.pyplot as plt
import datetime as dt
from datetime import timedelta
import seaborn as sns
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error,r2_score

## Linear Regression

In [None]:
# Express each date as an integer number of days since the first date.
datewise["Days Since"]=datewise.index - datewise.index[0]
datewise["Days Since"]=datewise["Days Since"].dt.days

# Splitting the data: first 95% of the dates for training, last 5% for validation

split_at = int(datewise.shape[0]*0.95)
train_ml=datewise.iloc[:split_at]
valid_ml=datewise.iloc[split_at:]
model_scores=[]

#Fitting

# The `normalize` argument no longer exists in current scikit-learn. For
# ordinary least squares on a single feature, rescaling the feature does not
# change the fitted predictions, so it is simply dropped.
lin_reg=LinearRegression()
lin_reg.fit(np.array(train_ml["Days Since"]).reshape(-1,1),np.array(train_ml["confirmed"]).reshape(-1,1))


In [None]:
# Score the linear model on the validation days.

valid_days = np.array(valid_ml["Days Since"]).reshape(-1,1)
prediction_valid_linreg = lin_reg.predict(valid_days)

# Compute the RMSE once, then both record and report it.
rmse_linreg = np.sqrt(mean_squared_error(valid_ml["confirmed"], prediction_valid_linreg))
model_scores.append(rmse_linreg)
print("Root Mean Square Error for Linear Regression: ", rmse_linreg)

In [None]:
# Predict over the whole date range and flatten to 1-D for plotting.
# (No plt.figure() here: the chart is drawn with plotly, and an unused
# matplotlib figure would only render as an empty box.)
prediction_linreg=lin_reg.predict(np.array(datewise["Days Since"]).reshape(-1,1))
linreg_output=list(np.ravel(prediction_linreg))

fig=go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["confirmed"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=linreg_output,
                    mode='lines',name="Linear Regression Best Fit Line",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Linear Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()

### Prediction for next 30 Days

In [None]:
# Forecast the 30 days that follow the last observed date.
# range(1, 31) yields offsets 1..30; range(1, 30) stopped one day short.
forecast_offsets = range(1, 31)
last_day = datewise["Days Since"].max()

new_date = [datewise.index[-1] + timedelta(days=i) for i in forecast_offsets]
future_days = np.array([last_day + i for i in forecast_offsets]).reshape(-1, 1)
# ravel() gives a flat list whether predict returns shape (n, 1) or (n,).
new_prediction_lr = list(np.ravel(lin_reg.predict(future_days)))

# Display floats without decimals; build the result table once, after all
# predictions are available.
pd.set_option("display.float_format",lambda x: '%.f'%x)
model_predictions=pd.DataFrame({"Dates": new_date, "LR": new_prediction_lr})

In [None]:
model_predictions.head(30)

## Polynomial Regression

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler


# First 95% of the dates for training, last 5% for validation.
split_at = int(datewise.shape[0]*0.95)
train_ml=datewise.iloc[:split_at]
valid_ml=datewise.iloc[split_at:]

poly = PolynomialFeatures(degree = 8) 
train_poly=poly.fit_transform(np.array(train_ml["Days Since"]).reshape(-1,1))
# The transformer is fitted on the training data only; validation data is
# just transformed.
valid_poly=poly.transform(np.array(valid_ml["Days Since"]).reshape(-1,1))
y=train_ml["confirmed"]

# The `normalize` argument no longer exists in current scikit-learn; the
# documented replacement is to scale the features in a pipeline. Scaling
# matters here because the degree-8 terms span many orders of magnitude.
# `linreg` still exposes fit/predict on the polynomial feature matrix.
linreg=make_pipeline(StandardScaler(), LinearRegression())
linreg.fit(train_poly,y)

In [None]:
# Score the polynomial model on the validation days.
prediction_poly = linreg.predict(valid_poly)
poly_mse = mean_squared_error(valid_ml["confirmed"], prediction_poly)
rmse_poly = np.sqrt(poly_mse)
model_scores.append(rmse_poly)
print("Root Mean Squared Error for Polynomial Regression: ", rmse_poly)

In [None]:
# Expand every day offset with the already-fitted polynomial transformer
# (transform, not fit_transform) and predict over the whole range.
# (No plt.figure() here: the chart is drawn with plotly, and an unused
# matplotlib figure would only render as an empty box.)
comp_data=poly.transform(np.array(datewise["Days Since"]).reshape(-1,1))
predictions_poly=linreg.predict(comp_data)

fig=go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["confirmed"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=predictions_poly,
                    mode='lines',name="Polynomial Regression Best Fit",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Polynomial Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",
                 legend=dict(x=0,y=1,traceorder="normal"))
fig.show()

## Prediction for next 30 Days

In [None]:
# Forecast the 30 days that follow the last observed date.
# The dates are built here so this cell does not depend on the length of a
# `new_date` list left behind by another cell.
forecast_offsets = range(1, 31)
last_day = datewise["Days Since"].max()

new_date = [datewise.index[-1] + timedelta(days=i) for i in forecast_offsets]
future_days = np.array([last_day + i for i in forecast_offsets]).reshape(-1, 1)
# Reuse the fitted transformer (transform, not fit_transform).
new_prediction_poly = list(linreg.predict(poly.transform(future_days)))

# Display floats without decimals; build the result table once.
pd.set_option("display.float_format",lambda x: '%.f'%x)
model_predictions=pd.DataFrame({"Dates": new_date, "POLY": new_prediction_poly})

In [None]:
model_predictions.head(30)

## Support Vector Machine

In [None]:
# Express each date as an integer number of days since the first date.
datewise["Days Since"]=datewise.index-datewise.index[0]
datewise["Days Since"]=datewise["Days Since"].dt.days

# First 95% of the dates for training, last 5% for validation.
split_at = int(datewise.shape[0]*0.95)
train_ml=datewise.iloc[:split_at]
valid_ml=datewise.iloc[split_at:]
# model_scores is deliberately NOT reset here, so the RMSEs recorded for the
# earlier models stay available for comparison.

svm=SVR(C=1,degree=5,kernel='poly',epsilon=0.001)

# SVR expects a 1-D target vector, so y is passed flat, not as a column.
svm.fit(np.array(train_ml["Days Since"]).reshape(-1,1),np.array(train_ml["confirmed"]).ravel())

In [None]:
# Score the support vector model on the validation days.
valid_days = np.array(valid_ml["Days Since"]).reshape(-1,1)
prediction_valid_svm = svm.predict(valid_days)

# Compute the RMSE once, then both record and report it.
rmse_svm = np.sqrt(mean_squared_error(valid_ml["confirmed"], prediction_valid_svm))
model_scores.append(rmse_svm)
print("Root Mean Square Error for Support Vectore Machine: ", rmse_svm)


In [None]:
# Plot the SVM fit over the whole date range. This section is about the
# support vector model, so predictions come from `svm`, not `lin_reg`.
# (No plt.figure() here: the chart is drawn with plotly.)
prediction_svm=svm.predict(np.array(datewise["Days Since"]).reshape(-1,1))

fig=go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["confirmed"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=prediction_svm,
                    mode='lines',name="Support Vector Machine Best Fit",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Support Vector Machine Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()

## Prediction for next 30 days

In [None]:

# Forecast the 30 days that follow the last observed date.
# range(1, 31) yields offsets 1..30; range(1, 30) stopped one day short.
forecast_offsets = range(1, 31)
last_day = datewise["Days Since"].max()

new_date = [datewise.index[-1] + timedelta(days=i) for i in forecast_offsets]
future_days = np.array([last_day + i for i in forecast_offsets]).reshape(-1, 1)
new_prediction_svm = list(svm.predict(future_days))

# Display floats without decimals; build the result table once.
pd.set_option("display.float_format",lambda x: '%.f'%x)
model_predictions=pd.DataFrame({"Dates": new_date, "SVM": new_prediction_svm})


In [None]:
    model_predictions.head(30)

# ARIMA MODEL

In [None]:
from scipy.stats import *
import statsmodels.api as sm #for ARIMA and SARIMAX
import datetime
from datetime import timedelta


In [None]:
from statsmodels.tsa.stattools import adfuller #adfuller stands for Augmented Dickey-Fuller unit root test.

#The function plots the rolling mean and rolling standard deviation of the series and performs the Augmented Dickey-Fuller test.
#It prints the p-value and the test summary. The smaller the p-value, the stronger the evidence that the series is stationary.

def test_stationarity(timeseries, window = 15, cutoff = 0.01):
    """Plot rolling statistics of a series and print an ADF test report.

    Parameters
    ----------
    timeseries : series passed to ``rolling`` and to ``adfuller``.
    window : size of the rolling window for the mean and standard deviation.
    cutoff : p-value threshold below which the series is reported as
        likely stationary.

    Prints the verdict and a summary of the test output; returns nothing.
    """
    # Rolling statistics drawn over the raw series.
    rolling = timeseries.rolling(window)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    plt.figure(figsize=(12, 8))
    plt.plot(timeseries, color='blue',label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show()

    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    # The first four entries of the result are the headline numbers; entry 4
    # holds the critical values keyed by significance level.
    labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
    dfoutput = pd.Series(dftest[0:4], index=labels)
    for level, critical in dftest[4].items():
        dfoutput['Critical Value (%s)'%level] = critical

    pvalue = dftest[1]
    message = (
        'p-value = %.4f. The series is likely stationary.'
        if pvalue < cutoff
        else 'p-value = %.4f. The series is likely non-stationary.'
    )
    print(message % pvalue)

    print(dfoutput)

In [None]:
# Test the worldwide daily series. full_new has one row per (date, country),
# so its `confirmed` column is a stack of many countries, not a time series.
test_stationarity(datewise['confirmed'])

In [None]:
# Difference the worldwide daily series (datewise), not the stacked
# per-country table. NOTE(review): despite the name, this is a lag-4
# difference (shift(4)), not a lag-1 difference; confirm that is intended.
first_diff = datewise.confirmed - datewise.confirmed.shift(4)
first_diff = first_diff.dropna(inplace = False)
test_stationarity(first_diff, window = 12)

In [None]:
import statsmodels.api as sm

# ACF and PACF of the worldwide daily series. full_new.confirmed stacks all
# countries for each date, so correlations between its rows are not lags in time.
global_confirmed = datewise["confirmed"]

fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(global_confirmed, ax=ax1) # using default value of lag
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(global_confirmed, ax=ax2) # using default value of lag

In [None]:
# ACF (top) and PACF (bottom) of the differenced series.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
fig = sm.graphics.tsa.plot_acf(first_diff, ax=ax1)
fig = sm.graphics.tsa.plot_pacf(first_diff, ax=ax2)

In [None]:

# Fit on the worldwide daily series. full_new.confirmed is one row per
# (date, country), which is not a single time series.
sarimax_mod = sm.tsa.statespace.SARIMAX(datewise["confirmed"], trend='n', order=(14,1,0)).fit()
print(sarimax_mod.summary())

In [None]:
import scipy.stats as stats

resid = sarimax_mod.resid # residuals of the fitted model
# Call normaltest through the explicit module alias rather than relying on a
# wildcard import elsewhere in the notebook.
print(stats.normaltest(resid))

fig = plt.figure(figsize=(12,8))
ax0 = fig.add_subplot(111)

# Histogram + KDE of the residuals (replaces the deprecated sns.distplot).
sns.histplot(resid, stat='density', kde=True, ax=ax0)

# Fit a normal distribution to the residuals and overlay its density.
(mu, sigma) = stats.norm.fit(resid)
grid = np.linspace(resid.min(), resid.max(), 200)
# Raw string: \m and \s are not valid Python escape sequences.
ax0.plot(grid, stats.norm.pdf(grid, mu, sigma), color='black',
         label=r'Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma))

ax0.legend(loc='best')
plt.ylabel('Frequency')
plt.title('Residual distribution')


# ACF and PACF of the residuals
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(sarimax_mod.resid, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(sarimax_mod.resid, ax=ax2)

In [None]:
pip install pmdarima

In [None]:
full_new

In [None]:
from statsmodels.tsa.stattools import adfuller
from numpy import log
import pandas as pd

# Run the test on the worldwide daily series; full_new stacks every country
# for each date and is not a single time series.
mydata = datewise

res = adfuller(mydata.confirmed.dropna())
print('Augmented Dickey-Fuller Statistic: %f' % res[0])
print('p-value: %f' % res[1])

In [None]:
import numpy as np, pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize' : (9,7), 'figure.dpi' : 120})

# Use the worldwide daily series; full_new stacks every country for each date.
df = datewise

# Left column: the series over time. Right column: its autocorrelation by lag.
# The x axis is shared per column only, since dates and lags are different scales.
fig, axes = plt.subplots(3, 2, sharex = 'col')

# The Genuine Series
axes[0, 0].plot(df.confirmed); axes[0, 0].set_title('The Genuine Series')
plot_acf(df.confirmed, ax = axes[0, 1])

# Order of Differencing: First
axes[1, 0].plot(df.confirmed.diff()); axes[1, 0].set_title('Order of Differencing: First')
plot_acf(df.confirmed.diff().dropna(), ax = axes[1, 1])

# Order of Differencing: Second
axes[2, 0].plot(df.confirmed.diff().diff()); axes[2, 0].set_title('Order of Differencing: Second')
plot_acf(df.confirmed.diff().diff().dropna(), ax = axes[2, 1])

plt.show()

In [None]:
from pmdarima.arima.utils import ndiffs
import pandas as pd

# Estimate the differencing order on the worldwide daily series; full_new
# stacks every country for each date and is not a single time series.
df = datewise
X = df.confirmed

# Augmented Dickey Fuller Test
adftest = ndiffs(X, test = 'adf')

# KPSS Test
kpsstest = ndiffs(X, test = 'kpss')

# PP Test
pptest = ndiffs(X, test = 'pp')

print("ADF Test =", adftest)
print("KPSS Test =", kpsstest)
print("PP Test =", pptest)

In [None]:
import numpy as np, pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})

# Use the worldwide daily series; full_new stacks every country for each date.
df = datewise

# The left panel is indexed by date and the right panel by lag, so the x axes
# must not be shared.
fig, axes = plt.subplots(1, 2, sharex = False)
axes[0].plot(df.confirmed.diff()); axes[0].set_title('Order of Differencing: First')
axes[1].set(ylim = (0,5))
plot_pacf(df.confirmed.diff().dropna(), ax = axes[1])

plt.show()

In [None]:
import numpy as np, pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize' : (9,3), 'figure.dpi' : 120})

# Use the worldwide daily series; full_new stacks every country for each date.
mydata = datewise

# The left panel is indexed by date and the right panel by lag, so the x axes
# must not be shared.
fig, axes = plt.subplots(1, 2, sharex = False)
axes[0].plot(mydata.confirmed.diff()); axes[0].set_title('Order of Differencing: First')
axes[1].set(ylim = (0, 1.2))
plot_acf(mydata.confirmed.diff().dropna(), ax = axes[1])

plt.show()

In [None]:
pip install statsmodels

# Building the ARIMA Model

In [None]:
from statsmodels.tsa.arima.model import ARIMA

In [None]:
import numpy as np, pandas as pd


# Fit on the worldwide daily series; full_new stacks every country for each
# date and is not a single time series.
mydata = datewise

# Creating ARIMA model
mymodel = ARIMA(mydata.confirmed, order = (0, 1, 0))
modelfit = mymodel.fit()
print(modelfit.summary())

In [None]:
pip install statsmodels

In [None]:
from statsmodels.tsa.arima.model import ARIMA

In [None]:
import numpy as np, pandas as pd


# Fit on the worldwide daily series; full_new stacks every country for each
# date and is not a single time series.
mydata = datewise

# Creating ARIMA model
mymodel = ARIMA(mydata.confirmed, order = (2, 2, 2))
modelfit = mymodel.fit()
print(modelfit.summary())
 

In [None]:
pip install statsmodels

In [None]:
 pip install statsmodels --upgrade

In [None]:
# statsmodels.tsa.arima_model has been removed from current statsmodels;
# the supported implementation lives in statsmodels.tsa.arima.model.
from statsmodels.tsa.arima.model import ARIMA

In [None]:
import numpy as np, pandas as pd
# The old statsmodels.tsa.arima_model.ARIMA (with fit(disp=...) and
# results.plot_predict) has been removed; use the current ARIMA class and the
# standalone plot_predict helper.
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_predict
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize' : (9,3), 'figure.dpi' : 120})


# Fit on the worldwide daily series; full_new stacks every country for each
# date and is not a single time series.
mydata = datewise

# Creating ARIMA model
mymodel = ARIMA(mydata.confirmed, order = (0, 1, 0))
modelfit = mymodel.fit()

# Actual vs Fitted
fig, ax = plt.subplots()
ax.plot(mydata.confirmed, label='actual')
# Start at observation 1: with one order of differencing there is no previous
# value from which to predict the first observation.
plot_predict(modelfit, start=1, dynamic=False, ax=ax)
ax.legend(loc='best')
plt.show()

# Conclusion