## COVID-19 FORECAST USING KUBEFLOW FAIRING ON UCS

## Clone Cisco Kubeflow starter pack repository

In [None]:
BRANCH_NAME="dev" #Provide git branch "master" or "dev"
! git clone -b $BRANCH_NAME https://github.com/CiscoAI/cisco-kubeflow-starter-pack.git

## Configure Docker Credentials

Encode your Docker registry user name and password in base64:

echo -n USERNAME:PASSWORD | base64 

Create a config.json file containing your Docker registry URL and the previously generated base64 string.

In [None]:
!echo -n USERNAME:PASSWORD | base64

In [None]:
%%writefile config.json
{
    "auths": {
        "https://index.docker.io/v1/": {
            "auth": "<<Provide previous generated base64 string>>"
        }
    }
}

## Create Requirements.txt

In [13]:
%%writefile requirements.txt
pandas
keras
seldon-core
tornado>=6.0.3
kubeflow-fairing
tensorflow==1.13.1
cloudpickle==1.1.1
kubernetes==10.0.1
matplotlib
plotly_express

Overwriting requirements.txt


## Install Requirements

In [14]:
!pip install --user -r requirements.txt





You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


## Restart Notebook Kernel

In [None]:
# Restart the kernel from the browser by emitting a <script> element as raw HTML
# output; the script calls Jupyter.notebook.kernel.restart() in the front end.
# NOTE(review): relies on the `Jupyter` JavaScript global, which is presumably
# only available in the classic Notebook UI - confirm before using in JupyterLab.
from IPython.display import display_html
display_html("<script>Jupyter.notebook.kernel.restart()</script>",raw=True)

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import datetime
import re,os
import logging
import sys
import importlib
import time
import tensorflow as tf
import keras
from keras.models import Model
from keras import layers
from keras import Input
from keras import optimizers

from datetime import timedelta
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


## Set up Kubeflow Fairing for on-premise training and predictions
Import the Fairing library and configure the on-premise environment in which your training or prediction job will run.

In [2]:
from kubernetes import client as k8s_client
from kubernetes.client import rest as k8s_rest
from kubernetes import config as k8s_config
from kubernetes.client.rest import ApiException

from kubeflow import fairing   
from kubeflow.fairing import utils as fairing_utils
from kubeflow.fairing import TrainJob
from kubeflow.fairing.preprocessors.function import FunctionPreProcessor
from kubeflow.fairing.preprocessors import base as base_preprocessor
from kubeflow.fairing.builders.cluster.cluster import ClusterBuilder

from kubeflow.fairing.cloud.k8s import MinioUploader
from kubeflow.fairing.builders.cluster.minio_context import MinioContextSource
from kubeflow.fairing import PredictionEndpoint
from kubeflow.fairing.kubernetes.utils import mounting_pvc
from kubeflow.fairing.kubernetes.utils import mounting_pvc

# Resolve the KubernetesBackend class from the fairing backends module, then
# report the Kubernetes namespace returned by the fairing utilities.
BackendClass = importlib.import_module('kubeflow.fairing.backends').KubernetesBackend
namespace = fairing_utils.get_current_k8s_namespace()
print("Namespace : %s" % namespace)

Namespace : anonymous


## Get minio-service cluster IP to upload docker build context
#### Set DOCKER_REGISTRY
The DOCKER_REGISTRY variable is used to push the newly built image. 
Please change the variable to the registry for which you've configured credentials.

In [3]:
# Registry that the newly built images are pushed to; change it to the registry
# for which credentials were configured in config.json.
DOCKER_REGISTRY = "poornimadevii"

k8s_config.load_incluster_config()
api_client = k8s_client.CoreV1Api()
custom_api = k8s_client.CustomObjectsApi()
minio_service_endpoint = None

try:
    minio_service_endpoint = api_client.read_namespaced_service(
        name='minio-service', namespace='kubeflow').spec.cluster_ip
except ApiException as e:
    if e.status == 403:
        logging.warning(f"The service account doesn't have sufficient privileges "
                      f"to get the kubeflow minio-service. "
                      f"You will have to manually enter the minio cluster-ip. "
                      f"To make this function work ask someone with cluster "
                      f"priveleges to create an appropriate "
                      f"clusterrolebinding by running a command.\n"
                      f"kubectl create --namespace=kubeflow rolebinding "
                       "--clusterrole=kubeflow-view "
                       "--serviceaccount=${NAMESPACE}:default-editor "
                       "${NAMESPACE}-minio-view")
        # The f-prefix was missing here, so "{e.reason}" was logged literally.
        logging.error(f"API access denied with reason: {e.reason}")
    else:
        # Previously any other API failure was swallowed without a trace.
        logging.error(f"Reading the kubeflow minio-service failed ({e.status}): {e.reason}")

# Manual override for clusters where the lookup above is not permitted.
if not minio_service_endpoint:
    minio_service_endpoint = os.environ.get("MINIO_SERVICE_ENDPOINT")
if not minio_service_endpoint:
    # Fail with an actionable message instead of a TypeError on "http://" + None.
    raise RuntimeError(
        "Could not determine the minio-service cluster IP. Set the "
        "MINIO_SERVICE_ENDPOINT environment variable to the cluster IP and re-run this cell.")

s3_endpoint = minio_service_endpoint
minio_endpoint = "http://" + s3_endpoint + ":9000"
# Credentials can be supplied through the environment; the fallbacks are the
# values that were previously hard-coded in this cell.
minio_username = os.environ.get("MINIO_ACCESS_KEY", "minio")
minio_key = os.environ.get("MINIO_SECRET_KEY", "minio123")
minio_region = "us-east-1"
print(minio_endpoint)


minio_uploader = MinioUploader(endpoint_url=minio_endpoint, minio_secret=minio_username,
                               minio_secret_key=minio_key, region_name=minio_region)
minio_context_source = MinioContextSource(endpoint_url=minio_endpoint, minio_secret=minio_username,
                                          minio_secret_key=minio_key, region_name=minio_region)

http://10.98.188.38:9000


### Create a config-map in the namespace you're using with the docker config

In [None]:
!kubectl create --namespace $namespace configmap docker-config --from-file=./config.json

## Define required paths to train & test data files

In [4]:
# Location of the COVID forecasting example inside the cloned repository, and
# the train/test CSV files below it.
main_path = 'cisco-kubeflow-starter-pack/apps/healthcare/covid-forecasting/onprem/'
train_data_path = f"{main_path}data/train.csv"
test_data_path = f"{main_path}data/test.csv"

## Build Docker Image

In [5]:
# Files handed to the fairing preprocessor: each key is a path in the cloned
# repository, each value the name it is given.
# NOTE(review): presumably the values are file names inside the Docker build
# context - confirm against the BasePreProcessor documentation.
output_map = {
    os.path.join(main_path, "fairing/Dockerfile"): "Dockerfile",
    os.path.join(main_path, "fairing/covid-model.py"): "covid-model.py",
    train_data_path: "train.csv",
    test_data_path: "test.csv",
}

preprocessor = base_preprocessor.BasePreProcessor(
    output_map=output_map,
    input_files=['requirements.txt'],
)
preprocessor.preprocess()

# Build the image with the cluster builder, using MinIO as the context source.
builder = ClusterBuilder(
    registry=DOCKER_REGISTRY,
    preprocessor=preprocessor,
    context_source=minio_context_source,
)
builder.build()

[I 200513 09:29:42 cluster:46] Building image using cluster builder.
[I 200513 09:29:42 base:107] Creating docker context: /tmp/fairing_context_uukzpy8g
[W 200513 09:29:42 base:94] /tmp/fairing_dockerfile_w09jkknb already exists in Fairing context, skipping...
[W 200513 09:29:43 manager:296] Waiting for fairing-builder-pfgt5-wgrkg to start...
[W 200513 09:29:43 manager:296] Waiting for fairing-builder-pfgt5-wgrkg to start...
[W 200513 09:29:43 manager:296] Waiting for fairing-builder-pfgt5-wgrkg to start...
[W 200513 09:29:45 manager:296] Waiting for fairing-builder-pfgt5-wgrkg to start...
[I 200513 09:29:52 manager:302] Pod started running True


[36mINFO[0m[0001] Resolved base name python:3.7-slim-buster to python:3.7-slim-buster
[36mINFO[0m[0001] Resolved base name python:3.7-slim-buster to python:3.7-slim-buster
[36mINFO[0m[0001] Downloading base image python:3.7-slim-buster
[36mINFO[0m[0002] Error while retrieving image from cache: getting file info: stat /cache/sha256:0f322e5066a6c5c643829739dc93ea8ab73204abdea63b15af700fe6efd2ce4f: no such file or directory
[36mINFO[0m[0002] Downloading base image python:3.7-slim-buster
[36mINFO[0m[0003] Built cross stage deps: map[]
[36mINFO[0m[0003] Downloading base image python:3.7-slim-buster
[36mINFO[0m[0004] Error while retrieving image from cache: getting file info: stat /cache/sha256:0f322e5066a6c5c643829739dc93ea8ab73204abdea63b15af700fe6efd2ce4f: no such file or directory
[36mINFO[0m[0004] Downloading base image python:3.7-slim-buster
[36mINFO[0m[0004] Unpacking rootfs as cmd RUN pip install tensorflow==1.13.1 --no-cache-dir requires it.
[36mINFO[0m[0009] T

In [6]:
builder.image_tag

'poornimadevii/fairing-job:EF894B62'

## Create Katib Experiment

In [9]:
# Katib experiment for the COVID model: Bayesian optimisation over batch size
# and optimizer, maximising "Validation-accuracy". Every trial runs
# /opt/covid-model.py from the image built above (builder.image_tag).
experiment = {
    "apiVersion": "kubeflow.org/v1alpha3",
    "kind": "Experiment",
    "metadata": {
        "namespace": "kubeflow",
        "labels": {
            "controller-tools.k8s.io": "1.0",
        },
        "name": "covid4",
    },
    "spec": {
        "objective": {
            "type": "maximize",
            "goal": 0.99,
            "objectiveMetricName": "Validation-accuracy",
        },
        "algorithm": {
            "algorithmName": "bayesianoptimization",
        },
        "parallelTrialCount": 5,
        "maxTrialCount": 5,
        "maxFailedTrialCount": 3,
        "parameters": [
            {
                "name": "--batch-size",
                "parameterType": "categorical",
                "feasibleSpace": {"list": ["16", "32", "48", "64"]},
            },
            {
                "name": "--optimizer",
                "parameterType": "categorical",
                "feasibleSpace": {"list": ["adam", "sgd"]},
            },
        ],
        "trialTemplate": {
            "goTemplate": {
                # Go template of the per-trial batch Job; %s is replaced by the image tag.
                "rawTemplate": "apiVersion: batch/v1\nkind: Job\nmetadata:\n  name: {{.Trial}}\n  namespace: {{.NameSpace}}\nspec:\n  template:\n    spec:\n      containers:\n      - name: {{.Trial}}\n        image: %s\n        command:\n        - \"python3\"\n        - \"/opt/covid-model.py\"\n        {{- with .HyperParameters}}\n        {{- range .}}\n        - \"{{.Name}}={{.Value}}\"\n        {{- end}}\n        {{- end}}\n      restartPolicy: Never" % builder.image_tag,
            },
        },
    },
}

experiment_name = experiment["metadata"]["name"]
custom_api.create_namespaced_custom_object(
    group="kubeflow.org",
    version="v1alpha3",
    namespace="kubeflow",
    plural="experiments",
    body=experiment,
)

{'apiVersion': 'kubeflow.org/v1alpha3',
 'kind': 'Experiment',
 'metadata': {'creationTimestamp': '2020-05-13T09:47:27Z',
  'generation': 1,
  'labels': {'controller-tools.k8s.io': '1.0'},
  'name': 'covid4',
  'namespace': 'kubeflow',
  'resourceVersion': '9038772',
  'selfLink': '/apis/kubeflow.org/v1alpha3/namespaces/kubeflow/experiments/covid4',
  'uid': '56046c00-0ed7-4cf6-ab68-957360cb8e9b'},
 'spec': {'algorithm': {'algorithmName': 'bayesianoptimization'},
  'maxFailedTrialCount': 3,
  'maxTrialCount': 5,
  'objective': {'goal': 0.99,
   'objectiveMetricName': 'Validation-accuracy',
   'type': 'maximize'},
  'parallelTrialCount': 5,
  'parameters': [{'feasibleSpace': {'list': ['16', '32', '48', '64']},
    'name': '--batch-size',
    'parameterType': 'categorical'},
   {'feasibleSpace': {'list': ['adam', 'sgd']},
    'name': '--optimizer',
    'parameterType': 'categorical'}],
  'trialTemplate': {'goTemplate': {'rawTemplate': 'apiVersion: batch/v1\nkind: Job\nmetadata:\n  name: 

In [None]:
# Mnist example
#
# Katib experiment that tunes learning rate, layer count and optimizer of the
# public mxnet-mnist image, maximising "Validation-accuracy".
# NOTE: running this cell rebinds `experiment` and `experiment_name`, so any
# later cell that reads them refers to this experiment rather than an earlier one.
experiment = {
    "apiVersion": "kubeflow.org/v1alpha3",
    "kind": "Experiment",
    "metadata": {
        # Must equal the namespace passed to the create call below; it was
        # hard-coded to "anonymous", which only matched by coincidence.
        "namespace": namespace,
        "labels": {
            "controller-tools.k8s.io": "1.0",
        },
        "name": "mnist",
    },
    "spec": {
        "objective": {
            "type": "maximize",
            "goal": 0.99,
            "objectiveMetricName": "Validation-accuracy",
        },
        "algorithm": {
            "algorithmName": "bayesianoptimization",
        },
        "parallelTrialCount": 5,
        "maxTrialCount": 5,
        "maxFailedTrialCount": 3,
        "parameters": [
            {
                "name": "--lr",
                "parameterType": "double",
                "feasibleSpace": {"min": "0.01", "max": "0.03"},
            },
            {
                "name": "--num-layers",
                "parameterType": "int",
                "feasibleSpace": {"min": "2", "max": "5"},
            },
            {
                "name": "--optimizer",
                "parameterType": "categorical",
                "feasibleSpace": {"list": ["adam", "sgd", "ftrl"]},
            },
        ],
        "trialTemplate": {
            "goTemplate": {
                # Go template of the per-trial batch Job.
                "rawTemplate": "apiVersion: \"batch/v1\"\nkind: Job\nmetadata:\n  name: {{.Trial}}\n  namespace: {{.NameSpace}}\nspec:\n  template:\n    spec:\n      containers:\n      - name: {{.Trial}}\n        image: docker.io/kubeflowkatib/mxnet-mnist\n        command:\n        - \"python3\"\n        - \"/opt/mxnet-mnist/mnist.py\"\n        - \"--batch-size=64\"\n        {{- with .HyperParameters}}\n        {{- range .}}\n        - \"{{.Name}}={{.Value}}\"\n        {{- end}}\n        {{- end}}\n      restartPolicy: Never",
            },
        },
    },
}

experiment_name = experiment["metadata"]["name"]
custom_api.create_namespaced_custom_object(
    group="kubeflow.org",
    version="v1alpha3",
    namespace=namespace,
    plural="experiments",
    body=experiment,
)

## Wait for experiment succeeded status

In [None]:
def wait_for_experiment(api, experiment_body, poll_seconds=60):
    """Poll a Katib experiment until it reports a ``Succeeded`` condition.

    Args:
        api: object exposing ``get_namespaced_custom_object_status`` (the
            ``CustomObjectsApi`` client created earlier in this notebook).
        experiment_body: the experiment dict that was submitted; its
            ``metadata.name`` and ``metadata.namespace`` identify what to poll,
            so the loop always looks where the experiment was actually created.
        poll_seconds: pause between two polls.

    Returns:
        True once a condition of type ``Succeeded`` is present.

    Raises:
        RuntimeError: if a condition of type ``Failed`` is present, instead of
            polling forever.
    """
    metadata = experiment_body["metadata"]
    while True:
        response = api.get_namespaced_custom_object_status(
            group="kubeflow.org", version="v1alpha3",
            namespace=metadata["namespace"], plural="experiments",
            name=metadata["name"])
        # Right after creation the object may not carry a status block yet.
        conditions = response.get("status", {}).get("conditions", [])
        condition_types = [condition["type"] for condition in conditions]

        if "Succeeded" in condition_types:
            print("Experiment Status: %s" % "Succeeded")
            return True
        if "Failed" in condition_types:
            raise RuntimeError("Experiment %s/%s failed" % (metadata["namespace"], metadata["name"]))

        # Report the most recent condition, as the original loop did.
        print("Experiment Status: %s" % (condition_types[-1] if condition_types else "Pending"))
        time.sleep(poll_seconds)


status = wait_for_experiment(custom_api, experiment)

## Preprocess dataset

#### Define function for preprocessing train data

In [None]:
def preprocess_train(train_df, n_prev, n_next):
    """Build lagged model inputs and multi-step targets from the training frame.

    For "NewCases" and "NewFatalities", the values of the previous ``n_prev``
    rows of the same (Country_Region, Province_State) group become inputs, and
    the current value plus the following ``n_next - 1`` rows become targets.
    Rows with any missing value are dropped. ``train_df`` itself is not modified.

    Returns:
        (const_df, time_df, output_df) where ``const_df`` holds the dummy-encoded
        province/country columns (first level dropped), ``time_df`` is an array
        of shape (rows, n_prev, 2) and ``output_df`` holds the target columns.
    """
    region_keys = ["Country_Region", "Province_State"]
    targets = ["NewCases", "NewFatalities"]
    per_region = train_df.groupby(region_keys)

    df = train_df.copy()

    # Lag features: value observed `lag` rows earlier within the same region.
    input_feats = []
    for lag in range(1, n_prev + 1):
        for feat in targets:
            column = "{}_prev_{}".format(feat, lag)
            df[column] = per_region[feat].shift(lag)
            input_feats.append(column)

    # Targets: the current row plus the next n_next - 1 rows of the same region.
    output_feats = list(targets)
    for lead in range(1, n_next):
        for feat in targets:
            column = "{}_next_{}".format(feat, lead)
            df[column] = per_region[feat].shift(-lead)
            output_feats.append(column)

    # Rows at either end of a region's series lack a full window.
    df = df.dropna()

    # Static inputs: indicator columns for province and country.
    const_df = pd.get_dummies(df[["Province_State", "Country_Region"]], drop_first=True)

    # Sequence inputs: one (n_prev, 2) block per remaining row.
    time_df = df[input_feats].values.reshape((len(df), -1, 2))

    return const_df, time_df, df[output_feats]

#### Define function for preprocessing test data

In [None]:
def preprocess_test(train_df, test_df, n_prev):
    """Build model inputs for the first day after the end of the training data.

    The test rows dated one day after the last training date are appended to
    the training rows so that, per (Country_Region, Province_State) group, the
    previous ``n_prev`` values of "NewCases" and "NewFatalities" can be looked
    up by shifting. Only the appended test rows (those with a ForecastId) are
    kept in the result.

    Returns:
        (const_df, time_df): dummy-encoded province/country columns (first
        level dropped) and an array of shape (rows, n_prev, 2).
    """
    region_keys = ["Country_Region", "Province_State"]
    first_forecast_day = train_df["Date"].max() + timedelta(days=1)

    # Training history followed, per region, by that region's first forecast row.
    next_day_rows = test_df[test_df["Date"] == first_forecast_day]
    append_df = pd.concat([train_df, next_day_rows]).sort_values(region_keys + ["Date"])

    # Lag features: value observed `lag` rows earlier within the same region.
    input_feats = []
    for lag in range(1, n_prev + 1):
        for feat in ("NewCases", "NewFatalities"):
            column = "{}_prev_{}".format(feat, lag)
            append_df[column] = append_df.groupby(region_keys)[feat].shift(lag)
            input_feats.append(column)

    # Keep only the rows that came from the test set (they carry a ForecastId).
    append_df = append_df[append_df["ForecastId"].notnull()]

    # Static inputs: indicator columns for province and country.
    const_df = pd.get_dummies(append_df[["Province_State", "Country_Region"]], drop_first=True)

    # Sequence inputs: one (n_prev, 2) block per remaining row.
    time_df = append_df[input_feats].values.reshape((len(append_df), -1, 2))

    return const_df, time_df


#### Define main preprocessing function

In [None]:
def _add_log_daily_increase(frame, cumulative_col, daily_col):
    """Add ``daily_col`` = log(1 + daily increase of ``cumulative_col``) to ``frame`` in place.

    The increase is the difference to the previous row of the same
    (Country_Region, Province_State) group; the first row of each group and
    any negative difference count as 0.
    """
    daily = frame.groupby(["Country_Region", "Province_State"])[cumulative_col].diff(periods=1)
    daily = daily.fillna(0)
    frame[daily_col] = np.log(np.where(daily < 0, 0, daily) + 1)


def covid_preprocess_function():
    """Load the train/test CSV files and derive every frame the model needs.

    Reads ``train_data_path`` and ``test_data_path``, blanks missing provinces,
    parses dates, derives log-scaled "NewCases"/"NewFatalities" on the training
    data and builds the training and prediction inputs. The forecast horizon
    (days between the last test date and the last training date) is also used
    as the number of lagged days.

    Returns:
        (train_df, test_df, const_df, time_df, output_df, const_test_df, time_test_df)
    """
    # Read train and test datasets
    train_df = pd.read_csv(train_data_path)
    print("train_df shape: {0}" .format(train_df.shape))
    test_df = pd.read_csv(test_data_path)
    # This line used to print the test shape under the label "train_df shape".
    print("test_df shape: {0}".format(test_df.shape))

    for frame in (train_df, test_df):
        # Missing provinces become "" so they can serve as a group key.
        frame["Province_State"] = frame["Province_State"].fillna("")
        frame["Date"] = pd.to_datetime(frame["Date"])

    _add_log_daily_increase(train_df, "ConfirmedCases", "NewCases")
    _add_log_daily_increase(train_df, "Fatalities", "NewFatalities")

    # Number of days for which new cases and new fatalities must be forecast.
    n_next = (test_df["Date"].max() - train_df["Date"].max()).days

    const_df, time_df, output_df = preprocess_train(train_df, n_next, n_next)
    const_test_df, time_test_df = preprocess_test(train_df, test_df, n_next)

    return (train_df, test_df, const_df, time_df, output_df, const_test_df, time_test_df)


train_df, test_df, const_df, time_df, output_df, const_test_df, time_test_df = covid_preprocess_function()

#### Define function for creating & training model

In [None]:
def covid_model_train_function(model_export_path, epochs, batch_size):
    """Build, train and export the two-input Keras forecasting model.

    Reads the module-level ``time_df``, ``const_df`` and ``output_df`` for the
    input/output sizes and as training data.

    Args:
        model_export_path: directory under which the SavedModel is written, in
            a sub-directory named "001".
        epochs: forwarded to ``model.fit``.
        batch_size: forwarded to ``model.fit``.
    """
    
    # Sequence branch: an LSTM over the lagged values.
    time_input = Input(shape=(time_df.shape[1], time_df.shape[2]))
    lstm = layers.LSTM(64)(time_input)

    # Static branch: the dummy-encoded columns.
    const_input = Input(shape=(const_df.shape[1],))

    combine = layers.concatenate([lstm, const_input], axis=-1)
    #lstm_out = layers.Dropout(0.1)(combine)
    # NOTE(review): softmax makes the outputs sum to 1, while the targets are
    # log(x + 1) values fitted with a mean-squared-error loss - looks like a
    # linear activation was intended; confirm. Changing it would presumably also
    # change the auto-generated output tensor name that CovidServe.predict reads.
    output = layers.Dense(output_df.shape[1], activation='softmax')(combine)

    model = Model([time_input, const_input], output)
    #optimizer=optimizers.SGD(lr=0.01, nesterov=True)
    # NOTE(review): 'acc' is reported alongside a regression loss - confirm it is meaningful here.
    model.compile(optimizer='adam',loss='mean_squared_error',metrics=['acc'])
    model.summary()
    
    model.fit([time_df, const_df], output_df, epochs=epochs, batch_size=batch_size)
    
    
    # Names under which the two model inputs are exposed in the exported signature.
    input_names = ['input1','input2']
    name_to_input = {name: t_input for name, t_input in zip(input_names, model.inputs)}
    

    # Export the current Keras session; outputs are keyed by their tensor names.
    tf.saved_model.simple_save(
        keras.backend.get_session(),
        os.path.join(model_export_path, "001"),
        inputs=name_to_input,
        outputs={t.name: t for t in model.outputs})    
    

## Define COVID class to be used for Kubeflow Fairing

In [None]:
class CovidServe(object):
    """Training and serving entry points packaged by Kubeflow Fairing.

    ``train`` fits and exports the model; ``predict`` loads the exported
    SavedModel from ``/mnt/covid-model`` and runs it on the request payload.
    """

    def __init__(self):
        self.model = None
        # NOTE(review): training exports to this relative directory while
        # predict() reads from '/mnt/covid-model' - confirm the two locations
        # are meant to coincide inside the container.
        self.model_export_path = 'covid-model'
        self.epochs = 5
        self.batch_size = 64

    def train(self):
        """Train the model and export it under ``self.model_export_path``."""
        covid_model_train_function(self.model_export_path, self.epochs, self.batch_size)

    @staticmethod
    def _find_export_dir(path):
        """Return the first entry of ``path`` (in sorted order) whose name starts with a digit.

        Raises:
            FileNotFoundError: if ``path`` has no such entry (or does not exist).
        """
        # Sorting makes the choice deterministic; os.listdir order is arbitrary.
        for entry in sorted(os.listdir(path)):
            if re.match('[0-9]', entry):
                return os.path.join(path, entry)
        raise FileNotFoundError("No exported model version directory found under %s" % path)

    def predict(self, X, feature_names=None):
        """Run the exported model on one request.

        Args:
            X: two-element sequence; ``X[0]`` holds the sequence inputs and
                ``X[1]`` the static inputs, one entry per sample.
            feature_names: accepted but not used.

        Returns:
            The value stored under "dense_1/Softmax:0" in the predictor output.
        """
        # The previous os.path.join(os.getcwd(), '/mnt/covid-model') always
        # evaluated to '/mnt/covid-model' because the second part is absolute.
        exported_path = self._find_export_dir('/mnt/covid-model')

        sample_count = len(X[0])
        time_batch = [X[0][i] for i in range(sample_count)]
        const_batch = [X[1][i] for i in range(sample_count)]

        # The session is closed by the context manager on exit.
        with tf.Session() as sess:
            # NOTE(review): the predictor below loads the SavedModel itself, so
            # this explicit load is presumably redundant - confirm before removing.
            tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], exported_path)
            predictor = tf.contrib.predictor.from_saved_model(exported_path, signature_def_key='serving_default')
            output_dict = predictor({"input1": time_batch, "input2": const_batch})

        return output_dict["dense_1/Softmax:0"]


## Train COVID model on Kubeflow using Kubeflow Fairing
Kubeflow Fairing packages the CovidServe class, the training data, and the training job's software prerequisites as a Docker image. Then Kubeflow Fairing deploys and runs the training job on kubeflow.

In [None]:
# Package CovidServe together with the data files and requirements.txt, mount a
# persistent volume claim at /mnt/ in the pod, and submit the training job.
# NOTE(review): `pvc` is not defined in any cell visible in this notebook - it
# must be set to the name of the PVC to mount before this cell is run.
train_job = TrainJob(CovidServe, input_files=[train_data_path, test_data_path,"requirements.txt"],
                     pod_spec_mutators = [mounting_pvc(pvc_name=pvc, pvc_mount_path="/mnt/")],
                     docker_registry=DOCKER_REGISTRY, backend=BackendClass(build_context_source=minio_context_source))
train_job.submit()

## Deploy trained model to Kubeflow for predictions using Kubeflow Fairing
Kubeflow Fairing packages the CovidServe class, the trained model, and the prediction endpoint's software prerequisites as a Docker image. Then Kubeflow Fairing deploys and runs the prediction endpoint on Kubeflow.

In [None]:
# Package CovidServe as a prediction endpoint, mount a persistent volume claim
# at /mnt/ in the pod (CovidServe.predict reads the model from /mnt/covid-model),
# and create the endpoint.
# NOTE(review): `pvc` is not defined in any cell visible in this notebook - it
# must be set to the name of the PVC to mount before this cell is run.
endpoint = PredictionEndpoint(CovidServe, input_files=[train_data_path, test_data_path,"requirements.txt"],
                              docker_registry=DOCKER_REGISTRY,
                              pod_spec_mutators = [mounting_pvc(pvc_name=pvc, pvc_mount_path="/mnt/")],
                              backend=BackendClass(build_context_source=minio_context_source))
endpoint.create()

## Obtain URL of prediction endpoint 

In [None]:
url = endpoint.url
url

## Design input data for prediction using prediction endpoint

In [None]:
input_data={"data":{"ndarray":[time_test_df.tolist(), const_test_df.values.tolist()]}}

## Predict using prediction endpoint
Please allow a few minutes before executing this step, as the prediction endpoint deployer pod may take some time to come up.

In [None]:
import json
import requests

# Send the inputs as a form field named "json" holding the serialised payload.
headers = {"Content-Type": "application/x-www-form-urlencoded"}
# `headers` was previously built but never passed to the request.
response = requests.post(url, data={'json': json.dumps(input_data)}, headers=headers)
# Surface HTTP errors (e.g. the endpoint pod not being ready yet) instead of
# failing later on an unexpected response body.
response.raise_for_status()
predictions = response.json()["data"]["ndarray"]
print(len(predictions))
print(predictions[0])

## Post-processing prediction results

#### Filter the part of the test data that covers the future dates and convert the log-scaled predictions back to normal values

Also concatenating the predicted new cases & new fatalities to the data

In [None]:
# Test rows dated after the last training date: these are the rows to forecast.
sub_test_df = test_df[test_df["Date"] > train_df["Date"].max()]
# Attach the predictions, reshaped to one (NewCases, NewFatalities) pair per
# row, using the index of sub_test_df.
# NOTE(review): assumes the flattened predictions line up row-for-row with sub_test_df - confirm.
sub_test_df = pd.concat([sub_test_df,
                         pd.DataFrame(np.array(predictions).reshape((-1, 2)), columns=["NewCases", "NewFatalities"], index=sub_test_df.index)],
                         axis=1)
# Invert the log(x + 1) transform applied to the training targets.
sub_test_df["NewCases"] = np.exp(sub_test_df["NewCases"]) - 1
sub_test_df["NewFatalities"] = np.exp(sub_test_df["NewFatalities"]) - 1
# sub_test_df.head()

# Test rows that overlap the training period: take the known ConfirmedCases and
# Fatalities from the training data (left merge on province, country and date).
fixed_test_df = test_df[test_df["Date"] <= train_df["Date"].max()].merge(train_df[train_df["Date"] >= test_df["Date"].min()][["Province_State","Country_Region", "Date", "ConfirmedCases", "Fatalities"]],
                                                                         how="left", on=["Province_State","Country_Region", "Date"])
# Bare expression in the middle of a cell: it is evaluated but not displayed.
fixed_test_df

# Concatenate the fixed_test data with the sub_test data (which includes predicted new cases & new fatalities)
predict_df = pd.concat([sub_test_df, fixed_test_df]).sort_values(["Country_Region", "Province_State", "Date"],
                                                                 ascending=[True, True, True])

# Replace the index with 0..n-1 in sorted order (the old index is kept as a column)
predict_df = predict_df.reset_index()

# Fill missing cumulative values row by row: previous row's total plus this row's predicted increase.
# NOTE(review): for i == 0, iloc[i - 1] refers to the last row; this relies on the
# first row of every region already having known totals - confirm.
for i in range(len(predict_df)):
    if pd.isnull(predict_df.iloc[i]["ConfirmedCases"]):
        predict_df.loc[i, "ConfirmedCases"] = predict_df.iloc[i - 1]["ConfirmedCases"] + predict_df.iloc[i]["NewCases"]
    if pd.isnull(predict_df.iloc[i]["Fatalities"]):
        predict_df.loc[i, "Fatalities"] = predict_df.iloc[i - 1]["Fatalities"] + predict_df.iloc[i]["NewFatalities"]

# Ensure the prediction result has as many rows as the test data
assert predict_df.shape[0] == test_df.shape[0]

# Write ForecastId with the cumulative Confirmed Cases & Fatalities to submission.csv and preview it
predict_df[["ForecastId", "ConfirmedCases", "Fatalities"]].to_csv("submission.csv", index=False)
predict_df[["ForecastId", "ConfirmedCases", "Fatalities"]].head()

### Filter India's data & visualise

#### Plot the overall trend of confirmed cases from the beginning of the spread up to the last forecast date

The point where the green and the red lines intersect shows the breaking point from which the cases start to increase drastically.

In [None]:
country = "India"

target = "ConfirmedCases"
region_train_df = train_df[train_df["Country_Region"] == country]
region_predict_df = predict_df[predict_df["Country_Region"] == country]

# plt.subplots keeps room for tick labels, axis labels and the title; the
# previous add_axes([0, 0, 1, 1]) stretched the axes over the whole figure.
fig, ax1 = plt.subplots(figsize=(10, 6))

# Green: training data. Red: rows of predict_df (test period, including the forecast).
ax1.plot(region_train_df["Date"], region_train_df[target],
         color="green", label="Training data")
ax1.plot(region_predict_df["Date"], region_predict_df[target],
         color="red", label="Test period and forecast")

ax1.set_title("{} - {}".format(country, target))
ax1.set_xlabel("Date")
ax1.set_ylabel(target)
ax1.legend()
plt.show()

## Delete prediction endpoint

In [None]:
endpoint.delete()