# <img src="https://media.npr.org/assets/img/2021/02/01/gettyimages-1230235182_custom-24d5a0fe9793115c4ae460170eb26d6f01f289c5.jpg">

### Importing Libraries

In [None]:
import warnings
warnings.filterwarnings("ignore") 
import time
from datetime import datetime
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker 
import pycountry_convert as pc
import folium
import missingno as msno
from datetime import datetime,date
from scipy.interpolate import make_interp_spline, BSpline
%matplotlib inline
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

## Importing COVID-19 Confirmed cases of India

In [None]:
# Load the CSV that the rest of the notebook treats as the confirmed-cases table.
# NOTE(review): the variable is named for confirmed cases but the path points at
# India_covid19_deaths_data.csv, while the deaths cell loads the
# confirmed-cases CSV -- confirm whether the two file names are swapped.
data_confirmed_cases= pd.read_csv('/kaggle/input/india-covid19-cases-till-date/India_covid19_deaths_data.csv')

## Exploratory data analysis

In [None]:
# Show the raw frame as the cell output for a first look at its rows and columns.
data_confirmed_cases

### Columns/features in data

In [None]:
# List the column labels of the loaded frame.
data_confirmed_cases.columns

### Data information

In [None]:
# Print the frame summary (index range, per-column non-null counts and dtypes).
data_confirmed_cases.info()

### Data types of all columns

In [None]:
# Show the dtype of every column.
data_confirmed_cases.dtypes

### Data Description, here we can see the values of columns as minimum value, maximum value, counts etc

In [None]:
# Summary statistics (count, mean, min, max, quartiles) of the numeric columns.
data_confirmed_cases.describe()

### Importing COVID-19 death cases of India from the Kaggle input dataset

In [None]:
# Load the CSV that the rest of the notebook treats as the deaths table.
# NOTE(review): the variable is named for deaths but the path points at
# India_covid19_confirmed_cases_data.csv, while the confirmed-cases cell loads
# the deaths CSV -- confirm whether the two file names are swapped.
data_deaths=pd.read_csv('/kaggle/input/india-covid19-cases-till-date/India_covid19_confirmed_cases_data.csv')

In [None]:
# Preview the first rows of the deaths frame.
data_deaths.head()

### Columns/features in data

In [None]:
# List the column labels of the deaths frame.
data_deaths.columns

### Data information

In [None]:
# Print the frame summary (index range, per-column non-null counts and dtypes).
data_deaths.info()

### Data types of all columns

In [None]:
# Show the dtype of every column.
data_deaths.dtypes

### Data Description, here we can see the values of columns as minimum value, maximum value, counts etc

In [None]:
# Summary statistics (count, mean, min, max, quartiles) of the numeric columns.
data_deaths.describe()

## Pre-processing of dataset

### Renaming the Province/State into state and Country/Region into country

In [None]:
# Give both frames the same short column names ("state", "country") that the
# helper functions and later cells index by.
column_aliases = {
    "Province/State": "state",
    "Country/Region": "country",
}
df_confirmed = data_confirmed_cases.rename(columns=column_aliases)
df_deaths = data_deaths.rename(columns=column_aliases)

### Getting country name

In [None]:
# Collect the "country" column as a NumPy array (one entry per row of
# df_confirmed) and display it.
countries = np.asarray(df_confirmed["country"])
countries

### Mapping continent codes to continent names, since we will create a map and show India

In [None]:
# Lookup from two-letter continent code to a display name.
# The lower-case 'na' key matches the fallback value returned by
# country_to_continent_code() when a country cannot be resolved; it is distinct
# from the upper-case 'NA' (North America) key.
continents = {
    'NA': 'North America',
    'SA': 'South America', 
    'AS': 'Asia',
    'OC': 'Australia',
    'AF': 'Africa',
    'EU' : 'Europe',
    'na' : 'Others'
}

### Defining a function to get the continent code for a country

In [None]:
def country_to_continent_code(country):
    """Return the continent code for ``country``, or ``'na'`` if it cannot be resolved.

    The country name is first converted to its alpha-2 code and that code is
    then converted to a continent code, both through ``pycountry_convert``
    (imported as ``pc``).

    Any ordinary failure of the lookup yields the ``'na'`` fallback.  Unlike a
    bare ``except:``, ``except Exception`` lets ``KeyboardInterrupt`` and
    ``SystemExit`` propagate, so the notebook can still be interrupted while
    this runs over many rows.
    """
    try:
        alpha2 = pc.country_name_to_country_alpha2(country)
        return pc.country_alpha2_to_continent_code(alpha2)
    except Exception:
        return 'na'

### Replacing the NaN values found in the province column with empty strings

In [None]:
# Replace every NaN in both frames with an empty string.
# NOTE(review): this applies to all columns, not only "state"; a NaN inside a
# numeric column would turn that column into mixed text/number values -- confirm
# that only the state column contains NaN.
df_confirmed = df_confirmed.replace(np.nan, '', regex=True)
df_deaths = df_deaths.replace(np.nan, '', regex=True)

### Functions for the plots and graphs
 - plot_params()
 - visualize_covid_cases()
 - get_total_cases()
 - get_mortality_rate()
 - dd()

In [None]:
def plot_params(ax,axis_label= None, plt_title = None,label_size=15, axis_fsize = 15, title_fsize = 20, scale = 'linear' ):
    """Apply the notebook's common tick, grid, title, scale and label styling to ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        The axes to style.  All styling is applied to this object directly
        rather than to pyplot's "current axes", so the function also works when
        ``ax`` is not the most recently created subplot.
    axis_label : sequence of two str, optional
        ``[x_label, y_label]``.  When omitted the axis labels are left untouched
        (previously the default ``None`` raised ``TypeError``).
    plt_title : str, optional
        Title text.
    label_size : int
        Font size of the tick labels.
    axis_fsize : int
        Font size of the axis labels.
    title_fsize : int
        Font size of the title.
    scale : str
        Y-axis scale name, e.g. ``'linear'`` or ``'log'``.
    """
    # Tick parameters: automatic minor ticks, longer major ticks, faint minor ticks.
    ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
    ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
    ax.tick_params(which='both', width=1,labelsize=label_size)
    ax.tick_params(which='major', length=6)
    ax.tick_params(which='minor', length=3, color='0.8')

    # Grid: darker lines on major ticks, lighter lines on minor ticks.
    ax.grid(lw = 1, ls = '-', c = "0.7", which = 'major')
    ax.grid(lw = 1, ls = '-', c = "0.9", which = 'minor')

    # Plot title
    ax.set_title(plt_title,{'fontsize':title_fsize})

    # Y-axis scale
    ax.set_yscale(scale)

    # Axis labels (optional)
    if axis_label is not None:
        ax.set_xlabel(axis_label[0],fontsize = axis_fsize)
        ax.set_ylabel(axis_label[1],fontsize = axis_fsize)
    
def visualize_covid_cases(confirmed, deaths, continent=None , country = None , state = None, period = None, figure = None, scale = "linear"):
    """Plot confirmed cases and deaths over time together with a summary text box.

    Parameters
    ----------
    confirmed, deaths : DataFrame
        Frames whose columns from position 5 onwards are summed column by
        column.  Assumes those columns hold one cumulative count per day --
        TODO confirm against the CSV layout.
    continent, country : optional
        Restrict the rows to ``frame["continent"] == continent`` or, when no
        continent is given, to ``frame["country"] == country``.  With neither,
        all rows are used.
    state, period : unused
        Accepted for backward compatibility only.
    figure : sequence, optional
        ``[fig, nrows, ncols, index]``.  The plot is added to ``fig`` as that
        subplot and showing the figure is left to the caller.  When omitted a
        new 10x10 figure is created and shown.
    scale : str
        Y-axis scale forwarded to ``plot_params``.
    """
    if figure is None:
        f = plt.figure(figsize=(10,10))
        ax = f.add_subplot(111)
    else:
        f = figure[0]
        ax = f.add_subplot(figure[1],figure[2],figure[3])

    plt.tight_layout(pad=10, w_pad=5, h_pad=5)

    if continent is not None:
        params = ["continent",continent]
    elif country is not None:
        params = ["country",country]
    else:
        params = ["All", "All"]

    def _daily_totals(frame):
        # Each frame is filtered with its OWN mask.  The previous code built the
        # mask from whichever frame the plotting loop visited last (deaths) and
        # applied it to both frames.
        if params[1] != "All":
            frame = frame[frame[params[0]] == params[1]]
        return np.sum(np.asarray(frame.iloc[:,5:]),axis = 0)

    def _increase(series, days):
        # Growth over the last `days` columns; when the series is shorter than
        # that, measure from its first value instead of raising IndexError.
        start = series[-(days + 1)] if len(series) > days else series[0]
        return series[-1] - start

    Total_confirmed = _daily_totals(confirmed)
    Total_deaths = _daily_totals(deaths)

    for name, cases in zip(["Confirmed", "Deaths"], [Total_confirmed, Total_deaths]):
        day_numbers = np.arange(1,cases.shape[0]+1)
        plt.plot(day_numbers,cases,label = name+" (Total : "+str(cases[-1])+")")

    # Percentage truncated to two decimals; 0.0 when nothing is confirmed yet,
    # because int() of the resulting NaN/inf would raise.
    if Total_confirmed[-1]:
        mortality = int(Total_deaths[-1]/(Total_confirmed[-1])*10000)/100
    else:
        mortality = 0.0

    # The date range is taken from the same columns that are summed above
    # (position 5 to the last one), matching the x-axis label.
    first_day = confirmed.columns[5]
    last_day = confirmed.columns[-1]

    text = "From "+first_day+" to "+last_day+"\n"
    text += "Mortality rate : "+ str(mortality)+"\n"
    text += "Last 5 Days:\n"
    text += "Confirmed : " + str(_increase(Total_confirmed, 5))+"\n"
    text += "Deaths : " + str(_increase(Total_deaths, 5))+"\n"
    text += "Last 24 Hours:\n"
    text += "Confirmed : " + str(_increase(Total_confirmed, 1))+"\n"
    text += "Deaths : " + str(_increase(Total_deaths, 1))+"\n"

    plt.text(0.02, 0.78, text, fontsize=14, horizontalalignment='left', verticalalignment='top', transform=ax.transAxes,bbox=dict(facecolor='white', alpha=0.4))

    # Axis labels are derived from the frame that was passed in, not from a
    # notebook-level global.
    axis_label = ["Days ("+first_day+" - "+last_day+")","No of Cases"]

    # Shared tick/grid/scale styling
    plot_params(ax,axis_label,scale = scale)

    # Plot title (set after plot_params, which also sets a title)
    if params[1] == "All" :
        plt.title("COVID-19 Cases World",{'fontsize':25})
    else:
        plt.title("COVID-19 Cases for "+params[0]+" "+params[1] ,{'fontsize':25})

    plt.legend(loc= "best",fontsize = 13)

    if figure is None:
        plt.show()
        
def get_total_cases(cases, country = "All"):
    """Return the sum of the last column of ``cases`` over the selected rows.

    Only columns from position 5 onwards are considered.  With the default
    ``country="All"`` every row is included; otherwise only rows whose
    ``"country"`` value equals ``country``.
    """
    selected = cases if country == "All" else cases[cases["country"] == country]
    per_column = np.asarray(selected.iloc[:, 5:]).sum(axis=0)
    return per_column[-1]
    
def get_mortality_rate(confirmed,deaths, continent = None, country = None):
    """Return the mortality rate in percent for every column from position 5 onwards.

    For each such column the deaths are summed over the selected rows, divided
    by the summed confirmed cases, multiplied by 100 and rounded to two
    decimals.

    Parameters
    ----------
    confirmed, deaths : DataFrame
        Frames with the same column layout.
    continent, country : optional
        Restrict the rows to ``frame["continent"] == continent`` or, when no
        continent is given, to ``frame["country"] == country``.  With neither
        (or with the value ``"All"``) every row is used.

    Returns
    -------
    numpy.ndarray
        One float per column.  Columns with zero confirmed cases yield ``0.0``
        (previously a column with deaths but no confirmed cases produced ``inf``,
        which ``nan_to_num`` turned into about 1.8e308).
    """
    if continent is not None:
        column, value = "continent", continent
    elif country is not None:
        column, value = "country", country
    else:
        column, value = "All", "All"

    def _daily_totals(frame):
        # Shared by both frames so the row selection cannot diverge.
        if value != "All":
            frame = frame[frame[column] == value]
        return np.sum(np.asarray(frame.iloc[:,5:]),axis = 0).astype(float)

    total_confirmed = _daily_totals(confirmed)
    total_deaths = _daily_totals(deaths)

    # Divide only where there is something to divide by; the rest stays 0.
    ratio = np.zeros_like(total_confirmed)
    np.divide(total_deaths, total_confirmed, out=ratio, where=total_confirmed != 0)

    return np.nan_to_num(np.round(ratio*100,2))
def dd(date1,date2):
    """Return the number of whole days from ``date2`` to ``date1``.

    Both arguments are strings in ``month/day/two-digit-year`` form, e.g.
    ``'1/22/20'``.  The result is negative when ``date1`` is the earlier date.
    """
    date_format = '%m/%d/%y'
    end = datetime.strptime(date1, date_format)
    start = datetime.strptime(date2, date_format)
    elapsed = end - start
    return elapsed.days

## General Analysis of Data

### Getting country data as count of confirm cases and deaths 

In [None]:
def _latest_totals(frame, series_name):
    # Sum the rows of each "country" value, discard the coordinate sums and
    # keep only the last column, renamed for display.
    per_country = frame.groupby(["country"]).sum().drop(['Lat','Long'],axis =1)
    latest = per_country.iloc[:,-1]
    latest.name = series_name
    return latest


confirmed_cases = _latest_totals(df_confirmed, "Confirmed Cases")
deaths = _latest_totals(df_deaths, "Deaths Reported")

# One row per "country" value, one column per statistic.
df_countries_cases = pd.DataFrame([confirmed_cases,deaths]).transpose()

### Total India confirm and deaths cases in graph

In [None]:
# Donut charts of the latest totals per "country" value: one chart for confirmed
# cases and one for deaths.  Entries at or below the per-statistic threshold are
# left out of the chart.
rows = 4
f = plt.figure(figsize=(15,10*rows))

stats = [df_confirmed,df_deaths]
label = ["Confirmed","Deaths"]
threshold = [1500,100]
for i, stat in enumerate(stats):
    plt.tight_layout()
    df_countries = stat.groupby(["country"]).sum()
    latest_col = df_countries.columns[-1]
    df_countries = df_countries.sort_values(latest_col,ascending= False)
    # Keep only the last column, restricted to entries above the threshold.
    df_countries = df_countries.loc[df_countries[latest_col] > threshold[i], latest_col]
    # Iterate label/value pairs instead of indexing the label-indexed Series by
    # position, which pandas has deprecated; this also avoids reusing the loop
    # index name inside the comprehension.
    labels = [name +" (" + str(int(total)) +") " for name, total in df_countries.items()]

    ax = f.add_subplot(rows,1,i+1)
    plt.pie(df_countries, labels=labels,autopct='%1.1f%%',pctdistance=0.85, labeldistance=1.1,textprops = {'fontsize':10.5})
    # A white disc over the centre turns the pie into a donut.
    my_circle=plt.Circle( (0,0), 0.7, color='white')
    ax.add_artist(my_circle)
    plt.text(0.5,0.5,"Total India "+label[i]+ " Cases\n"+str(get_total_cases(stat)), horizontalalignment='center',verticalalignment='center',transform=ax.transAxes, size=18, alpha = 0.6)


plt.show()


In [None]:
# One-row table with the column sums of df_countries_cases, shaded with the
# 'Wistia' colour map across the row.
pd.DataFrame(df_countries_cases.sum()).transpose().style.background_gradient(cmap='Wistia',axis=1)

### Visualization of India on Map

In [None]:
# Interactive map with one circle per row of df_confirmed.  The tooltip shows
# the latest confirmed and death counts; the radius grows with
# log(confirmed + 1).
world_map = folium.Map(location=[10,0], tiles="cartodbpositron", zoom_start=2,max_zoom=6,min_zoom=2)
for row_pos in range(len(df_confirmed)):
    row = df_confirmed.iloc[row_pos]
    latest_confirmed = df_confirmed.iloc[row_pos,-1]
    # NOTE(review): assumes df_deaths lists its rows in the same order as
    # df_confirmed -- confirm.
    latest_deaths = df_deaths.iloc[row_pos,-1]

    tooltip_html = "".join([
        "<h5 style='text-align:center;font-weight: bold'>", row['country'], "</h5>",
        "<div style='text-align:center;'>", str(np.nan_to_num(row['state'])), "</div>",
        "<hr style='margin:10px;'>",
        "<ul style='color: #555;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>",
        "<li>Confirmed ", str(latest_confirmed), "</li>",
        "<li>Deaths   ", str(latest_deaths), "</li>",
        "</ul>",
    ])

    marker = folium.Circle(
        location=[row['Lat'], row['Long']],
        tooltip=tooltip_html,
        radius=(int((np.log(latest_confirmed+1)))+0.2)*50000,
        color='#ff6600',
        fill_color='#ff8533',
        fill=True,
    )
    marker.add_to(world_map)

world_map

## COVID-19 Spread Analysis

### Spread of COVID-19 in India: each chart shows cumulative confirmed cases and deaths, the mortality rate, and the increases over the last 5 days and the last 24 hours.

In [None]:
# One subplot per "country" value whose latest total is at least 1500,
# ordered by that total (largest first) and laid out in two columns.
df_countries = df_confirmed.groupby(["country"]).sum()
df_countries = df_countries.sort_values(df_countries.columns[-1],ascending = False)
above_cutoff = df_countries[df_countries.columns[-1]] >= 1500
countries = df_countries[above_cutoff].index

cols =2
rows = int(math.ceil(countries.shape[0]/cols))
f = plt.figure(figsize=(20,10*rows))
for position, country in enumerate(countries, start=1):
    visualize_covid_cases(df_confirmed, df_deaths,country = country,figure = [f,rows,cols, position])
plt.show()

### Trend of India's confirmed cases. The blue line shows the cases roughly doubling every 4 days, reaching about 10^4 (around 10,000) cases now.

In [None]:
# Growth curves on a log scale, counted in days since the series first exceeded
# `threshold`, with dashed guide lines for fixed doubling times.
temp = df_confirmed.groupby('country').sum().drop(["Lat","Long"],axis =1).sort_values(df_confirmed.columns[-1], ascending= False)

threshold = 50
max_days = 30
f = plt.figure(figsize=(10,12))
ax = f.add_subplot(111)
for i,country in enumerate(temp.index):
    # Only the ten largest entries are drawn, plus India wherever it ranks.
    if i >= 10 and country != "India":
        continue
    t = temp.loc[temp.index== country].values[0]
    t = t[t>threshold][:max_days]
    if len(t) < 2:
        # Too few points above the threshold to draw (and to interpolate).
        continue

    date = np.arange(0,len(t))
    if country != "India":
        # Resample onto 10 evenly spaced points with a degree-1 spline.
        xnew = np.linspace(date.min(), date.max(), 10)
        power_smooth = make_interp_spline(date, t, k=1)(xnew)
        plt.plot(xnew,power_smooth,label = country,linewidth =2)
    else:
        # India is drawn from the raw points with a heavier, marked line.
        marker_style = dict(linewidth=4, linestyle='-', marker='o',markersize=10, markerfacecolor='#ffffff')
        plt.plot(date,t,"-.",label = country,**marker_style)

plt.tick_params(labelsize = 14)
plt.xticks(np.arange(0,max_days,7),[ "Day "+str(day) for day in range(0,max_days,7)])

# Guide lines y = threshold * 2**(day / doubling_time).
# Fields: doubling time in days, number of days drawn, index of the x value the
# label is anchored at, label text, line colour, extra text styling.
doubling_guides = [
    (1, 18, -2, "No. of cases doubles every day", "gray", {"alpha": 0.5}),
    (2, 26, -3, ".. every second day", "gray", {"alpha": 0.5}),
    (7, 26, -3, ".. every week", "gray", {"alpha": 0.5}),
    (30, 26, -3, ".. every month", "gray", {"alpha": 0.5}),
    (4, 26, -3, ".. every 4 days", "Red", {"color": "Red", "alpha": 0.8}),
]
for doubling_time, n_days, anchor, guide_text, line_color, text_style in doubling_guides:
    x = np.arange(0,n_days)
    y = 2**(x/doubling_time+np.log2(threshold))
    plt.plot(x,y,"--",linewidth =2,color = line_color)
    plt.annotate(guide_text,(x[anchor],y[-1]),xycoords="data",fontsize=14,**text_style)

plt.xlabel("Days",fontsize=17)
plt.ylabel("Number of Confirmed Cases",fontsize=17)
plt.title("Trend of India Confirm cases",fontsize=22)
plt.legend(loc = "upper left")
plt.yscale("log")
plt.grid(which="both")
plt.show()

### Mortality rate variation over time. It is increasing very fast.

In [None]:
# Mortality-rate curve over all rows of the two frames.
# The previous version first grouped df_confirmed by a "continent" column only
# to build a list whose first element ("All") was then passed on; no visible
# cell creates that column, so the grouping is dropped and the overall rate is
# requested directly.
rows = 2
axis_label = ["Days ("+df_confirmed.columns[5]+" - "+df_confirmed.columns[-1]+")","Mortality Rate (of 100)"]

f = plt.figure(figsize=(15,10*rows))

# Upper half of a two-row layout; the lower half is left empty.
ax = f.add_subplot(211)
mortality_rate = get_mortality_rate(df_confirmed,df_deaths)
plt.plot(np.arange(1,mortality_rate.shape[0]+1),mortality_rate,label = "Current Mortality Rate "+str(mortality_rate[-1]))

plt_title = "Mortality Rate Curve of India"
plot_params(ax,axis_label,plt_title)
plt.legend(loc= "best")
plt.show()


### Preparing India confirm cases and Deaths for training the machine learning models

In [None]:
def _daily_new_cases(cumulative):
    """Return ``(report, newcases)`` for one statistic.

    ``report`` holds the per-"country" column sums (without Lat/Long) plus a
    "Total" row; ``newcases`` holds the column-to-column differences of
    ``report`` with the '1/22/20' column removed.
    """
    report = cumulative.groupby("country").sum().drop(["Lat","Long"],axis =1)
    report.loc["Total"] = report.sum()
    # Row labels are unique after the groupby, so a plain column-wise diff gives
    # the same result as groupby(level=0).diff(axis=1), whose `axis` argument is
    # deprecated in pandas.
    newcases = report.diff(axis =1).drop(columns=['1/22/20'])
    return report, newcases


df_confirmed_report, df_confirmed_newcases = _daily_new_cases(df_confirmed)
df_deaths_report, df_deaths_newcases = _daily_new_cases(df_deaths)

## India Confirm Cases Predictions of COVID-19

### Preparing Confirm cases data for Training and predictions

In [None]:
# Transpose the per-"country" differences so the former column labels become
# the row index, then keep India's column next to a copy of that label.
confirm = df_confirmed_newcases.T
confirm['Date'] = confirm.index
confirm = confirm[['Date', 'India']]

confirm_cases = pd.DataFrame({
    'Date': confirm['Date'],
    'India cases': confirm['India'],
})

# Replace the label index with a plain 0..n-1 index.
confirm_cases = confirm_cases.reset_index().drop(columns=['index'])

### In the first 49 days there are almost no cases in India (only a few on isolated days); from index 46 to 49 the count starts to rise slowly

In [None]:
# Inspect the first 60 rows to see where the series starts to rise.
confirm_cases.head(60)

### So we ignore the first days shown above and use the data that follows

In [None]:
# Drop the first 49 rows.  The explicit copy makes the result an independent
# frame, so the column assignments in the following cells do not write into a
# slice of the original frame (pandas' chained-assignment pitfall).
confirm_cases=confirm_cases.iloc[49:].copy()
confirm_cases

In [None]:
# Replace exact zeros in the series with 0.1.
# NOTE(review): the reason for avoiding zeros is not visible in this notebook
# (none of the models shown takes a logarithm) -- confirm it is still needed.
confirm_cases['India cases']=confirm_cases['India cases'].replace(0,0.1)

### Date settings and preparation of India cases according to dates

In [None]:
# Index the series by parsed timestamps, sort it chronologically and keep only
# the target column under its display name.
tar = 'India Confirm Cases Predictions'
confirm_cases['times'] = pd.to_datetime(confirm_cases['Date'])
confirm_cases = (
    confirm_cases
    .set_index('times')
    .sort_index(ascending=True)
    .rename(columns={'India cases': tar})
)
confirm_cases = confirm_cases[[tar]]

### Splitting dates into month, year and day of month

In [None]:
def create_features(data, label=None):
    """Build calendar features from the datetime index of ``data``.

    Parameters
    ----------
    data : DataFrame
        Frame indexed by timestamps.  It is not modified (the previous version
        added 'date', 'month', 'year' and 'dayofmonth' columns to the caller's
        frame as a side effect).
    label : str, optional
        Name of a column of ``data`` to return as the target.

    Returns
    -------
    X : DataFrame
        Columns ``'month'``, ``'year'``, ``'dayofmonth'``, indexed like ``data``.
    (X, y) : tuple
        Returned instead of ``X`` when ``label`` is given; ``y`` is
        ``data[label]``.
    """
    stamps = data.index
    X = pd.DataFrame(
        {
            'month': np.asarray(stamps.month),
            'year': np.asarray(stamps.year),
            'dayofmonth': np.asarray(stamps.day),
        },
        index=stamps,
    )
    if label:
        y = data[label]
        return X, y
    return X

### Data for training input and prediction output

In [None]:
# Features are built from a copy so confirm_cases itself keeps only the target
# column; X holds the calendar features and y the target frame.
test=confirm_cases.copy()
X= create_features(test)
y=confirm_cases

## -------------Machine Learning Algorithm and Evaluation---------------

### PLS Algorithm

In [None]:
# Partial least squares regressor with default settings; it is fitted inside
# the cross-validation loop.
from sklearn.cross_decomposition import PLSRegression
pls = PLSRegression()

### Cross validation

In [None]:
# 5-fold cross-validation of the PLS model, printing MAE and RMSE per fold.
# Unused leftovers from a classification template (outcome list, confusion
# matrix holders, a second fold counter) were removed.
x=np.array(X)
y=np.array(y).ravel()
kf = KFold(n_splits=5)
for fold, (train_index, test_index) in enumerate(kf.split(x,y), start=1):
    print('\n')
    print("Cross validation",fold)
    Xtrain, Xtest = x[train_index], x[test_index]
    ytrain, y_test = y[train_index], y[test_index]
    pls.fit(Xtrain, ytrain)
    y_pred = pls.predict(Xtest)
    MAE=mean_absolute_error(y_test,y_pred)
    print('Mean Absolute error: ', MAE)
    mse=mean_squared_error(y_test,y_pred)
    rmse = math.sqrt(mse)
    print('Root mean squared error: ', rmse)

### Support vector  Regressor Algorithm

In [None]:
# Linear support vector regressor with default settings; it is fitted inside
# the cross-validation loop.
from sklearn.svm import LinearSVR
svr = LinearSVR()

### Cross validation

In [None]:
# 5-fold cross-validation of the linear SVR model, printing MAE and RMSE per
# fold.  Unused leftovers from a classification template (outcome list,
# confusion matrix holders, a second fold counter) were removed.
x=np.array(X)
y=np.array(y).ravel()
kf = KFold(n_splits=5)
for fold, (train_index, test_index) in enumerate(kf.split(x,y), start=1):
    print('\n')
    print("Cross validation",fold)
    Xtrain, Xtest = x[train_index], x[test_index]
    ytrain, y_test = y[train_index], y[test_index]
    svr.fit(Xtrain, ytrain)
    y_pred = svr.predict(Xtest)
    MAE=mean_absolute_error(y_test,y_pred)
    print('Mean Absolute error: ', MAE)
    mse=mean_squared_error(y_test,y_pred)
    rmse = math.sqrt(mse)
    print('Root mean squared error: ', rmse)

### Random Forest  Regressor Algorithm

In [None]:
# Random forest regressor with default settings; it is fitted inside the
# cross-validation loop.
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()

### Cross validation

In [None]:
# 5-fold cross-validation of the random forest model, printing MAE and RMSE per
# fold.
# Fix: predictions now come from `rf`, the model fitted in this loop.  The
# previous code fitted `rf` but scored `pls.predict(...)`, so the errors
# reported for the random forest were really those of the PLS model.
x=np.array(X)
y=np.array(y).ravel()
kf = KFold(n_splits=5)
for fold, (train_index, test_index) in enumerate(kf.split(x,y), start=1):
    print('\n')
    print("Cross validation",fold)
    Xtrain, Xtest = x[train_index], x[test_index]
    ytrain, y_test = y[train_index], y[test_index]
    rf.fit(Xtrain, ytrain)
    y_pred = rf.predict(Xtest)
    MAE=mean_absolute_error(y_test,y_pred)
    print('Mean Absolute error: ', MAE)
    mse=mean_squared_error(y_test,y_pred)
    rmse = math.sqrt(mse)
    print('Root mean squared error: ', rmse)

## As we can see from the results of the three algorithms above, the random forest regression algorithm performed best, giving the lowest mean absolute error and root mean squared error. So we now train a random forest on all the data and use it to predict CONFIRMED CASES

In [None]:
# Train the final random forest on ALL rows.
# Fix: the model was previously fitted on Xtrain/ytrain, which are whatever the
# last cross-validation fold left behind (the data minus that fold), not the
# full data set this step is meant to use.
rf=RandomForestRegressor()
x=np.array(X)
y=np.array(y).ravel()
RF=rf.fit(x, y)

### Forecasting function by using the trained model for predictions

In [None]:
frequency = 'D'
t = datetime.now()


def forecast(number):
  """Predict the target for ``number`` consecutive periods starting at ``t``.

  Timestamps are generated with the module-level ``frequency``, turned into
  calendar features with ``create_features`` and passed to the fitted model
  ``RF``.  The result is a one-column frame (column name taken from
  ``confirm_cases``) indexed by those timestamps.
  """
  stamps = pd.date_range(t, periods=number, freq=frequency)
  horizon = pd.DataFrame(stamps, columns=['TIMESTAMP'])
  horizon.index = horizon['TIMESTAMP']
  predicted = RF.predict(create_features(horizon))
  pred = pd.DataFrame(predicted, columns=confirm_cases.columns)
  pred.index = horizon['TIMESTAMP']
  return pred

## Predictions confirm cases of next 7 days

In [None]:
# Predict the next 7 periods and display the result.
# NOTE(review): this rebinds the name `forecast` from the function to the
# returned DataFrame, so running this cell a second time tries to call a
# DataFrame and fails -- consider a distinct result name (the plotting cell
# reads the same name and would need the matching change).
forecast=forecast(7)
forecast

In [None]:
# Line plot of the predicted values in red on a wide 20x5 figure.
forecast.plot(figsize=(20, 5), color='r')

## India Deaths Predictions of COVID-19

### Preparing deaths cases data for Training and predictions

In [None]:
# Transpose the per-"country" differences so the former column labels become
# the row index, then keep India's column next to a copy of that label.
deaths = df_deaths_newcases.T
deaths['Date'] = deaths.index
deaths = deaths[['Date', 'India']]

deaths_cases = pd.DataFrame({
    'Date': deaths['Date'],
    'India cases': deaths['India'],
})

# Replace the label index with a plain 0..n-1 index.
deaths_cases = deaths_cases.reset_index().drop(columns=['index'])

### In the first 65 days there are almost no deaths in India (only a few on isolated days); from index 48 to 65 the count increases slowly

In [None]:
# Inspect the first 60 rows to see where the series starts to rise.
deaths_cases.head(60)

### So we ignore the first days shown above and use the data that follows

In [None]:
# Drop the first 65 rows.  The explicit copy makes the result an independent
# frame, so the column assignments in the following cells do not write into a
# slice of the original frame (pandas' chained-assignment pitfall).
deaths_cases=deaths_cases.iloc[65:].copy()
deaths_cases

In [None]:
# Replace exact zeros in the series with 0.1.
# NOTE(review): the reason for avoiding zeros is not visible in this notebook
# (none of the models shown takes a logarithm) -- confirm it is still needed.
deaths_cases['India cases']=deaths_cases['India cases'].replace(0,0.1)

### Date settings and preparation of India deaths according to dates

In [None]:
# Index the series by parsed timestamps, sort it chronologically and keep only
# the target column under its display name.
tar = 'India Deaths Predictions'
deaths_cases['times'] = pd.to_datetime(deaths_cases['Date'])
deaths_cases = (
    deaths_cases
    .set_index('times')
    .sort_index(ascending=True)
    .rename(columns={'India cases': tar})
)
deaths_cases = deaths_cases[[tar]]

### Splitting dates into month, year and day of month

In [None]:
def create_features(data, label=None):
    """Build calendar features from the datetime index of ``data``.

    Parameters
    ----------
    data : DataFrame
        Frame indexed by timestamps.  It is not modified (the previous version
        added 'date', 'month', 'year' and 'dayofmonth' columns to the caller's
        frame as a side effect).
    label : str, optional
        Name of a column of ``data`` to return as the target.

    Returns
    -------
    X : DataFrame
        Columns ``'month'``, ``'year'``, ``'dayofmonth'``, indexed like ``data``.
    (X, y) : tuple
        Returned instead of ``X`` when ``label`` is given; ``y`` is
        ``data[label]``.
    """
    stamps = data.index
    X = pd.DataFrame(
        {
            'month': np.asarray(stamps.month),
            'year': np.asarray(stamps.year),
            'dayofmonth': np.asarray(stamps.day),
        },
        index=stamps,
    )
    if label:
        y = data[label]
        return X, y
    return X

### Data for training input and prediction output

In [None]:
# Features are built from a copy so deaths_cases itself keeps only the target
# column; X holds the calendar features and y the target frame.
test=deaths_cases.copy()
X= create_features(test)
y=deaths_cases

# ----------------Machine Learning Algorithm and Evaluation-----------------

### PLS Algorithm

In [None]:
# Fresh partial least squares regressor with default settings for the deaths
# series; it is fitted inside the cross-validation loop.
from sklearn.cross_decomposition import PLSRegression
pls = PLSRegression()

### Cross validation

In [None]:
# 5-fold cross-validation of the PLS model, printing MAE and RMSE per fold.
# Unused leftovers from a classification template (outcome list, confusion
# matrix holders, a second fold counter) were removed.
x=np.array(X)
y=np.array(y).ravel()
kf = KFold(n_splits=5)
for fold, (train_index, test_index) in enumerate(kf.split(x,y), start=1):
    print('\n')
    print("Cross validation",fold)
    Xtrain, Xtest = x[train_index], x[test_index]
    ytrain, y_test = y[train_index], y[test_index]
    pls.fit(Xtrain, ytrain)
    y_pred = pls.predict(Xtest)
    MAE=mean_absolute_error(y_test,y_pred)
    print('Mean Absolute error: ', MAE)
    mse=mean_squared_error(y_test,y_pred)
    rmse = math.sqrt(mse)
    print('Root mean squared error: ', rmse)

### Support vector  Regressor Algorithm

In [None]:
# Fresh linear support vector regressor with default settings for the deaths
# series; it is fitted inside the cross-validation loop.
from sklearn.svm import LinearSVR
svr = LinearSVR()

### Cross validation

In [None]:
# 5-fold cross-validation of the linear SVR model, printing MAE and RMSE per
# fold.  Unused leftovers from a classification template (outcome list,
# confusion matrix holders, a second fold counter) were removed.
x=np.array(X)
y=np.array(y).ravel()
kf = KFold(n_splits=5)
for fold, (train_index, test_index) in enumerate(kf.split(x,y), start=1):
    print('\n')
    print("Cross validation",fold)
    Xtrain, Xtest = x[train_index], x[test_index]
    ytrain, y_test = y[train_index], y[test_index]
    svr.fit(Xtrain, ytrain)
    y_pred = svr.predict(Xtest)
    MAE=mean_absolute_error(y_test,y_pred)
    print('Mean Absolute error: ', MAE)
    mse=mean_squared_error(y_test,y_pred)
    rmse = math.sqrt(mse)
    print('Root mean squared error: ', rmse)

### Random Forest  Regressor Algorithm

In [None]:
# Fresh random forest regressor with default settings for the deaths series;
# it is fitted inside the cross-validation loop.
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()

### Cross validation

In [None]:
# 5-fold cross-validation of the random forest model, printing MAE and RMSE per
# fold.
# Fix: predictions now come from `rf`, the model fitted in this loop.  The
# previous code fitted `rf` but scored `pls.predict(...)`, so the errors
# reported for the random forest were really those of the PLS model.
x=np.array(X)
y=np.array(y).ravel()
kf = KFold(n_splits=5)
for fold, (train_index, test_index) in enumerate(kf.split(x,y), start=1):
    print('\n')
    print("Cross validation",fold)
    Xtrain, Xtest = x[train_index], x[test_index]
    ytrain, y_test = y[train_index], y[test_index]
    rf.fit(Xtrain, ytrain)
    y_pred = rf.predict(Xtest)
    MAE=mean_absolute_error(y_test,y_pred)
    print('Mean Absolute error: ', MAE)
    mse=mean_squared_error(y_test,y_pred)
    rmse = math.sqrt(mse)
    print('Root mean squared error: ', rmse)

### As we can see from the results of the three algorithms above, the random forest regression algorithm performed best, giving the lowest mean absolute error and root mean squared error. So we now train a random forest on all the data and use it to predict DEATHS

In [None]:
# Train the final random forest on ALL rows.
# Fix: the model was previously fitted on Xtrain/ytrain, which are whatever the
# last cross-validation fold left behind (the data minus that fold), not the
# full data set this step is meant to use.
rf=RandomForestRegressor()
x=np.array(X)
y=np.array(y).ravel()
RF=rf.fit(x, y)

### Forecasting function by using the trained model for predictions

In [None]:
frequency = 'D'
t = datetime.now()


def forecast(number):
  """Predict the target for ``number`` consecutive periods starting at ``t``.

  Timestamps are generated with the module-level ``frequency``, turned into
  calendar features with ``create_features`` and passed to the fitted model
  ``RF``.  The result is a one-column frame (column name taken from
  ``deaths_cases``) indexed by those timestamps.
  """
  stamps = pd.date_range(t, periods=number, freq=frequency)
  horizon = pd.DataFrame(stamps, columns=['TIMESTAMP'])
  horizon.index = horizon['TIMESTAMP']
  predicted = RF.predict(create_features(horizon))
  pred = pd.DataFrame(predicted, columns=deaths_cases.columns)
  pred.index = horizon['TIMESTAMP']
  return pred

### Predictions death cases of next 7 days

In [None]:
# Predict the next 7 periods and display the result.
# NOTE(review): this rebinds the name `forecast` from the function to the
# returned DataFrame, so running this cell a second time tries to call a
# DataFrame and fails -- consider a distinct result name (the plotting cell
# reads the same name and would need the matching change).
forecast=forecast(7)
forecast

In [None]:
# Line plot of the predicted values in red on a wide 20x5 figure.
forecast.plot(figsize=(20, 5), color='r')

<div class="alert alert-block alert-info">
<h1>If you like my work, please upvote ^ 👍 my kernel so that I will be motivated to share more content with the Kaggle community. Thanks 😍</h1>
</div>