<a href="https://colab.research.google.com/github/AjaybirRandhawa/Covid-19USA/blob/main/Covid_19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import math
import random
import time
import io
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
import datetime as dt
import operator
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from google.colab import files
# Upload the input CSVs through google.colab; the next cell looks the files up
# in `uploaded` by filename and wraps each value in io.BytesIO.
uploaded = files.upload()

In [None]:
import io

# Parse each uploaded CSV (looked up by filename in `uploaded`) into its own
# pandas DataFrame: confirmed, recovered and deaths, in that order.
confirmed_cases, recovered_cases, deaths_reported = (
    pd.read_csv(io.BytesIO(uploaded[csv_name]))
    for csv_name in (
        'time_series_covid_19_confirmed.csv',
        'time_series_covid_19_recovered.csv',
        'time_series_covid_19_deaths.csv',
    )
)

In [None]:
# Preview the first rows of the confirmed-cases table to check that it loaded.
confirmed_cases.head()

In [None]:
# Preview the first rows of the recovered-cases table.
recovered_cases.head()

In [None]:
# Preview the first rows of the reported-deaths table.
deaths_reported.head()

In [None]:
# Column labels of the confirmed-cases table; the next cell uses cols[4] and
# cols[-1] as the bounds of the date columns.
cols = confirmed_cases.columns
cols

In [None]:
# Keep only the per-date columns of each table: the label-based slice runs from
# cols[4] through the last column of the confirmed table, inclusive.
confirmed, deaths, recoveries = (
    table.loc[:, cols[4]:cols[-1]]
    for table in (confirmed_cases, deaths_reported, recovered_cases)
)

In [None]:
# Preview the date-only slice of the confirmed-cases table.
confirmed.head()

In [None]:
# Sum every date column across all rows to get the per-date totals of
# confirmed cases, deaths and recoveries, plus the deaths/confirmed ratio.
dates = confirmed.keys()
world_cases, total_deaths, mortality_rates, total_recovered = [], [], [], []
for i in dates:
  # These three names intentionally stay at notebook scope: after the loop they
  # hold the totals for the last date, which the following cell displays.
  confirmed_sum, death_sum, recovered_sum = (
      confirmed[i].sum(),
      deaths[i].sum(),
      recoveries[i].sum(),
  )
  world_cases.append(confirmed_sum)
  total_deaths.append(death_sum)
  mortality_rates.append(death_sum / confirmed_sum)
  total_recovered.append(recovered_sum)

In [None]:
# Totals left over from the final iteration of the loop above, i.e. the sums
# for the last date column.
confirmed_sum, death_sum, recovered_sum

In [None]:
# Convert the per-date lists into column vectors (one row per date, one
# column); days_since_start is simply the running index 0..len(dates)-1.
days_since_start = np.arange(len(dates)).reshape(-1, 1)
world_cases = np.asarray(world_cases).reshape(-1, 1)
total_deaths = np.asarray(total_deaths).reshape(-1, 1)
total_recovered = np.asarray(total_recovered).reshape(-1, 1)

In [None]:
# Sanity check: display the day-index column vector built above.
days_since_start

In [None]:
# Forecast horizon: number of days to predict beyond the last observed date.
future_days = 10
# Day indices 0 .. len(dates)+future_days-1 as a column vector: the observed
# days followed by the forecast horizon.
future_forecast = np.arange(len(dates) + future_days).reshape(-1, 1)
# Indices of the observed days only. Slicing by len(dates) (rather than a
# literal -10) keeps this in sync with future_days, and stays correct for
# future_days = 0, where a [:-0] slice would be empty.
adjusted_days = future_forecast[:len(dates)]

In [None]:
# Display the extended day-index vector (observed days plus forecast horizon).
future_forecast

In [None]:
# Build an MM/DD/YYYY label for every index in future_forecast, counting
# forward one day at a time from the start date.
start = '1/22/2020'
start_date = dt.datetime.strptime(start, '%m/%d/%Y')
future_forecast_days = [
    (start_date + dt.timedelta(days=offset)).strftime('%m/%d/%Y')
    for offset in range(len(future_forecast))
]

In [None]:
# For visualization: take the most recent date column (dates[-1]) from each of
# the raw tables, giving one value per row of the table.
latest_confirmed = confirmed_cases.loc[:, dates[-1]]
latest_deaths = deaths_reported.loc[:, dates[-1]]
latest_recovered = recovered_cases.loc[:, dates[-1]]
latest_confirmed, latest_deaths, latest_recovered

In [None]:
# Distinct values of the 'Country/Region' column, in order of first appearance.
unique_countries = confirmed_cases['Country/Region'].unique().tolist()
unique_countries

In [None]:
# Total the latest confirmed counts per country, set aside countries whose
# total is not positive, and rank the rest from most to fewest cases.
country_totals = [
    (name, latest_confirmed[confirmed_cases['Country/Region'] == name].sum())
    for name in unique_countries
]

# Countries without any positive count are tracked separately and dropped.
no_cases = [pair[0] for pair in country_totals if not pair[1] > 0]

# sorted() is stable, so countries with equal totals keep their original order.
ranked = sorted(
    [pair for pair in country_totals if pair[1] > 0],
    key=operator.itemgetter(1),
    reverse=True,
)

# Parallel lists: unique_countries[k] has country_confirmed_cases[k] cases.
unique_countries = [pair[0] for pair in ranked]
country_confirmed_cases = [pair[1] for pair in ranked]

In [None]:
# List every remaining country with its total, highest count first.
print("Confirmed Cases per Country/Region:")
for country, total in zip(unique_countries, country_confirmed_cases):
  print(f'{country}: {total} cases')

In [None]:
# Per-country totals rescaled to thousands; reused by the top-10 cells below.
holder = [count / 1000 for count in country_confirmed_cases]

# Very tall figure so that every country gets its own readable bar.
plt.figure(figsize=(32, 200))
plt.barh(unique_countries, holder)
plt.title("Covid-19 Confirmed Cases in Countries", size=20)
plt.xlabel("Number of Cases in Thousands")
plt.show()

In [None]:
# Keep the ten countries with the most cases (the lists are already sorted in
# descending order) and fold everything after them into one "Others" entry.
others = np.sum(holder[10:])
visual_unique_countries = unique_countries[:10] + ["Others"]
visual_confirmed_cases = holder[:10] + [others]

In [None]:
# Horizontal bar chart of the top-10 countries plus the "Others" bucket.
fig, ax = plt.subplots(figsize=(32, 20))
ax.barh(visual_unique_countries, visual_confirmed_cases)
ax.set_title("Top 10 Countries on Covid Cases in Thousands", size=20)
plt.show()

In [None]:
# Pie chart of the top-10 countries plus "Others".
# Colours are drawn at random (with replacement) from matplotlib's CSS4 palette.
palette = list(mcolors.CSS4_COLORS.values())
c = random.choices(palette, k=len(unique_countries))

plt.figure(figsize=(10, 10))
plt.pie(visual_confirmed_cases, colors=c)
plt.title("Covid-19 Cases in top 10 Countries")
plt.legend(visual_unique_countries, loc="best")
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression

# Chronological split of the data from day index 100 onward: shuffle=False
# keeps the final 15% of the days as the hold-out set.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_start[100:], world_cases[100:], test_size=0.15, shuffle=False
)

# The `normalize` argument was removed from LinearRegression in scikit-learn
# 1.2 and raises a TypeError there. Rescaling the single feature does not
# change an ordinary least-squares prediction, so it is simply dropped.
linear_model = LinearRegression(fit_intercept=True)
linear_model.fit(X_train_confirmed, y_train_confirmed)

# Predictions on the hold-out days and over the full forecast range.
test_linear_prediction = linear_model.predict(X_test_confirmed)
linear_prediction = linear_model.predict(future_forecast)

# scikit-learn metrics take (y_true, y_pred); MAE and MSE are symmetric, so
# the reported values are the same as before.
print("MAE: ", mean_absolute_error(y_test_confirmed, test_linear_prediction))
print("MSE: ", mean_squared_error(y_test_confirmed, test_linear_prediction))


In [None]:
# Overlay the held-out actual values and the linear model's predictions for
# the same days; the x-axis is the position within the test set.
plt.plot(y_test_confirmed)
plt.plot(test_linear_prediction)

In [None]:
# Observed worldwide cases against the linear-regression forecast.
plt.figure(figsize=(20,12))
plt.plot(adjusted_days, world_cases)
plt.plot(future_forecast, linear_prediction, linestyle="dashed", color="orange")
plt.title("Number of Cases Over Time", size=25)
# The x values are day indices counted from the first date column (1/22/2020),
# not from the start of the training window, so the label names that date.
plt.xlabel("Days since 1/22/2020", size=20)
plt.ylabel("Number of Cases", size=20)
plt.legend(['Confirmed Cases', 'Linear Regression Predictions'])
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()

In [None]:
# Linear-regression predictions for the last 10 entries of the forecast range,
# i.e. the days beyond the observed data.
print('Linear Regression Future Predictions: ', linear_prediction[-10:], sep='\n')

In [None]:
# Building the SVM model: randomized hyper-parameter search over an SVR.
# 'sigmoid' replaces the misspelled 'sigmond', which is not a valid SVR kernel
# and made every sampled candidate using it fail to fit.
kernel = ['poly', 'sigmoid', 'rbf']
c = [0.01, 0.1, 1, 10]
gamma = [0.01, 0.1, 1]
epsilon = [0.01, 0.1, 1]
shrinking = [True, False]
svm_grid = {'kernel': kernel, 'C': c, 'gamma': gamma, 'epsilon': epsilon, 'shrinking': shrinking}

svm = SVR(cache_size = 7000)
# 10 random draws from the grid, each scored by 3-fold CV on negative MSE.
svm_search = RandomizedSearchCV(svm, svm_grid, scoring='neg_mean_squared_error', cv=3, return_train_score=True, n_jobs=-1, n_iter=10, verbose=1 )
# SVR expects a 1-D target; y_train_confirmed is a column vector (it comes
# from world_cases reshaped to (-1, 1)), so flatten it before fitting.
svm_search.fit(X_train_confirmed, y_train_confirmed.ravel())

In [None]:
# Show the hyper-parameter combination selected by the randomized search.
svm_search.best_params_

In [None]:
# Take the best estimator found by the search and predict over every day index
# in future_forecast (observed days plus the forecast horizon).
svm_confirmed = svm_search.best_estimator_
svm_pred = svm_confirmed.predict(future_forecast)

In [None]:
# Inspect the selected estimator together with its predictions.
svm_confirmed, svm_pred

In [None]:
# Evaluate the selected SVR on the hold-out days: draw predictions and actual
# values on the same axes, then report the mean absolute error.
svm_test_pred = svm_confirmed.predict(X_test_confirmed)
for series in (svm_test_pred, y_test_confirmed):
  plt.plot(series)
print("MAE: ", mean_absolute_error(svm_test_pred, y_test_confirmed))

In [None]:
# Observed worldwide cases against the SVR forecast.
plt.figure(figsize=(20, 20))
# Plot the full observed series against its own day indices. The previous code
# referenced an undefined name (adjusted_dates) and sliced only the y data, so
# the x and y lengths could not match.
plt.plot(adjusted_days, world_cases)
plt.plot(future_forecast, svm_pred, linestyle='dashed')
# This is the figure title; it was previously passed to plt.plot by mistake.
plt.title('Number of Cases Over Time', size=30)
# The x values are day indices counted from the first date column (1/22/2020).
plt.xlabel('Days Since 1/22/2020', size=25)
plt.ylabel('Number of Cases', size=25)
plt.legend(['Confirmed Cases', 'Predicted Cases'])
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()