### COVID-19 Deaths

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC  
from sklearn.linear_model import LinearRegression 
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt

In [2]:
# Load the per-state COVID-19 tracking CSV (path is relative to the notebook)
# and preview the first five rows.
covid = pd.read_csv(filepath_or_buffer="covid-states.csv")
covid.head(n=5)

Unnamed: 0,date,state,dataQualityGrade,death,deathConfirmed,deathIncrease,deathProbable,hospitalized,hospitalizedCumulative,hospitalizedCurrently,...,totalTestResults,totalTestResultsIncrease,totalTestsAntibody,totalTestsAntigen,totalTestsPeopleAntibody,totalTestsPeopleAntigen,totalTestsPeopleViral,totalTestsPeopleViralIncrease,totalTestsViral,totalTestsViralIncrease
0,2020-12-02,AK,A,122.0,122.0,0,,768.0,768.0,164.0,...,1024643.0,6015,,,,,,0,1024643.0,6015
1,2020-12-02,AL,A,3711.0,3326.0,73,385.0,25821.0,25821.0,1801.0,...,1603523.0,9681,,,73187.0,,1603523.0,9681,,0
2,2020-12-02,AR,A+,2522.0,2312.0,10,210.0,9110.0,9110.0,1088.0,...,1703266.0,11813,,21856.0,,144866.0,,0,1703266.0,11813
3,2020-12-02,AS,D,0.0,,0,,,,,...,1988.0,0,,,,,,0,1988.0,0
4,2020-12-02,AZ,A+,6739.0,6237.0,52,502.0,26312.0,26312.0,2699.0,...,2288204.0,16290,365871.0,,,,2288204.0,16290,,0


In [3]:
# Structural overview of the raw frame: dimensions, per-column missing-value
# counts, and per-column dtypes.
dataset_shape = covid.shape
null_counts = covid.isnull().sum()
column_dtypes = covid.dtypes

print("Size/Shape of the dataset", dataset_shape)
print("Checking for null values:\n", null_counts)
print("Checking Data-type:", column_dtypes)

Size/Shape of the dataset (15409, 42)
Checking for null values:
 date                                    0
state                                   0
dataQualityGrade                     1261
death                                 826
deathConfirmed                       8691
deathIncrease                           0
deathProbable                       10413
hospitalized                         6127
hospitalizedCumulative               6127
hospitalizedCurrently                3105
hospitalizedIncrease                    0
inIcuCumulative                     12757
inIcuCurrently                       7939
negative                              308
negativeIncrease                        0
negativeTestsAntibody               14438
negativeTestsPeopleAntibody         14657
negativeTestsViral                  12154
onVentilatorCumulative              14496
onVentilatorCurrently                9326
positive                              150
positiveCasesViral                   3509
positiveInc

In [4]:
# Drop the dataQualityGrade column; it is not among the features selected below.
# - `columns=` replaces the positional axis argument (`drop(labels, 1)`), which was
#   deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
# - Rebinding with errors="ignore" (instead of inplace=True) keeps the cell safe to
#   re-run: a second in-place drop would raise KeyError on the missing column.
covid = covid.drop(columns=["dataQualityGrade"], errors="ignore")

In [5]:
# Restrict the analysis to the rows reported for Washington DC.
is_dc_row = covid["state"] == "DC"
dc_data = covid[is_dc_row]
dc_data.head(n=5)

Unnamed: 0,date,state,death,deathConfirmed,deathIncrease,deathProbable,hospitalized,hospitalizedCumulative,hospitalizedCurrently,hospitalizedIncrease,...,totalTestResults,totalTestResultsIncrease,totalTestsAntibody,totalTestsAntigen,totalTestsPeopleAntibody,totalTestsPeopleAntigen,totalTestsPeopleViral,totalTestsPeopleViralIncrease,totalTestsViral,totalTestsViralIncrease
8,2020-12-02,DC,690.0,,5,,,,165.0,0,...,701485.0,4076,,,,,308164.0,828,,0
64,2020-12-01,DC,685.0,,5,,,,160.0,0,...,697409.0,2153,,,,,307336.0,889,,0
120,2020-11-30,DC,680.0,,0,,,,158.0,0,...,695256.0,4914,,,,,306447.0,884,,0
176,2020-11-29,DC,680.0,,2,,,,145.0,0,...,690342.0,5004,,,,,305563.0,1530,,0
232,2020-11-28,DC,678.0,,1,,,,157.0,0,...,685338.0,12899,,,,,304033.0,4071,,0


In [6]:
# Replace every missing value in the DC subset with 0, then preview the result.
# NOTE(review): columns that are entirely missing for DC become constant zeros.
dc_data = dc_data.fillna(value=0)
dc_data.head(n=5)

Unnamed: 0,date,state,death,deathConfirmed,deathIncrease,deathProbable,hospitalized,hospitalizedCumulative,hospitalizedCurrently,hospitalizedIncrease,...,totalTestResults,totalTestResultsIncrease,totalTestsAntibody,totalTestsAntigen,totalTestsPeopleAntibody,totalTestsPeopleAntigen,totalTestsPeopleViral,totalTestsPeopleViralIncrease,totalTestsViral,totalTestsViralIncrease
8,2020-12-02,DC,690.0,0.0,5,0.0,0.0,0.0,165.0,0,...,701485.0,4076,0.0,0.0,0.0,0.0,308164.0,828,0.0,0
64,2020-12-01,DC,685.0,0.0,5,0.0,0.0,0.0,160.0,0,...,697409.0,2153,0.0,0.0,0.0,0.0,307336.0,889,0.0,0
120,2020-11-30,DC,680.0,0.0,0,0.0,0.0,0.0,158.0,0,...,695256.0,4914,0.0,0.0,0.0,0.0,306447.0,884,0.0,0
176,2020-11-29,DC,680.0,0.0,2,0.0,0.0,0.0,145.0,0,...,690342.0,5004,0.0,0.0,0.0,0.0,305563.0,1530,0.0,0
232,2020-11-28,DC,678.0,0.0,1,0.0,0.0,0.0,157.0,0,...,685338.0,12899,0.0,0.0,0.0,0.0,304033.0,4071,0.0,0


In [7]:
# Index the DC frame by report date.
# The guard plus rebinding (instead of inplace=True) makes the cell idempotent:
# re-running an in-place set_index('date') raises KeyError because 'date' has
# already been moved out of the columns and into the index.
if dc_data.index.name != 'date':
    dc_data = dc_data.set_index('date')


In [8]:
# Predictor columns used to model the death count.
feature_columns = [
    'hospitalized',
    'onVentilatorCurrently',
    'positive',
    'positiveCasesViral',
    'positiveTestsAntibody',
    'positiveTestsAntigen',
    'positiveTestsPeopleAntibody',
    'positiveTestsPeopleAntigen',
    'positiveTestsViral',
    'totalTestEncountersViral',
    'totalTestEncountersViralIncrease',
    'totalTestResults',
    'totalTestResultsIncrease',
    'totalTestsAntibody',
    'totalTestsAntigen',
    'totalTestsPeopleAntibody',
    'totalTestsPeopleAntigen',
    'totalTestsPeopleViral',
    'totalTestsPeopleViralIncrease',
    'totalTestsViral',
]

# Feature matrix and target vector for the DC subset.
X = dc_data[feature_columns]
y = dc_data['death']

In [9]:
# Number of columns left in the DC frame (second entry of the shape tuple).
dc_data.shape[1]


40

### Model 1 - SVC

In [10]:
# Shuffle-split into train/test using scikit-learn's default test size;
# the fixed seed makes the split reproducible.
split_parts = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [11]:
# Instantiate a support-vector classifier with scikit-learn's default settings
# and echo its repr as the cell output.
# NOTE(review): SVC is a classifier, while the target `y` is dc_data['death'],
# a numeric count — each distinct value will be treated as its own class label.
# Confirm this is intended.
model = SVC()
model

SVC()

In [12]:
# Fit the current `model` on the training split; fit() returns the estimator,
# which is why its repr is shown as the cell output.
model.fit(X_train, y_train)

SVC()

In [13]:
# Evaluate the fitted model on both splits with the estimator's own score()
# (mean accuracy for a scikit-learn classifier) and report the two values.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

Training Data Score: 0.14215686274509803
Testing Data Score: 0.13043478260869565


#### Low scores - `SVC` is a classifier, but `death` is a continuous count, so a regression model is required

### Model 2 - LinearRegression

In [14]:
# Feature matrix and target for the linear-regression model, selected from the
# DC frame with explicit label-based indexing.
X = dc_data.loc[:, [
    'hospitalized', 'onVentilatorCurrently', 'positive',
    'positiveCasesViral', 'positiveTestsAntibody', 'positiveTestsAntigen',
    'positiveTestsPeopleAntibody', 'positiveTestsPeopleAntigen',
    'positiveTestsViral', 'totalTestEncountersViral',
    'totalTestEncountersViralIncrease', 'totalTestResults',
    'totalTestResultsIncrease', 'totalTestsAntibody', 'totalTestsAntigen',
    'totalTestsPeopleAntibody', 'totalTestsPeopleAntigen',
    'totalTestsPeopleViral', 'totalTestsPeopleViralIncrease',
    'totalTestsViral',
]]
y = dc_data.loc[:, 'death']

In [15]:
# Seeded train/test split (default test size) for the regression model.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [16]:
# Rebind `model` to an ordinary least-squares linear regressor with default
# settings (this replaces the estimator previously stored under the same name)
# and echo its repr as the cell output.
model = LinearRegression()
model

LinearRegression()

In [17]:
# Fit the current `model` on the training split; fit() returns the estimator,
# which is why its repr is shown as the cell output.
model.fit(X_train, y_train)

LinearRegression()

In [18]:
# Report the estimator's score() on each split; for a scikit-learn regressor
# this is the coefficient of determination (R^2).
r2_on_train = model.score(X_train, y_train)
r2_on_test = model.score(X_test, y_test)

print(f"Training Data Score: {r2_on_train}")
print(f"Testing Data Score: {r2_on_test}")

Training Data Score: 0.9948860479279095
Testing Data Score: 0.9953102789598155
