### Do COVID-19 Positive Cases Increase Deaths?

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the per-state COVID dataset from the working directory and preview its first rows.
covid = pd.read_csv(filepath_or_buffer="covid-states.csv")
covid.head(n=5)

In [None]:
# Structural overview: dimensions, missing-value count per column, and column dtypes.
dataset_shape = covid.shape
null_counts = covid.isnull().sum()
column_types = covid.dtypes
print("Size/Shape of the dataset", dataset_shape)
print("Checking for null values:\n", null_counts)
print("Checking Data-type:", column_types)

In [None]:
# Drop the dataQualityGrade column in place.
# The column is named via the `columns=` keyword: passing the axis
# positionally (`drop([...], 1)`) is rejected by pandas >= 2.0.
covid.drop(columns=["dataQualityGrade"], inplace=True)

In [None]:
# Restrict the analysis to the rows whose state code is "DC" (Washington DC).
is_dc = covid["state"] == "DC"
dc_data = covid.loc[is_dc]
dc_data.head()

In [None]:
# Copy of the DC rows in which every missing value is replaced by 0
# (despite the variable name, no rows or columns are dropped).
dc_drop = dc_data.fillna(value=0)
dc_drop.head()

In [None]:
# Pairwise scatter plots / histograms for the key numeric columns of the DC data.
data_numeric = dc_data[['positive','negative','death','recovered','deathIncrease','totalTestsPeopleViral']]
# sns.pairplot is a figure-level function that creates its own figure, so a
# preceding plt.figure(figsize=...) call only produced an extra empty figure.
# Use pairplot's `height` / `aspect` arguments if the grid needs resizing.
sns.pairplot(data_numeric)
plt.show()

## Modelling data with NaN values

In [None]:
# Feature (cumulative positives) and target (daily death increase) for DC.
# scikit-learn estimators require X to be 2-D with shape (n_samples, n_features),
# so the single feature is reshaped into a column vector just like the target.
# NOTE(review): LinearRegression rejects NaN inputs; presumably these two
# columns have no missing values for DC -- verify against the null counts.
X = dc_data['positive'].values.reshape(-1, 1)
y = dc_data['deathIncrease'].values.reshape(-1, 1)

### Split the data into training and testing sets

In [None]:
# Hold out part of the data for evaluation; the fixed seed keeps the split reproducible.
splits = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = splits

In [None]:
# Unfitted linear regression estimator for the unfilled DC data;
# the bare name on the last line echoes its repr as the cell output.
model = LinearRegression()
model

In [None]:
# Fit the regression on the training portion only.
model.fit(X=X_train, y=y_train)

In [None]:
# R^2 of the fitted model on the training and the held-out data.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

In [None]:
# Fitted values for every observation, used to draw the regression line.
predictions = model.predict(X)
intercept = model.intercept_[0]
slope = model.coef_[0][0]
print("The linear model is: Y = {:.5} + {:.5}X".format(intercept, slope))

# Observations as black points with the fitted line drawn in blue.
fig, ax = plt.subplots(figsize=(16, 8))
ax.scatter(X, y, c='black')
ax.plot(X, predictions, c='blue', linewidth=2)
ax.set_xlabel("Positive")
ax.set_ylabel("Death Increase")
plt.show()

In [None]:
# Degree-4 polynomial regression of death increase on positive cases.
# PolynomialFeatures needs a 2-D (n_samples, n_features) input; coerce X into a
# column vector so this cell works whether X is a 1-D Series or already 2-D.
X_2d = np.asarray(X).reshape(-1, 1)
poly = PolynomialFeatures(degree=4)
X_poly = poly.fit_transform(X_2d)

# The transformer must not be re-fitted on X_poly: that would re-learn it on the
# already-expanded features and break any later poly.transform(X) call.
lin2 = LinearRegression()
lin2.fit(X_poly, y)
pred = lin2.predict(X_poly)

# Sort the (x, prediction) pairs by x so the curve is drawn left to right
# instead of zig-zagging in row order.
new_X, new_y = zip(*sorted(zip(X_2d.ravel(), pred.ravel())))

plt.figure(figsize=(16, 8))
plt.scatter(
    X_2d,
    y,
    c='black'
)
plt.plot(
    new_X, new_y,
    c='blue'
)
plt.xlabel("Positive")
plt.ylabel("Death Increase")
plt.show()

### Modelling data with NaN values set to 0

In [None]:
# Column vectors of shape (n_samples, 1) taken from the zero-filled DC frame.
X = dc_drop[['positive']].to_numpy()
y = dc_drop[['deathIncrease']].to_numpy()

In [None]:
# Re-split using the zero-filled data; the same seed as before keeps the split reproducible.
splits = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = splits

In [None]:
# Separate, unfitted linear regression estimator for the zero-filled data;
# the bare name on the last line echoes its repr as the cell output.
model2 = LinearRegression()
model2

In [None]:
# Fit the second regression on the training portion of the zero-filled data.
model2.fit(X=X_train, y=y_train)

In [None]:
# R^2 of the model trained on the zero-filled data. This must score `model2`:
# scoring `model` would report the earlier regression fitted on the unfilled
# data instead of the one fitted in this section.
print(f"Training Data Score: {model2.score(X_train, y_train)}")
print(f"Testing Data Score: {model2.score(X_test, y_test)}")

In [None]:
# Fitted values from the zero-filled model, used to draw the regression line.
predictions = model2.predict(X)
intercept = model2.intercept_[0]
slope = model2.coef_[0][0]
print("The linear model is: Y = {:.5} + {:.5}X".format(intercept, slope))

# Observations as black points with the fitted line drawn in blue.
fig, ax = plt.subplots(figsize=(16, 8))
ax.scatter(X, y, c='black')
ax.plot(X, predictions, c='blue', linewidth=2)
ax.set_xlabel("Positive")
ax.set_ylabel("Death Increase")
plt.show()