## COVID19 DIAGNOSIS PROJECT USING NUMPY, PANDAS, MATPLOTLIB & LINEAR
## REGRESSION

### Some Important Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import calendar as cd

In [None]:
# Load the COVID-19 India CSV file into a DataFrame.

csv_path = 'e:/covid_19_india.csv'
covid_df = pd.read_csv(csv_path)

In [None]:
# Preview the first ten and the last ten rows of the loaded data.
display(covid_df.iloc[:10])
covid_df.iloc[-10:]

In [None]:
# Remove four columns from the dataset, then preview the result.

dropped_columns = ['Sno','Time','ConfirmedIndianNational','ConfirmedForeignNational']
covid_df = covid_df.drop(columns = dropped_columns)
display(covid_df.iloc[:10])
covid_df.iloc[-10:]

In [None]:
# Add a 'Ratio' column: deaths expressed as a percentage of cured cases.
# NOTE(review): rows where 'Cured' is 0 presumably yield inf/NaN here -- confirm
# whether that is acceptable for downstream use.

covid_df['Ratio'] = covid_df['Deaths'].div(covid_df['Cured']).mul(100)
display(covid_df.iloc[:10])
covid_df.iloc[-10:]

In [None]:
# Snapshot date used to select one day of data, rendered as DD-MM-YYYY.
# strftime zero-pads the day and month itself, so the string stays correct
# for two-digit days/months as well (hand-prefixing '0' would give e.g. '015').
Date = dt.date(2021,6,1)
Starting_Date = Date.strftime('%d-%m-%Y')
print(Starting_Date)

In [None]:
# Keep only the rows whose 'Date' equals the snapshot date string.
on_snapshot_day = covid_df['Date'].eq(Starting_Date)
covid_Day_df = covid_df.loc[on_snapshot_day]
display(covid_Day_df.iloc[:10])
covid_Day_df.iloc[-10:]

In [None]:
# Order the snapshot rows from the highest to the lowest 'Confirmed' value.

max_confirmed_cases = covid_Day_df.sort_values('Confirmed',ascending = False)
max_confirmed_cases.iloc[:10]

In [None]:
# First ten rows of the confirmed-cases ranking.
top_states_confirmed = max_confirmed_cases.head(10)

### Bar Graph

In [None]:
%matplotlib inline
plt.figure(figsize = (8,6),dpi=100)

states = [state for state,df in top_states_confirmed.groupby('State/UnionTerritory')]

plt.bar(top_states_confirmed['State/UnionTerritory'],top_states_confirmed['Confirmed'],width = 0.5,color = 'y',align = 'center')
plt.xlabel('States',size = 15)
plt.ylabel('Confirmed Cases',size = 15)
plt.title('Covid19 confirmed cases in top 10 states',color = '#bf4127',fontdict = {'fontname' : 'Cambria Math','fontsize' : 15})
plt.xticks(states,rotation = 'vertical',size = 9)
plt.show()

In [None]:
# Order the snapshot rows from the highest to the lowest 'Deaths' value.
max_death_cases = covid_Day_df.sort_values('Deaths',ascending = False)
display(max_death_cases.iloc[:10])
max_death_cases.iloc[-10:]

In [None]:
# First ten rows of the deaths ranking.
top_states_deaths = max_death_cases.head(10)

In [None]:
%matplotlib inline
plt.figure(figsize = (8,6),dpi=100)

states = [state for state,df in top_states_confirmed.groupby('State/UnionTerritory')]

plt.bar(top_states_confirmed['State/UnionTerritory'],top_states_confirmed['Deaths'],width = 0.5,color = 'r')
plt.xlabel('States',size = 15)
plt.ylabel('Death Cases',size = 15)
plt.title('Covid19 death cases in top 10 states',color = 'b',fontdict = {'fontname' : 'Century','fontsize' : 15})
plt.xticks(states,rotation = 'vertical',size = 9)
plt.show()

In [None]:
# Order the snapshot rows from the highest to the lowest 'Cured' value.
max_cured_cases = covid_Day_df.sort_values('Cured',ascending = False)
display(max_cured_cases.iloc[:10])
max_cured_cases.iloc[-10:]

In [None]:
# First ten rows of the cured ranking.
top_states_cured = max_cured_cases.head(10)

In [None]:
%matplotlib inline
plt.figure(figsize = (8,6),dpi=100)

states = [state for state,df in top_states_confirmed.groupby('State/UnionTerritory')]

plt.bar(top_states_confirmed['State/UnionTerritory'],top_states_confirmed['Cured'],width = 0.5,color = 'g')
plt.xlabel('States',size = 15)
plt.ylabel('Cured Cases',size = 15)
plt.title('Covid19 cure cases in top 10 states',color = '#2bd56c',fontdict = {'fontname' : 'Comic Sans MS','fontsize' : 15})
plt.xticks(states,rotation = 'vertical',size = 9)
plt.show()

In [None]:
%matplotlib inline
plt.figure(figsize = (15,10),dpi=150)

states = [state for state,df in top_states_confirmed.groupby('State/UnionTerritory')]

x = np.arange(10)
plt.bar(x,top_states_confirmed['Confirmed'],width = 0.3,color = 'y')
plt.bar(x+0.3,top_states_confirmed['Deaths'],width = 0.3,color = 'r')
plt.bar(x+0.6,top_states_confirmed['Cured'],width = 0.3,color = 'g')

plt.xlabel('States',size = 20)
plt.ylabel('Different Cases',size = 20)
plt.title('Covid19--different cases in top 10 states',color = 'm',fontdict = {'fontname' : 'Comic Sans MS','fontsize' : 25})
plt.xticks(x+0.3,states,size =12)

plt.tight_layout()
plt.legend(labels = ['confirmed','deaths','cured'],loc = 'best')
plt.savefig('e:/Covid19_diagnosis.png')
plt.show()

In [None]:
# Find the state whose largest 'Confirmed' value is the highest of all states.
peak_confirmed_by_state = covid_df.groupby('State/UnionTerritory')['Confirmed'].max()
top_confirmed = peak_confirmed_by_state.idxmax()
print(top_confirmed)

In [None]:
#Maharashtra -- rows of the state selected as top_confirmed

in_top_state = covid_df['State/UnionTerritory'].eq(top_confirmed)
top_confirmed_state = covid_df.loc[in_top_state]

display(top_confirmed_state.iloc[:10])
top_confirmed_state.iloc[-10:]

In [None]:
#West Bengal -- all rows for this state

west_bengal_df = covid_df.loc[covid_df['State/UnionTerritory'].eq('West Bengal')]

display(west_bengal_df.iloc[:10])
west_bengal_df.iloc[-10:]

In [None]:
#Delhi -- all rows for this state

delhi_df = covid_df.loc[covid_df['State/UnionTerritory'].eq('Delhi')]

display(delhi_df.iloc[:10])
delhi_df.iloc[-10:]

In [None]:
#Assam -- all rows for this state

assam_df = covid_df.loc[covid_df['State/UnionTerritory'].eq('Assam')]

display(assam_df.iloc[:10])
assam_df.iloc[-10:]

In [None]:
#Bihar -- all rows for this state

bihar_df = covid_df.loc[covid_df['State/UnionTerritory'].eq('Bihar')]

display(bihar_df.iloc[:10])
bihar_df.iloc[-10:]

In [None]:
%matplotlib inline

plt.figure(figsize = (15,10),dpi = 100)

plt.plot(top_confirmed_state['Date'],top_confirmed_state['Confirmed'],'y-')
plt.plot(top_confirmed_state['Date'],top_confirmed_state['Deaths'],'r-')
plt.plot(top_confirmed_state['Date'],top_confirmed_state['Cured'],'g-')

plt.ylabel('Maharashtra All Cases',size = 20)
plt.xlabel('Cases in Months',size = 20)
plt.title('MAHARASHTRA DATASET',color = '#143b9e',size = 30)
plt.xticks([],size = 9)

plt.legend(labels = ['confirmed','deaths','cured'],loc = 'best')
plt.show()

In [None]:
%matplotlib inline

plt.figure(figsize = (15,10),dpi = 100)

plt.plot(west_bengal_df['Date'],west_bengal_df['Confirmed'],'y-')
plt.plot(west_bengal_df['Date'],west_bengal_df['Deaths'],'r-')
plt.plot(west_bengal_df['Date'],west_bengal_df['Cured'],'g-')

plt.ylabel('West Bengal All Cases',size = 20)
plt.xlabel('Cases in Months',size = 20)
plt.title('WEST BENGAL DATASET',color = '#143b9e',size = 30)
plt.xticks([],size = 9)

plt.legend(labels = ['confirmed','deaths','cured'],loc = 'best')
plt.show()

In [None]:
%matplotlib inline

plt.figure(figsize = (15,10),dpi = 100)

plt.plot(delhi_df['Date'],delhi_df['Confirmed'],'y-')
plt.plot(delhi_df['Date'],delhi_df['Deaths'],'r-')
plt.plot(delhi_df['Date'],delhi_df['Cured'],'g-')

plt.ylabel('New Delhi All Cases',size = 20)
plt.xlabel('Cases in Months',size = 20)
plt.title('NEW DELHI DATASET',color = '#143b9e',size = 30)
plt.xticks([],size = 9)

plt.legend(labels = ['confirmed','deaths','cured'],loc = 'best')
plt.show()

### Pie Chart

In [None]:
%matplotlib inline
plt.figure(figsize = (12,9),dpi = 100)

plt.style.use('ggplot')

top_confirmed_state = top_confirmed_state.drop(top_confirmed_state[top_confirmed_state.Cured == 0].index)
west_bengal_df = west_bengal_df.drop(west_bengal_df[west_bengal_df.Cured == 0].index)
delhi_df = delhi_df.drop(delhi_df[delhi_df.Cured == 0].index)
assam_df = assam_df.drop(assam_df[assam_df.Cured == 0].index)
bihar_df = bihar_df.drop(bihar_df[bihar_df.Cured == 0].index)

MH = top_confirmed_state['Confirmed'].count()
WB = west_bengal_df['Confirmed'].count()
DL = delhi_df['Confirmed'].count()
AS = assam_df['Confirmed'].count()
BH = bihar_df['Confirmed'].count()

labels = ['Maharashtra','West Bengal','Delhi','Assam','Bihar']
colors = ['#f023bb','#32a825','#56efc4','#2e02a4','#d4cd23']

plt.pie([MH,WB,DL,AS,BH],labels = labels,colors = colors,autopct = '%.2f %%')

plt.title('Counting all non zero Cured Cases',fontdict = {'fontname' : 'Lucida Console','fontsize' : 15})
plt.legend(loc = 'best')
plt.savefig('e:/Covid19_diagnosis_pie.png')
plt.show()

In [None]:
%matplotlib inline
plt.figure(figsize = (12,9),dpi = 100)

plt.style.use('ggplot')

MH = top_confirmed_state['Cured'].max()
WB = west_bengal_df['Cured'].max()
DL = delhi_df['Cured'].max()
AS = assam_df['Cured'].max()
BH = bihar_df['Cured'].max()

labels = ['Maharashtra','West Bengal','Delhi','Assam','Bihar']
colors = ['#2fd419','#197ed4','#b609bf','#e33751','#dfe309']

plt.pie([MH,WB,DL,AS,BH],labels = labels,colors = colors,autopct = '%.2f %%')

plt.title('Maximum Cured Cases in 5 States',color = 'green',fontdict = {'fontname' : 'Bookman Old Style','fontsize' : 15})
plt.legend(loc = 'best')
plt.show()

In [None]:
%matplotlib inline
plt.figure(figsize = (12,9),dpi = 100)

plt.style.use('ggplot')

MH = top_confirmed_state['Deaths'].max()
WB = west_bengal_df['Deaths'].max()
DL = delhi_df['Deaths'].max()
AS = assam_df['Deaths'].max()
BH = bihar_df['Deaths'].max()

labels = ['Maharashtra','West Bengal','Delhi','Assam','Bihar']
colors = ['#378ee8','#68d8b4','#c922c7','#8dbb0d','#04ba66']

plt.pie([MH,WB,DL,AS,BH],labels = labels,colors = colors,autopct = '%.2f %%')

plt.title('Maximum Deaths Cases in 5 States',color = 'red',fontdict = {'fontname' : 'Cascadia Code','fontsize' : 15})
plt.legend(loc = 'best')
plt.show()

In [None]:
%matplotlib inline
plt.figure(figsize = (12,9),dpi = 100)

plt.style.use('ggplot')

top_confirmed_state = top_confirmed_state.drop(top_confirmed_state[top_confirmed_state.Cured == 0].index)
west_bengal_df = west_bengal_df.drop(west_bengal_df[west_bengal_df.Cured == 0].index)
delhi_df = delhi_df.drop(delhi_df[delhi_df.Cured == 0].index)
assam_df = assam_df.drop(assam_df[assam_df.Cured == 0].index)
bihar_df = bihar_df.drop(bihar_df[bihar_df.Cured == 0].index)

MH = top_confirmed_state['Confirmed'].max()
WB = west_bengal_df['Confirmed'].max()
DL = delhi_df['Confirmed'].max()
AS = assam_df['Confirmed'].max()
BH = bihar_df['Confirmed'].max()

labels = ['Maharashtra','West Bengal','Delhi','Assam','Bihar']
colors = ['#a548c6','#4Fb5b9','#c3918a','#75b322','#eeef07']

plt.pie([MH,WB,DL,AS,BH],labels = labels,colors = colors,autopct = '%.2f %%')

plt.title('Maximum Confirmed Cases in 5 States',color = 'orange',fontdict = {'fontname' : 'HP Simplified','fontsize' : 15})
plt.legend(loc = 'best')
plt.show()

### Using Linear Regression

In [None]:
# Keep only the first ten West Bengal rows after sorting by 'Confirmed', descending.
west_bengal_df = west_bengal_df.sort_values('Confirmed',ascending = False).head(10)
west_bengal_df

In [None]:
# Feature ('Confirmed') and target ('Cured') as 2-D NumPy arrays, one column each.
X = west_bengal_df[['Confirmed']].to_numpy()
y = west_bengal_df[['Cured']].to_numpy()

In [None]:
# Split X and y into training and test parts (40% test, fixed random seed).

from sklearn.model_selection import train_test_split
split_parts = train_test_split(X,y,test_size = 0.4,random_state = 4)
X_train,X_test,y_train,y_test = split_parts

In [None]:
# Fit an ordinary linear regression model on the training split.

from sklearn.linear_model import LinearRegression
regr = LinearRegression()
regr.fit(X = X_train,y = y_train)

In [None]:
# Predict on the test inputs taken in ascending order, then flatten to 1-D.
# Sorting the inputs (rather than the predictions) keeps each prediction
# aligned with its input whatever the sign of the fitted slope, and ravel()
# avoids hard-coding the number of test samples.
y_pred = regr.predict(np.sort(X_test,axis = 0)).ravel()
y_pred

In [None]:
# Show the coefficient(s) learned by the fitted linear model.
regr.coef_

In [None]:
# Show the intercept learned by the fitted linear model.
regr.intercept_

In [None]:
# Score the fitted model on the held-out test split
# (for LinearRegression, score() reports R^2).
regr.score(X_test,y_test)

In [None]:
# First ten rows of west_bengal_df after sorting by 'Confirmed', descending.
west_bengal_new_df = west_bengal_df.sort_values('Confirmed',ascending = False).head(10)
west_bengal_new_df

In [None]:
# Flatten the test arrays to 1-D (no hard-coded sample count) and reorder both
# by ascending X using one shared permutation, so every actual y value stays
# paired with its own X instead of being sorted independently.
X_test = np.ravel(X_test)
y_test = np.ravel(y_test)
order = np.argsort(X_test,kind = 'stable')
X_test = X_test[order]
y_test = y_test[order]

In [None]:
%matplotlib inline
plt.figure(figsize = (10,6),dpi = 100)

plt.style.use('ggplot')

plt.scatter(X_test,y_pred,color = 'black')
plt.plot(X_test,y_test,'bo-',linewidth = 2,markerfacecolor = 'w',markersize = 7)

plt.grid(True)
plt.xlabel('X Axis',fontsize = 15)
plt.ylabel('Y Axis',fontsize = 15)
plt.title('X vs Y',color = 'magenta',fontsize = 20)

plt.legend(labels = ['X vs Predicted_Y','X vs Actual_Y'])
plt.show()

In [None]:
# Report MSE, RMSE and R^2 of the predictions against the test targets.

from sklearn.metrics import mean_squared_error,r2_score
mse = mean_squared_error(y_test,y_pred)
r_squared = r2_score(y_test,y_pred)
print("Mean Squared Error : ",mse)
print("Root Mean Squared Error : ",np.sqrt(mse))
print("R_squared value : %.2f" %r_squared)

In [None]:
#Encoding Categorical Data

from sklearn.preprocessing import LabelEncoder,OneHotEncoder

# LabelEncoder works on a 1-D array of labels, so flatten the single-column X first.
labelencoder = LabelEncoder()
X = labelencoder.fit_transform(np.ravel(X))
X = sorted(X)
display(X)

# OneHotEncoder expects shape (n_samples, n_features). Reshape to one column
# with one row per value; a (1, n) array would be read as a single sample
# with n features and encode to a single row of ones.
X = np.array(X).reshape(-1,1)
onehotencoder = OneHotEncoder()
X = onehotencoder.fit_transform(X).toarray()
X