In [65]:
import os #to manipulate file paths 

import numpy as np # facilitate advanced mathematical and other types of operations on large numbers of data


import pandas as pd # data processing ie allows importing data from various file format sand data analysis

# List every file shipped with the dataset so the paths used by the read_csv
# calls further down can be checked against what is actually on disk.
# os.walk() has to be pointed at a directory: given the path of a single CSV
# file (as before) it yields nothing, so the loop body never ran.
for dirname, _, filenames in os.walk('../input/covid19-in-india'):
    for filename in filenames:
        print(os.path.join(dirname, filename))  # full path of each data file

    
    

In [66]:

import matplotlib.pyplot as plt        #visualization library in Python

%matplotlib inline                    
# lead to static images of your plot embedded in the notebook.

import seaborn as sns      #built on top of matplotlib, provides beautiful default styles and 
                           # colour palettes to make statistical plots more attractive
import plotly      #python library which is used to design graphs, especially interactive graphs It can plot various types of graphs  
                   #  and charts like scatter plots, line charts, bar charts, box plots, histograms, pie charts.         



import plotly.express as px  # it does a lot of the things that you can do it with Plotly but with a much simpler syntax.

import plotly.graph_objects as go

import cufflinks as cf   #binds the power of Plotly with the flexibility of Pandas for easy plotting

import plotly.offline as pyo  #Plotly Offline allows you to create graphs offline and save them locally.

from plotly.offline import init_notebook_mode,plot,iplot

import folium             #Folium is a Python library used for visualizing geospatial data(basically used to plot interactive maps).

from folium import plugins

# Default size, in inches, of every matplotlib figure created from here on.
plt.rcParams['figure.figsize'] = (10, 12)

import warnings

# Suppress every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

In [67]:
# Plotly's notebook mode has to be switched on before figures can be shown
# inside the notebook (init_notebook_mode is the name imported from plotly.offline).
init_notebook_mode(connected=True)
# Put cufflinks into offline mode as well.
cf.go_offline()

In [68]:
# Load the main case-count table.
covid_csv_path = r"../input/covid19-in-india/covid_19_india.csv"
df = pd.read_csv(covid_csv_path)
df.head()  # preview: the first five rows

In [69]:
df.isna().sum()  # count of missing values in each column

In [70]:
# Remove the two nationality breakdown columns; nothing below uses them.
# Rebinding df (instead of inplace=True) together with errors='ignore' keeps the
# cell re-runnable: the in-place version raised KeyError on a second execution
# because the columns had already been removed.
df = df.drop(columns=['ConfirmedIndianNational', 'ConfirmedForeignNational'], errors='ignore')

In [71]:
df.shape   # (number of rows, number of columns) of df

In [72]:
df.head()  # first five rows again, to confirm the two nationality columns are gone

# Recovered cases and Deaths

In [73]:
# Horizontal bars of confirmed vs. cured counts, one pair per state.
#
# df holds one row per state per day.  Handing it to sns.barplot directly makes
# seaborn draw the *mean* over all days (plus bootstrapped error bars) rather
# than the totals that the "Total" legend entry promises.  Collapse to one row
# per state first, using the per-state maximum - the same reduction the
# state-wise summary further down in this notebook applies to these columns.
f, ax = plt.subplots(figsize=(18, 38))
data = (
    df.groupby('State/UnionTerritory', as_index=False)[['Confirmed', 'Cured', 'Deaths']]
    .max()
    .sort_values('Confirmed', ascending=False)   # state with most cases on top
)

# 'Cured' is drawn second so that it overlays the longer 'Confirmed' bar.
sns.barplot(x="Confirmed", y="State/UnionTerritory", data=data, label="Total", color="red", ax=ax)
sns.barplot(x="Cured", y="State/UnionTerritory", data=data, label="Cured", color="orange", ax=ax)

ax.legend(ncol=2, loc="lower right", frameon=True)   # legend placement
# Derive the x-axis limit from the data (5% headroom) instead of a fixed number
# that silently goes stale when the CSV is refreshed.
ax.set(xlim=(0, data['Confirmed'].max() * 1.05))
sns.despine(left=True, bottom=True)

## Confirmed cases, recovered cases and death tolls in India

In [74]:
# Nationwide figures per date: add up the rows of all states that share a
# 'Date', then split the result into one two-column frame per metric
# (columns: 'Date' plus the metric).
india_by_date = df.groupby('Date')[['Confirmed', 'Cured', 'Deaths']].sum()

df_confirmed_india = india_by_date['Confirmed'].reset_index()
df_cured_india = india_by_date['Cured'].reset_index()
df_death_india = india_by_date['Deaths'].reset_index()

# Highest nationwide value reached by each metric.
print("The Confirmed Cases are", df_confirmed_india['Confirmed'].max())
print("The Recovered Cases are", df_cured_india['Cured'].max())
print("The Deaths Cases are", df_death_india['Deaths'].max())

## Day-wise cases in India

> ## Day-wise confirmed cases

#Death cases day-wise

In [75]:
 # Nationwide confirmed cases: cumulative trend first, then new cases per day.
# The first chart is built with plotly graph objects, the second with Plotly Express.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_confirmed_india['Date'], y=df_confirmed_india['Confirmed'],
                         mode=' markers', name='Total Cases'))
fig.update_layout(title_text='Trend of Coronavirus Cases in India (Cumulative cases)',
                  plot_bgcolor='rgb(223, 223, 223)')
fig.show()

# New COVID-19 cases reported daily in India.
# 'Confirmed' is the cumulative series plotted above, so the "daily" chart needs
# the day-over-day difference; before, the cumulative values were plotted again.
# assign() builds a separate frame so that df_confirmed_india keeps exactly two
# columns, which the later `df_confirmed_india.columns = ['ds', 'y']` relies on.
# NOTE(review): diff() assumes the rows are in chronological order, which holds
# when the 'Date' strings sort chronologically (e.g. ISO format) - confirm.
daily_confirmed = df_confirmed_india.assign(
    # the first row has no predecessor; keep its own value there
    Confirmed=df_confirmed_india['Confirmed'].diff().fillna(df_confirmed_india['Confirmed'])
)
fig = px.bar(daily_confirmed, x="Date", y="Confirmed", barmode='group', height=400)
fig.update_layout(title_text='Coronavirus Cases in India on daily basis')
fig.show()

In [76]:
# Nationwide deaths: cumulative trend first, then new deaths per day.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_death_india['Date'], y=df_death_india['Deaths'],
                         mode='lines+markers', name='Total Deaths',
                         line=dict(color='Red', width=2)))
fig.update_layout(title_text='Trend of Death Cases in India', plot_bgcolor='rgb(230, 230, 230)')
fig.show()

# New COVID-19 deaths reported daily in India.
# The trend chart above already shows the running total, so this one plots the
# day-over-day difference; before, the same cumulative values were drawn again.
# assign() builds a separate frame so that df_death_india keeps exactly two
# columns, which the later `df_death_india.columns = ['ds', 'y']` relies on.
# NOTE(review): diff() assumes the rows are in chronological order, which holds
# when the 'Date' strings sort chronologically (e.g. ISO format) - confirm.
daily_deaths = df_death_india.assign(
    # the first row has no predecessor; keep its own value there
    Deaths=df_death_india['Deaths'].diff().fillna(df_death_india['Deaths'])
)
fig = px.bar(daily_deaths, x="Date", y="Deaths", barmode='group', height=500)
fig.update_layout(title_text='Coronavirus Deaths in India on daily basis')
fig.show()

#Day-Wise Confirmed, Deaths and Cured cases

In [77]:
# One graph-objects figure with three line+marker traces: confirmed, deaths, recovered.
# Each entry: (source frame, value column, legend label, line colour).
trace_specs = [
    (df_confirmed_india, 'Confirmed', 'Confirmed', 'red'),
    (df_death_india, 'Deaths', 'Deaths', 'black'),
    (df_cured_india, 'Cured', 'Recovered', 'orange'),
]

fig = go.Figure()
for frame, value_column, legend_label, line_colour in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=frame['Date'],
            y=frame[value_column],
            mode='lines+markers',
            name=legend_label,
            line=dict(color=line_colour, width=2),
        )
    )
fig.update_layout(
    title='India COVID-19 Cases',
    xaxis_tickfont_size=14,
    yaxis=dict(title='Number of Cases'),
)
fig.show()

#Analyzing State-Wise Cases and Recovery

In [78]:
# Per-state summary: the maximum of each counter over all of a state's rows.
# The columns are selected with a list (double brackets); the old tuple form
# ['Confirmed','Deaths','Cured'] is deprecated and raises on current pandas.
state_cases = (
    df.groupby('State/UnionTerritory')[['Confirmed', 'Deaths', 'Cured']]
    .max()
    .reset_index()
)

# Derived columns.
# Active = confirmed cases that are neither recorded as dead nor as cured.
state_cases['Active'] = state_cases['Confirmed'] - (state_cases['Deaths'] + state_cases['Cured'])
# Rates are percentages of confirmed cases, rounded to two decimals.
state_cases["Death Rate (per 100)"] = np.round(100 * state_cases["Deaths"] / state_cases["Confirmed"], 2)
state_cases["Cure Rate (per 100)"] = np.round(100 * state_cases["Cured"] / state_cases["Confirmed"], 2)

In [79]:
state_cases.head()  # first five rows of the per-state summary

In [80]:
state_cases['State/UnionTerritory'].unique()  # array of the distinct names; unique() itself does not sort

In [81]:
state_cases['State/UnionTerritory'].nunique()  # how many distinct names there are

#Data for about 40 states and union territories is available.

#Analyzing the Confirmed,Recovered and Deaths on Each State

In [82]:
# Styled view of state_cases, ordered by 'Confirmed' from highest to lowest,
# with a separate colour map applied to each numeric column.
gradient_for_column = {
    "Confirmed": 'plasma',
    "Deaths": 'hsv',
    "Cured": 'Greens',
    "Active": 'cividis',
    "Death Rate (per 100)": 'RdYlBu',
    "Cure Rate (per 100)": 'Accent',
}

state_wise_cases = state_cases.sort_values('Confirmed', ascending=False).style
for column_name, colour_map in gradient_for_column.items():
    state_wise_cases = state_wise_cases.background_gradient(cmap=colour_map, subset=[column_name])

#Displaying state-wise cases

In [83]:
# Render the styled per-state table built in the previous cell.
state_wise_cases

#Inference: Maharashtra has the highest number of confirmed cases

#Checking the casualty Ratio

In [84]:
# Add a 'Fatality-Ratio' column to df: deaths divided by confirmed cases, row by row.
row_fatality_ratio = df['Deaths'].div(df['Confirmed'])
df['Fatality-Ratio'] = row_fatality_ratio
df['Fatality-Ratio']  # show the new column

#Fatality ratio among states

In [85]:
# Fatality ratio per state = deaths / confirmed cases, both taken at their
# per-state maximum (the same reduction the state-wise summary uses).
# The earlier version summed the per-row ratio over every date, which produces
# a number that grows with how many days a state has reported - not a ratio.
state_totals = df.groupby('State/UnionTerritory')[['Deaths', 'Confirmed']].max()
# where() blanks out states with zero confirmed cases so they give NaN instead
# of a division by zero.
confirmed_nonzero = state_totals['Confirmed'].where(state_totals['Confirmed'] > 0)
fatality_ratio = (
    (state_totals['Deaths'] / confirmed_nonzero)
    .rename('Fatality-Ratio')
    .reset_index()
)
fatality_ratio.plot('State/UnionTerritory', 'Fatality-Ratio')
# Same data as a table, highest ratio first, shaded by value.
ambade = fatality_ratio.sort_values('Fatality-Ratio', ascending=False).style.background_gradient(cmap='plasma', subset=["Fatality-Ratio"])
ambade

## Top 10 States with highest number of Deaths

In [86]:
# Top 10 states by number of deaths, shown twice in one Plotly figure:
# a table on the left and a bar chart on the right.

# Full ranking of states by deaths, highest first.
top_10 = state_cases.groupby('State/UnionTerritory')['Deaths'].sum().sort_values(ascending=False).reset_index()
top_states = top_10.head(10)   # only the first ten rows are displayed

# Table trace (plotly graph objects), placed in the left ~half of the canvas.
trace = go.Table(
    domain=dict(x=[0, 0.52],
                y=[0, 1.0]),
    header=dict(values=["State/UnionTerritory", "Deaths Cases"],  # column headings and their styling
                fill=dict(color='#119DFF'),
                font=dict(color='white', size=14),
                align=['center'],
                height=30),
    cells=dict(values=[top_states['State/UnionTerritory'], top_states['Deaths']],  # cell contents
               fill=dict(color=['salmon', 'white']),
               align=['left'], height=20))

# Bar trace, bound to the x1/y1 axes that the layout positions on the right.
trace1 = go.Bar(x=top_states['State/UnionTerritory'],
                y=top_states['Deaths'],
                xaxis='x1',
                yaxis='y1',
                marker=dict(color='darkred'), opacity=0.60)

layout = dict(
    width=1000,
    height=400,
    autosize=False,
    # The figure ranks states by deaths; the title used to say "Cases".
    title='Top-10 States with Highest No. of Deaths',
    showlegend=False,
    xaxis1=dict(domain=[0.58, 1], anchor='y1', showticklabels=True),
    yaxis1=dict(domain=[0, 1.0], anchor='x1', hoverformat='.1f'),
)
fig1 = dict(data=[trace, trace1], layout=layout)
iplot(fig1)

## Testing Details

In [87]:
# Second data set: testing details per state.
testing_csv_path = r"../input/covid19-in-india/StatewiseTestingDetails.csv"
df_testing = pd.read_csv(testing_csv_path)
df_testing.head()  # preview: the first five rows

In [88]:
# Sample collections per state: add up 'TotalSamples' over each state's rows
# and order the states from most to fewest samples.
# NOTE(review): if 'TotalSamples' is a running total per date, summing it
# over-counts and max() would be the right reduction - confirm against the CSV.
samples_by_state = df_testing.groupby('State')['TotalSamples'].sum()
df_sample = samples_by_state.sort_values(ascending=False).reset_index()
df_sample.head()

#positive cases rising in India

In [89]:
# Positive results per state: add up 'Positive' over each state's rows and
# order the states from highest to lowest.
# NOTE(review): if 'Positive' is a running total per date, summing it
# over-counts and max() would be the right reduction - confirm against the CSV.
positive_by_state = df_testing.groupby('State')['Positive'].sum()
df_positive = positive_by_state.sort_values(ascending=False).reset_index()

# Interactive bar chart (cufflinks) of the ten highest states ...
ten_highest_positive = df_positive.head(10)
ten_highest_positive.iplot(kind='bar', x='State', y='Positive')

# ... and a static pandas plot over all states.
df_positive.plot('State', 'Positive')

#Vaccination analysis

In [90]:
# Third data set: vaccination figures per state.
vaccine_csv_path = r'../input/covid19-in-india/covid_vaccine_statewise.csv'
df_vaccine = pd.read_csv(vaccine_csv_path)
df_vaccine.head()  # preview: the first five rows


## Vaccination data by gender, shown as a pie chart

 



In [91]:
 #plot is using plotly.express
    
# Column totals for the three gender categories, fed to a Plotly Express pie chart.
# NOTE(review): the sums run over every row of df_vaccine; if the file holds
# running totals per date the absolute numbers are inflated - confirm.
gender_columns = (
    "Male(Individuals Vaccinated)",
    "Female(Individuals Vaccinated)",
    "Transgender(Individuals Vaccinated)",
)
male, female, trans = (df_vaccine[column].sum() for column in gender_columns)

px.pie(
    names=["Male Vaccinated", "Female Vaccinated", "Trans Gender"],
    values=[male, female, trans],
    title="Male and Female Vaccinated ratio for Covid19",
    color_discrete_sequence=px.colors.sequential.Viridis,
)


#Covishield vs Covaxin

In [92]:
# Doses administered per vaccine brand, as a pie chart.
# The leading blank in the Covaxin column name is part of the lookup key.
brand_columns = (" Covaxin (Doses Administered)", "CoviShield (Doses Administered)")
Covaxin, Covishield = (df_vaccine[column].sum() for column in brand_columns)

px.pie(
    names=["Covaxin Vaccinated", "Covishield Vaccinated"],
    values=[Covaxin, Covishield],
    title="Covaxin and Covishield Vaccination",
    color_discrete_sequence=px.colors.sequential.PuBu,
)

#Doses administered vs people being vaccinated.

In [93]:
# Total doses administered against total individuals vaccinated, over all rows of df_vaccine.
national_columns = ("Total Doses Administered", "Total Individuals Vaccinated")
Doses, Vaccinated = (df_vaccine[column].sum() for column in national_columns)

px.pie(
    names=["Doses Administered", "People Vaccinated"],
    values=[Doses, Vaccinated],
    title="Doses administered vs People Vaccinated",
    color_discrete_sequence=px.colors.sequential.Cividis_r,
)

#Analysis of Karnataka in terms of the number of people vaccinated.

In [94]:
# Same doses-vs-people comparison, restricted to the rows whose 'State' is Karnataka.
# Note that Doses and Vaccinated are rebound here and no longer hold the
# all-rows values computed in the previous cell.
is_karnataka = df_vaccine['State'] == 'Karnataka'
df_Karnataka = df_vaccine[is_karnataka]

karnataka_columns = ("Total Doses Administered", "Total Individuals Vaccinated")
Doses, Vaccinated = (df_Karnataka[column].sum() for column in karnataka_columns)

px.pie(
    names=["Doses Administered", "People Vaccinated"],
    values=[Doses, Vaccinated],
    title="Doses administered vs People Vaccinated",
)

In [104]:
# Interactive line-and-marker chart of Karnataka's 'Total Individuals Vaccinated'
# against 'Updated On' (iplot is the plotting method this notebook uses via cufflinks).
df_Karnataka.iplot(kind='scatter',x='Updated On',y='Total Individuals Vaccinated',mode="lines+markers")

#Threat Prediction(Upcoming cases vs Deaths)

#1.Predicting upcoming cases:

In [96]:
# Prepare the nationwide confirmed-case series for forecasting: the two columns
# ('Date', 'Confirmed') are renamed to 'ds' and 'y', the names the model cell
# below is fed with.
# Heads-up: this renames the columns of df_confirmed_india itself, so the
# earlier chart cells that read its 'Date' / 'Confirmed' columns cannot be
# re-run after this point without rebuilding the frame.
prophet_column_names = ['ds', 'y']
df_confirmed_india.columns = prophet_column_names

# Turn the date strings into real datetime values.
parsed_dates = pd.to_datetime(df_confirmed_india['ds'])
df_confirmed_india['ds'] = parsed_dates
 

In [110]:
# Prophet is a forecasting procedure implemented in R and Python.
# The Python package was renamed from 'fbprophet' to 'prophet' with its 1.0
# release, so try the current name first and fall back to the old one; the
# plain 'fbprophet' import fails on environments that only have the new package.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

m = Prophet()
m.fit(df_confirmed_india)  # fit on the 'ds' / 'y' frame prepared in the previous cell

# make_future_dataframe lets you specify the frequency and the number of periods
# to forecast into the future; by default the frequency is daily.
future = m.make_future_dataframe(periods=365)  # forecast 365 days ahead

 

In [105]:
# Predict confirmed cases for every date in `future`.
# NOTE(review): `future` is reassigned by the deaths-model cell further down, so
# this cell must run before that one - confirm the intended execution order.
forecast = m.predict(future)
confirmed_model_plot=m.plot(forecast)  # keep the returned figure in a variable so it is not rendered twice


#The black dots are the observed values, up to the last date in the data.
#The shaded band shows the range within which the forecast is expected to fall.

In [99]:
# Component plots of the confirmed-cases forecast.
confirmed_forecast_plot =m.plot_components(forecast)  # keep the returned figure in a variable so it is not rendered twice

#2.Predicting possible deaths:

In [100]:
# Same forecasting recipe as for confirmed cases, applied to the deaths series.
# Rename ('Date', 'Deaths') to ('ds', 'y') and parse the dates.  This changes
# df_death_india itself, so earlier chart cells that read 'Date' / 'Deaths'
# cannot be re-run afterwards without rebuilding the frame.
prophet_death_columns = ['ds', 'y']
df_death_india.columns = prophet_death_columns
parsed_death_dates = pd.to_datetime(df_death_india['ds'])
df_death_india['ds'] = parsed_death_dates

model = Prophet()
model.fit(df_death_india)

# Extend the fitted date range by 365 days (rebinds `future`) and predict over it.
future = model.make_future_dataframe(periods=365)
prediction = model.predict(future)
death_model_plot = model.plot(prediction)  # assigned so the figure is not rendered twice
 

In [101]:
# Component plots of the deaths forecast; assigned so the figure is not rendered twice.
death_forecast_plot=model.plot_components(prediction)   