In [None]:
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder,LabelEncoder

In [None]:
# Read the COVID-19 case CSV into the notebook-wide `Data` frame.
# NOTE(review): the path is absolute and machine-specific, so the notebook only
# runs where the file exists at exactly this location.
Data = pd.read_csv(
    '/Users/alok.g.v/Desktop/project/Covid 19/covid_19_india.csv'
)

In [None]:
# Preview the first five rows of the case data.
Data.head(n=5)

In [None]:
# Print the column names, dtypes and non-null counts of the raw case data.
Data.info()

In [None]:
# Replace the 'Date' column with parsed pandas datetimes, then re-inspect the
# dtypes to confirm the conversion.
parsed_dates = pd.to_datetime(Data['Date'])
Data['Date'] = parsed_dates
Data.info()

# Grouping Data via States according to the total number of confirmed cases

In [None]:
# Sum 'Confirmed' over every row of each state/union territory and rank the
# states from the largest total to the smallest.
# NOTE(review): if 'Confirmed' is a running (cumulative) count per date, summing
# it across dates overstates the totals and 'max' may be what is intended.
# Confirm against the dataset before relying on the absolute numbers.
confirmed_by_state = Data.groupby('State/UnionTerritory')['Confirmed'].sum()
result = confirmed_by_state.reset_index().sort_values(by='Confirmed', ascending=False)
result

# Tree map to show the number of people affected by covid in each state

In [None]:
# Treemap of the per-state 'Confirmed' aggregate: one tile per state/union
# territory under a single "India" root. Tile area and colour both encode
# 'Confirmed'.
# plotly.express (px) is imported with the other dependencies in the first cell.
fig = px.treemap(
    result,
    path=[px.Constant("India"), 'State/UnionTerritory'],
    values='Confirmed',
    color='Confirmed',
    title='Confirmed COVID-19 cases by state / union territory',
)

In [None]:
# Render the treemap figure created in the previous cell.
fig.show()

In [None]:
# Observation from the tree map
## Maharashtra has had almost twice as many confirmed cases as Karnataka, which has the 2nd highest number of confirmed cases

In [None]:
# Show the ten highest-ranked rows of the per-state table.
result.head(10)

# Line plot of top 10 infected states showing the spread of Covid 
## Plot 1- Spread of covid in the top 10 infected states 
## Plot 2- The death of covid patients over the same time period
## Plot 3- The Recovery of covid patients in the same time period

In [None]:
# States to plot.
# NOTE(review): this list is hard-coded; presumably it mirrors the first ten rows
# of `result`. Re-check it whenever the underlying data changes.
t10=['Maharashtra','Karnataka','Kerala','Tamil Nadu','Andhra Pradesh','Uttar Pradesh','Delhi','West Bengal','Chhattisgarh','Rajasthan']
# Keep only the rows that belong to the selected states.
Data_t10=Data[Data['State/UnionTerritory'].isin(t10)]

# One line per state: 'Confirmed' against 'Date'. The title and axis labels let
# the figure stand on its own next to the deaths and recoveries plots.
plt.figure(figsize=(12,8))
ax = sns.lineplot(x='Date',y='Confirmed',data=Data_t10,hue='State/UnionTerritory')
ax.set(title='Confirmed COVID-19 cases over time (10 selected states)',
       xlabel='Date', ylabel='Confirmed cases');

In [None]:
# Reuses `Data_t10` (rows of the ten selected states) built in the
# confirmed-cases cell instead of re-declaring the list and re-filtering `Data`.
# One line per state: 'Deaths' against 'Date'.
plt.figure(figsize=(12,8))
ax = sns.lineplot(x='Date',y='Deaths',data=Data_t10,hue='State/UnionTerritory')
ax.set(title='COVID-19 deaths over time (10 selected states)',
       xlabel='Date', ylabel='Deaths');

In [None]:
# Reuses `Data_t10` (rows of the ten selected states) built in the
# confirmed-cases cell instead of re-declaring the list and re-filtering `Data`.
# One line per state: 'Cured' against 'Date'.
plt.figure(figsize=(12,8))
ax = sns.lineplot(x='Date',y='Cured',data=Data_t10,hue='State/UnionTerritory')
ax.set(title='COVID-19 recoveries over time (10 selected states)',
       xlabel='Date', ylabel='Cured');

# Observation 
## 1. The first wave started in April 2020 and shows a reduction in daily cases after November 2020
## 2. The second wave started in Maharashtra in March 2021, while other states saw the start of the second wave in April 2021
## 3. Kerala appears to have had the sharpest rise in cases and also the highest recovery rate
## 4. Kerala also shows a low death rate compared to the death rate in other states

# Indian map representing the number of confirmed cases per state

In [None]:
# Load the GeoJSON outlines of the Indian states. The context manager closes the
# file handle as soon as parsing finishes (the bare open() call never closed it).
# json and plotly.express (px) are imported with the other dependencies in the
# first cell.
# NOTE(review): the path is absolute and machine-specific.
with open('/Users/alok.g.v/Desktop/project/india states json/GeoJson-Data-of-Indian-States-master/Indian_States') as geojson_file:
    india_states = json.load(geojson_file)

# `Data` holds many dated rows per state, but a choropleth needs exactly one
# value per location. Keep the most recent non-missing 'Confirmed' record of
# every state so each region is coloured by a single, well-defined number.
latest_confirmed = (
    Data.dropna(subset=['Confirmed'])
    .sort_values('Date')
    .drop_duplicates(subset='State/UnionTerritory', keep='last')
)

# Rows are matched to GeoJSON features through each feature's
# properties.NAME_1; state names absent from the GeoJSON are left uncoloured.
fig = px.choropleth(
    latest_confirmed,
    geojson=india_states,
    locations='State/UnionTerritory',
    color='Confirmed',
    featureidkey="properties.NAME_1",
    center={"lat": 23.2599, "lon": 77.4126},scope='asia',
    title='Confirmed COVID-19 cases per state (latest record)',
)

fig.show()


# Indian Map showing the Death of covid patients 

In [None]:
# `Data` holds many dated rows per state, but a choropleth needs exactly one
# value per location. Keep the most recent non-missing 'Deaths' record of every
# state so each region is coloured by a single, well-defined number.
latest_deaths = (
    Data.dropna(subset=['Deaths'])
    .sort_values('Date')
    .drop_duplicates(subset='State/UnionTerritory', keep='last')
)

# Uses the `india_states` GeoJSON loaded for the confirmed-cases map; rows are
# matched to features through each feature's properties.NAME_1.
fig = px.choropleth(
    latest_deaths,
    geojson=india_states,
    locations='State/UnionTerritory',
    color='Deaths',
    featureidkey="properties.NAME_1",
    center={"lat": 23.2599, "lon": 77.4126},scope='asia',
    title='COVID-19 deaths per state (latest record)',
)

fig.show()



# Vaccine Data Analysis

In [None]:
# Read the state-wise vaccine-dose CSV and parse its 'Vaccinated As of' column
# into pandas datetimes.
# NOTE(review): the path is absolute and machine-specific. `infer_datetime_format`
# is deprecated in pandas 2.x; confirm the pandas version before removing it,
# because dropping it can change how ambiguous day/month strings are parsed on
# older versions.
Data_vaccine = pd.read_csv(
    '/Users/alok.g.v/Desktop/project/Covid 19/vaccine_doses_statewise_v2.csv'
)
vaccinated_as_of = pd.to_datetime(Data_vaccine['Vaccinated As of'], infer_datetime_format=True)
Data_vaccine['Vaccinated As of'] = vaccinated_as_of

In [None]:
# Print the column names, dtypes and non-null counts of the vaccine data.
Data_vaccine.info()

# Top ten states in vaccine distribution

In [None]:
# Largest 'Total Doses Administered' value recorded for each state, ranked from
# highest to lowest, with the row labelled 33 removed.
# NOTE(review): 33 is a magic row label; presumably it is a non-state aggregate
# row. Confirm, and prefer filtering on the 'State' value so the cell survives
# changes to the input file.
doses_by_state = Data_vaccine.groupby('State')['Total Doses Administered'].max().reset_index()
result = (
    doses_by_state
    .sort_values(by='Total Doses Administered', ascending=False)
    .drop(labels=33)
)
result.head(10)

# Tree map showing the total number of vaccine doses administered in each state

In [None]:
# Treemap of the per-state dose table: one tile per state under a single "India"
# root, with tile area and colour both driven by 'Total Doses Administered'.
dose_column = 'Total Doses Administered'
treemap_levels = [px.Constant("India"), 'State']
fig = px.treemap(result, path=treemap_levels, values=dose_column, color=dose_column)
fig.show()

# Plot Displaying the total distribution of vaccines in the Top 10 states 

In [None]:
# States to plot.
# NOTE(review): this list is hard-coded; presumably it mirrors the ten highest
# rows of the dose ranking. Re-check it whenever the underlying data changes.
t10=['Maharashtra','Karnataka','Madhya Pradesh','Tamil Nadu','Gujarat','Uttar Pradesh','Bihar','West Bengal','Andhra Pradesh','Rajasthan']
# Keep only the vaccine rows that belong to the selected states.
Data_v_t10=Data_vaccine[Data_vaccine['State'].isin(t10)]

# One line per state: 'Total Doses Administered' against 'Vaccinated As of'.
plt.figure(figsize=(12,8))
ax = sns.lineplot(x='Vaccinated As of',y='Total Doses Administered',data=Data_v_t10,hue='State')
ax.set(title='Total vaccine doses administered over time (10 selected states)',
       xlabel='Vaccinated as of', ylabel='Total doses administered');

# Plot displaying the data on the first and second dose of the top 10 states 

In [None]:
# One line per state: 'First Dose Administered' against 'Vaccinated As of'.
# The title distinguishes this figure from the otherwise identical second-dose plot.
plt.figure(figsize=(12,8))
ax = sns.lineplot(x='Vaccinated As of',y='First Dose Administered',data=Data_v_t10,hue='State')
ax.set(title='First vaccine doses administered over time (10 selected states)',
       xlabel='Vaccinated as of', ylabel='First doses administered');

In [None]:
# One line per state: 'Second Dose Administered' against 'Vaccinated As of'.
# The title distinguishes this figure from the otherwise identical first-dose plot.
plt.figure(figsize=(12,8))
ax = sns.lineplot(x='Vaccinated As of',y='Second Dose Administered',data=Data_v_t10,hue='State')
ax.set(title='Second vaccine doses administered over time (10 selected states)',
       xlabel='Vaccinated as of', ylabel='Second doses administered');

In [None]:
# `Data_v_t10` holds many dated rows per state, but a choropleth needs exactly
# one value per location. Keep the most recent non-missing
# 'Total Doses Administered' record of each state.
latest_doses = (
    Data_v_t10.dropna(subset=['Total Doses Administered'])
    .sort_values('Vaccinated As of')
    .drop_duplicates(subset='State', keep='last')
)

# Only the ten states present in Data_v_t10 are coloured. Rows are matched to
# the `india_states` GeoJSON features through each feature's properties.NAME_1.
fig = px.choropleth(
    latest_doses,
    geojson=india_states,
    locations='State',
    color='Total Doses Administered',
    featureidkey="properties.NAME_1",
    center={"lat": 23.2599, "lon": 77.4126},scope='asia',
    title='Total vaccine doses administered (latest record, 10 selected states)',
)

fig.show()



# Merging the vaccine data and covid spread data on the basis of date and state

In [None]:
# Give the vaccine table the same key column names as the case table, then
# inner-join the two on date and state so only rows present in both remain.
# NOTE(review): `Data_vaccine` carries the renamed columns from here on, so
# earlier cells that reference 'State' / 'Vaccinated As of' on it cannot be
# re-run without reloading the CSV.
key_renames = {'Vaccinated As of': 'Date', 'State': 'State/UnionTerritory'}
Data_vaccine = Data_vaccine.rename(columns=key_renames)

Data_m = Data.merge(Data_vaccine, how='inner', on=['Date', 'State/UnionTerritory'])
Data_m


# The following table shows the confirmed cases and the total vaccine doses administered for the ten states used in the vaccine plots above

In [None]:
# Restrict the merged case/vaccine table to the ten listed states and build a
# per-state summary with two columns: 'Confirmed' and 'Total Doses Administered'.
t10 = [
    'Maharashtra', 'Karnataka', 'Madhya Pradesh', 'Tamil Nadu', 'Gujarat',
    'Uttar Pradesh', 'Bihar', 'West Bengal', 'Andhra Pradesh', 'Rajasthan',
]
Data_m_t10 = Data_m.loc[Data_m['State/UnionTerritory'].isin(t10)]

grouped_by_state = Data_m_t10.groupby('State/UnionTerritory')

# NOTE(review): 'Confirmed' is summed while 'Total Doses Administered' takes the
# maximum. If both columns are running totals, the two aggregates are not
# comparable; confirm which aggregation is intended.
result = (
    grouped_by_state['Confirmed'].sum()
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
)
result_v = (
    grouped_by_state['Total Doses Administered'].max()
    .reset_index()
    .sort_values(by='Total Doses Administered', ascending=False)
)

# Join the two per-state summaries on the state name.
result_m = result.merge(result_v, how='inner', on='State/UnionTerritory')
result_m


In [None]:
# Grouped bar chart: for every state, one bar for 'Confirmed' and one for
# 'Total Doses Administered'. The title and axis labels let the figure stand alone.
ax = result_m.plot(
    x='State/UnionTerritory',
    y=['Confirmed', 'Total Doses Administered'],
    kind='bar',
)
ax.set(title='Confirmed cases vs. total vaccine doses (10 selected states)',
       xlabel='State / union territory', ylabel='Count');