In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_paths:
    print(input_path)


In [None]:
# Load the state-wise COVID-19 table from the uploaded archive into a DataFrame.
# NOTE(review): the Population values shown in the outputs below do not look
# aligned with their states (e.g. Maharashtra 399001 vs. Andaman and Nicobar
# 100896618) — verify the source data before relying on that column.
dataset_path = '/content/archive.zip'
covidI = pd.read_csv(dataset_path)

In [None]:
# Preview the first five rows of the dataset.
covidI.head(n=5)

Unnamed: 0,State/UTs,Total Cases,Active,Discharged,Deaths,Active Ratio,Discharge Ratio,Death Ratio,Population
0,Andaman and Nicobar,10747,0,10618,129,0.0,98.8,1.2,100896618
1,Andhra Pradesh,2339078,7,2324338,14733,0.0,99.37,0.63,128500364
2,Arunachal Pradesh,66891,0,66595,296,0.0,99.56,0.44,658019
3,Assam,746100,0,738065,8035,0.0,98.92,1.08,290492
4,Bihar,851404,1,839100,12303,0.0,98.55,1.45,40100376


In [None]:
# Dimensions of the dataset as (rows, columns).
covidI.shape

(36, 9)

In [None]:
# Column labels available in the dataset.
covidI.columns

Index(['State/UTs', 'Total Cases', 'Active', 'Discharged', 'Deaths',
       'Active Ratio', 'Discharge Ratio', 'Death Ratio', 'Population'],
      dtype='object')

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
covidI.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   State/UTs        36 non-null     object 
 1   Total Cases      36 non-null     int64  
 2   Active           36 non-null     int64  
 3   Discharged       36 non-null     int64  
 4   Deaths           36 non-null     int64  
 5   Active Ratio     36 non-null     float64
 6   Discharge Ratio  36 non-null     float64
 7   Death Ratio      36 non-null     float64
 8   Population       36 non-null     int64  
dtypes: float64(3), int64(5), object(1)
memory usage: 2.7+ KB


In [None]:
# Mount Google Drive at /content/drive (Colab-only module).
# NOTE(review): the dataset is loaded in an earlier cell from
# '/content/archive.zip', which is not under this mount point, and no visible
# cell reads from /content/drive — confirm whether this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
covidI.describe()

Unnamed: 0,Total Cases,Active,Discharged,Deaths,Active Ratio,Discharge Ratio,Death Ratio,Population
count,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0
mean,1241145.0,56.527778,1226346.0,14742.388889,0.002222,98.893333,1.103889,39718610.0
std,1834031.0,215.847762,1808252.0,27170.089767,0.007601,0.498764,0.498479,50509130.0
min,10747.0,0.0,10618.0,4.0,0.0,97.54,0.03,66001.0
25%,105865.0,0.0,104864.0,1120.75,0.0,98.6725,0.8425,1695473.0
50%,612772.0,7.5,606357.0,6542.0,0.0,98.925,1.07,24100880.0
75%,1320822.0,27.5,1311256.0,14292.75,0.0,99.15,1.3275,69799860.0
max,8136945.0,1300.0,7988392.0,148419.0,0.04,99.97,2.46,231502600.0


In [None]:
# Number of missing values in each column.
covidI.isna().sum()

State/UTs          0
Total Cases        0
Active             0
Discharged         0
Deaths             0
Active Ratio       0
Discharge Ratio    0
Death Ratio        0
Population         0
dtype: int64

In [None]:
# Number of rows that are exact duplicates of an earlier row.
covidI.duplicated().sum()

0

In [None]:
# Distinct state/UT names, in order of first appearance.
covidI.loc[:, 'State/UTs'].unique()

array(['Andaman and Nicobar', 'Andhra Pradesh', 'Arunachal Pradesh',
       'Assam', 'Bihar', 'Chandigarh', 'Chhattisgarh',
       'Dadra and Nagar Haveli and Daman and Diu', 'Delhi', 'Goa',
       'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu and Kashmir',
       'Jharkhand', 'Karnataka', 'Kerala', 'Ladakh', 'Lakshadweep',
       'Madhya Pradesh', 'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram',
       'Nagaland', 'Odisha', 'Puducherry', 'Punjab', 'Rajasthan',
       'Sikkim', 'Tamil Nadu', 'Telengana', 'Tripura', 'Uttar Pradesh',
       'Uttarakhand', 'West Bengal'], dtype=object)

In [None]:
# How many distinct states/UTs the dataset covers.
covidI.loc[:, 'State/UTs'].nunique()

36

### State with Minimum number of Deaths

In [None]:
# Row(s) whose death count equals the smallest value in the 'Deaths' column.
fewest_deaths = covidI['Deaths'].min()
covidI.loc[covidI['Deaths'] == fewest_deaths]

Unnamed: 0,State/UTs,Total Cases,Active,Discharged,Deaths,Active Ratio,Discharge Ratio,Death Ratio,Population
7,Dadra and Nagar Haveli and Daman and Diu,11591,0,11587,4,0.0,99.97,0.03,231502578


In [None]:
# Smallest death count recorded for any state/UT.
covidI.loc[:, 'Deaths'].min()

4

### State with Maximum number of Deaths

In [None]:
# Row(s) whose death count equals the largest value in the 'Deaths' column.
most_deaths = covidI['Deaths'].max()
covidI.loc[covidI['Deaths'] == most_deaths]

Unnamed: 0,State/UTs,Total Cases,Active,Discharged,Deaths,Active Ratio,Discharge Ratio,Death Ratio,Population
20,Maharashtra,8136945,134,7988392,148419,0.0,98.17,1.82,399001


In [None]:
# Largest death count recorded for any state/UT.
covidI.loc[:, 'Deaths'].max()

148419

### Top 5 States with Highest Deaths

In [None]:
# Five states/UTs with the most deaths, highest first.
deaths_by_state = covidI.loc[:, ['State/UTs', 'Deaths']]
deaths_by_state.sort_values(by='Deaths', ascending=False).head(5)

Unnamed: 0,State/UTs,Deaths
20,Maharashtra,148419
16,Kerala,71570
15,Karnataka,40308
30,Tamil Nadu,38049
8,Delhi,26522


In [None]:
# Ten states/UTs with the most deaths, highest first. The frame is sorted once
# and sliced, so each state name stays paired with its death count without
# repeating the sort or rebuilding a DataFrame from raw arrays.
top_deaths = (
    covidI[['State/UTs', 'Deaths']]
    .sort_values(by='Deaths', ascending=False)
    .head(10)
)

# One bar per state/UT, coloured by state/UT.
fig = px.bar(
    top_deaths,
    x='State/UTs',
    y='Deaths',
    color='State/UTs',
    title='State/UTs Vs No. of Deaths',
)
fig.show()


In [None]:
# Active case count alongside each state/UT name.
covidI.loc[:, ['State/UTs', 'Active']]

Unnamed: 0,State/UTs,Active
0,Andaman and Nicobar,0
1,Andhra Pradesh,7
2,Arunachal Pradesh,0
3,Assam,0
4,Bihar,1
5,Chandigarh,3
6,Chhattisgarh,8
7,Dadra and Nagar Haveli and Daman and Diu,0
8,Delhi,10
9,Goa,15


In [None]:
# Ten states/UTs with the most active cases, highest first. The frame is sorted
# once and sliced; the column is renamed so the chart still labels the values
# as 'Active Cases'.
top_active = (
    covidI[['State/UTs', 'Active']]
    .sort_values(by='Active', ascending=False)
    .head(10)
    .rename(columns={'Active': 'Active Cases'})
)

# Donut chart (hole=0.3) with one slice per state/UT.
fig = px.pie(
    top_active,
    names='State/UTs',
    values='Active Cases',
    color='State/UTs',
    title='State/UTs Vs No. of Active Cases',
    hole=0.3,
    height=800,
    width=800,
)

# Show value and label outside each slice; `pull` carries one entry per slice
# (ten slices, matching the ten rows selected above).
fig.update_traces(
    textposition='outside',
    textinfo='value+label',
    pull=[0, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.1, 0.1, 0.3],
)

fig.show()

### Top 10 States with Highest Number of Discharged Patients

In [None]:
 # Names of the ten states/UTs with the most discharged patients, highest first.
 discharged_ranked = covidI[['State/UTs', 'Discharged']].sort_values(by='Discharged', ascending=False)
 discharged_ranked['State/UTs'].head(10).values

array(['Maharashtra', 'Kerala', 'Karnataka', 'Tamil Nadu',
       'Andhra Pradesh', 'Uttar Pradesh', 'West Bengal', 'Delhi',
       'Odisha', 'Rajasthan'], dtype=object)

In [None]:
# Ten states/UTs with the most discharged patients, highest first. The frame is
# sorted once and sliced instead of sorting twice and rebuilding a DataFrame
# from the extracted arrays.
top_discharged = (
    covidI[['State/UTs', 'Discharged']]
    .sort_values(by='Discharged', ascending=False)
    .head(10)
)

# One marker per state/UT, coloured by state/UT.
fig = px.scatter(
    top_discharged,
    x='State/UTs',
    y='Discharged',
    color='State/UTs',
    title='State/UTs Vs No. of Discharged',
)
fig.show()


In [None]:
# Ten states/UTs with the most total cases, highest first. The frame is sorted
# once and sliced instead of sorting twice and rebuilding a DataFrame from the
# extracted arrays.
top_total_cases = (
    covidI[['State/UTs', 'Total Cases']]
    .sort_values(by='Total Cases', ascending=False)
    .head(10)
)

# Line chart with a marker at every state/UT.
fig = px.line(
    top_total_cases,
    x='State/UTs',
    y='Total Cases',
    markers=True,
    title='State/UTs Vs No. of Total Cases',
)

fig.show()

### Active cases are far fewer than total or discharged cases

In [None]:
# Compare total, discharged and active case counts for every state/UT on one
# figure, one line per measure.
states = covidI['State/UTs'].values

# (column in covidI, legend label) for each line drawn on the figure.
case_series = [
    ('Total Cases', 'Total'),
    ('Discharged', 'Discharged'),
    ('Active', 'Active'),
]

fig = go.Figure()
for column, label in case_series:
    fig.add_trace(go.Scatter(x=states, y=covidI[column].values, name=label))

# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title='Total vs Discharged vs Active Cases by State/UT',
    xaxis_title='State/UTs',
    yaxis_title='Number of cases',
)

fig.show()

In [None]:
# Death ratio of every state/UT, drawn as horizontal bars.
df = covidI.loc[:, ['State/UTs', 'Death Ratio']].copy()

fig = px.bar(
    df,
    x='Death Ratio',
    y='State/UTs',
    color='State/UTs',  # one colour per state/UT
    title='State/UTs versus Death Ratio',
    orientation='h',
    height=1000,
)
fig.show()
