In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file shipped with the dataset. os.walk() must be given a
# directory: when it is pointed at a file (as the CSV path was), the
# underlying scandir error is silently ignored and nothing is printed.
for dirname, _, filenames in os.walk('../input/covid19-corona-virus-india-dataset'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Problem Statement
#### The problem is to decide which states in India should be given the highest priority for vaccine supply.

### Importing necessary libraries

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

### Reading the dataset

In [None]:
# Load the latest state-level figures, then discard the first row of the file
# (presumably an all-India total row -- TODO confirm against the CSV).
state_data = pd.read_csv("../input/covid19-corona-virus-india-dataset/state_level_latest.csv")
first_row_label = state_data.index[:1]
state_data = state_data.drop(index=first_row_label)
state_data.head()

In [None]:
state_data.shape  # (number of rows, number of columns) of the loaded data

### checking for NaN values

In [None]:
# One boolean per column: True when that column contains at least one NaN
state_data.isna().any()

### Filling the NaN values using backward fill, then forward fill

In [None]:
# Back-fill first, then forward-fill whatever is still missing (trailing gaps
# have no later value to copy from). The result has to be assigned back:
# bfill()/ffill() return a new frame and leave the original untouched, so the
# previous un-assigned fillna() calls had no effect on state_data.
state_data = state_data.bfill().ffill()
state_data.head()

### Deleting the unrelated features

In [None]:
# Columns that are not used in the analysis below; each one is removed
# from state_data in place, in the order listed.
unrelated_columns = [
    "Migrated_Other",
    "Delta_Confirmed",
    'Delta_Deaths',
    'Delta_Recovered',
    'State_Notes',
    'Last_Updated_Time',
    'State_code',
]
for unrelated_column in unrelated_columns:
    del state_data[unrelated_column]

### Rechecking for NAN Values

In [None]:
# Per-column NaN check on the remaining columns after the deletions above
state_data.isna().any()

### Correlation between features

In [None]:
# Pearson correlation between the numeric columns only. Non-numeric columns
# (such as the state name) are excluded explicitly, because recent pandas
# versions raise on them instead of silently dropping them.
numeric_columns = state_data.select_dtypes(include="number")
cor = numeric_columns.corr(method='pearson')
cor

### Data Visualization

### Heatmap

In [None]:
# Draw the correlation matrix as a heatmap; annot=True writes each coefficient in its cell
sns.heatmap(cor,annot=True)

### Bar plot

In [None]:
import plotly.express as px
# Vertical bar chart of deaths per state; each bar is coloured by state
# and labelled with its own value.
fig = px.bar(
    state_data,
    x="State",
    y="Deaths",
    text='Deaths',
    color='State',
    orientation='v',
    title='Deceased Cases',
    width=1000,
    height=600,
)
fig.show()

In [None]:
# Vertical bar chart of recovered cases per state; each bar is coloured by
# state and labelled with its own value.
fig = px.bar(
    state_data,
    x="State",
    y="Recovered",
    text='Recovered',
    color='State',
    orientation='v',
    title='Recovered Cases',
    width=1000,
    height=600,
)
fig.show()

In [None]:
# Vertical bar chart of confirmed cases per state; each bar is coloured by
# state and labelled with its own value.
fig = px.bar(
    state_data,
    x="State",
    y="Confirmed",
    text='Confirmed',
    color="State",
    orientation='v',
    title='CORONA CASES IN INDIA',
    width=1000,
    height=600,
)
fig.show()

### Pie Chart

In [None]:
# Share of the total confirmed cases contributed by each state
fig = px.pie(
    state_data,
    names='State',
    values='Confirmed',
    title="CORONA CASES IN INDIA",
    height=600,
)

# Show the percentage and the state label inside each slice
fig.update_traces(textinfo='percent+label', textposition='inside')

# Centre the title; the geo options are carried over unchanged from the
# original cell.
geo_options = {'showframe': False, 'showcoastlines': False}
fig.update_layout(title_x=0.5, geo=geo_options)

fig.show()

### Line Plot

In [None]:
# Confirmed, recovered and deceased counts for every state, drawn as one
# line per measure.

# Sum the counts per state, keeping 'State' as an ordinary column
state_totals = state_data.groupby('State').sum().reset_index()

# Reshape to long form: one row per (state, measure) pair
measures = ['Confirmed', 'Recovered', 'Deaths']
line_data = state_totals.melt(
    id_vars='State',
    value_vars=measures,
    var_name='Ratio',
    value_name='Value',
)

fig = px.line(
    line_data,
    x="State",
    y="Value",
    color='Ratio',
    title='Confirmed cases, Recovered cases, and Deceased',
)
fig.show()

### Selecting the related features to build the model

In [None]:
# The four count columns passed to the clustering model
feature_columns = ['Confirmed', 'Active', 'Recovered', 'Deaths']
df1 = state_data[feature_columns]
df1.head()

### Applying Kmeans clustering

In [None]:
from sklearn.cluster import KMeans

# Group the states into 5 clusters using the selected count columns.
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4, so leaving it implicit makes the cluster assignment
# (and the colour-based conclusion) depend on the installed version.
# random_state fixes the centroid initialisation for reproducible runs.
kmeans_model = KMeans(n_clusters=5, n_init=10, random_state=0)
y_predict = kmeans_model.fit_predict(df1)
y_predict

### Adding the Clusters Column to the data

In [None]:
# Shift the predicted labels by one so that cluster numbering starts at 1,
# then store them next to each state.
cluster_labels = y_predict + 1
state_data['cluster'] = cluster_labels
state_data.head()

### Observing the clusters using scatter plot

In [None]:
# Plot the clusters (confirmed vs recovered), one colour per cluster.
# The cluster -> colour mapping lives in one place and is shown in the
# legend, so the colours named in the conclusion can be checked against it.
cluster_colors = {1: 'green', 2: 'blue', 3: 'red', 4: 'yellow', 5: 'black'}

scatter_fig, scatter_ax = plt.subplots(figsize=(10, 7))
for cluster_id, cluster_color in cluster_colors.items():
    # Rows of state_data assigned to this cluster
    cluster_members = state_data[state_data['cluster'] == cluster_id]
    scatter_ax.scatter(
        cluster_members['Confirmed'],
        cluster_members['Recovered'],
        color=cluster_color,
        label=f'Cluster {cluster_id} ({cluster_color})',
    )

scatter_ax.set_xlabel('Confirmed')
scatter_ax.set_ylabel('Recovered')
scatter_ax.set_title('KMeans clusters: Confirmed vs Recovered')
scatter_ax.legend(title='Cluster')
plt.show()

## Conclusion

### From the plot above, the state in the black cluster (Maharashtra) should be given the vaccine first, followed by the blue cluster (Tamil Nadu), then the red cluster (Karnataka, Delhi and Andhra Pradesh), then the yellow cluster, and finally the green cluster, depending on the availability of the vaccine.

### Blog Link

https://19pa1a0562.medium.com/prioritize-vaccine-delivery-in-india-state-wise-657ea80302ba