In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

In [2]:
covid = pd.read_csv("covid_19_india.csv")

In [3]:
covid.dtypes

Sno                          int64
Date                        object
Time                        object
State/UnionTerritory        object
ConfirmedIndianNational     object
ConfirmedForeignNational    object
Cured                        int64
Deaths                       int64
Confirmed                    int64
dtype: object

In [4]:
covid["Date"] = covid["Date"].astype("datetime64[ns]")

In [5]:
covid

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
0,1,2020-01-30,6:00 PM,Kerala,1,0,0,0,1
1,2,2020-01-31,6:00 PM,Kerala,1,0,0,0,1
2,3,2020-01-02,6:00 PM,Kerala,2,0,0,0,2
3,4,2020-02-02,6:00 PM,Kerala,3,0,0,0,3
4,5,2020-03-02,6:00 PM,Kerala,3,0,0,0,3
...,...,...,...,...,...,...,...,...,...
5331,5332,2020-08-18,8:00 AM,Telengana,-,-,72202,711,93937
5332,5333,2020-08-18,8:00 AM,Tripura,-,-,5404,62,7409
5333,5334,2020-08-18,8:00 AM,Uttarakhand,-,-,8485,158,12493
5334,5335,2020-08-18,8:00 AM,Uttar Pradesh,-,-,104808,2515,158216


In [6]:
covid["Month"] = covid["Date"].dt.month

In [7]:
col_drop = ["ConfirmedForeignNational","ConfirmedIndianNational","Sno","Time"]
covid.drop(covid[col_drop],axis = 1,inplace = True)

In [8]:
covid.columns

Index(['Date', 'State/UnionTerritory', 'Cured', 'Deaths', 'Confirmed',
       'Month'],
      dtype='object')

In [9]:
covid = covid.rename(columns = {"State/UnionTerritory":"State"})

In [10]:
covid[covid["State"] == "Telangana***"]

Unnamed: 0,Date,State,Cured,Deaths,Confirmed,Month
4526,2020-07-26,Telangana***,40334,455,52466,7


In [11]:
covid["State"] = covid["State"].str.strip("***")

In [12]:
covid[covid["State"] == "Unassigned"]

Unnamed: 0,Date,State,Cured,Deaths,Confirmed,Month
500,2020-03-30,Unassigned,0,0,46,3
528,2020-03-31,Unassigned,0,0,38,3
617,2020-03-04,Unassigned,0,0,77,3


In [13]:
covid = covid.drop([500,528,617])

In [14]:
covid["State"] = covid["State"].str.replace("Telengana","Telangana")
covid["State"] = covid["State"].replace({"Cases being reassigned to states":"Reassigned"
                            ,"Dadra and Nagar Haveli and Daman and Diu":"Daman and Diu"
                            ,"Daman & Diu" : "Daman and Diu"
                            ,"Dadar Nagar Haveli":"Dadra and Nagar Haveli"})

In [15]:
covid

Unnamed: 0,Date,State,Cured,Deaths,Confirmed,Month
0,2020-01-30,Kerala,0,0,1,1
1,2020-01-31,Kerala,0,0,1,1
2,2020-01-02,Kerala,0,0,2,1
3,2020-02-02,Kerala,0,0,3,2
4,2020-03-02,Kerala,0,0,3,3
...,...,...,...,...,...,...
5331,2020-08-18,Telangana,72202,711,93937,8
5332,2020-08-18,Tripura,5404,62,7409,8
5333,2020-08-18,Uttarakhand,8485,158,12493,8
5334,2020-08-18,Uttar Pradesh,104808,2515,158216,8


In [16]:
new_covid = covid.groupby(["State"])["Confirmed"].sum().to_frame()

In [17]:
data_plot = new_covid.sort_values("Confirmed",ascending = False)

In [18]:
new_covid = new_covid.reset_index()

In [19]:
import seaborn as sns

In [20]:
data_plot.reset_index(inplace = True)

In [21]:
# data_plot["State"] = data_plot["State"].replace({"Jammu and Kashmir": "J & K"
#                             ,"Cases being reassigned to states":"Reassigned"
#                             ,"Dadra and Nagar Haveli and Daman and Diu":"Daman & Diu"
#                             ,"Andaman and Nicobar Islands":"A&N Islands"
#                             ,"Dadar Nagar Haveli":"Daman & Diu"})

In [22]:
data_plot = data_plot.drop([21],axis = 0).reset_index()

In [23]:
data_plot

Unnamed: 0,index,State,Confirmed
0,0,Maharashtra,22437979
1,1,Tamil Nadu,11965369
2,2,Delhi,7896076
3,3,Andhra Pradesh,5795248
4,4,Karnataka,5142676
5,5,Uttar Pradesh,4170121
6,6,Gujarat,3750039
7,7,West Bengal,3305083
8,8,Telangana,2897368
9,9,Bihar,2377066


In [117]:
plt.figure(figsize =(9,5))
sns.barplot(x = "Confirmed",y = "State",data = data_plot,palette = "muted")
# plt.bar(x = "Confirmed",height = "State")
plt.xlabel("Confirmed ($10^7$)")
plt.gca().spines["right"].set_visible(False)
plt.gca().spines["top"].set_visible(False)
plt.gca().spines["left"].set_visible(False)
plt.tick_params(left = False)
plt.gca().set_yticklabels(data_plot["State"],fontsize = 8 )
plt.title("Confirmed Cases till August")
plt.tight_layout()

<IPython.core.display.Javascript object>

In [25]:
top_3  = new_covid.set_index("State").apply(lambda x: x.nlargest(3)) ######FIRST SET INDEX TO STATE THEN USE NLARGEST

In [26]:
top_3

Unnamed: 0_level_0,Confirmed
State,Unnamed: 1_level_1
Maharashtra,22437979
Tamil Nadu,11965369
Delhi,7896076


### THE AGE GROUP



In [27]:
age_group = pd.read_csv("IndividualDetails.csv")

In [28]:
age_group.isnull().sum()

id                        0
government_id         25185
diagnosed_date            0
age                   25836
gender                22869
detected_city         25832
detected_district      6984
detected_state            0
nationality           25473
current_status            0
status_change_date      402
notes                  1335
dtype: int64

In [29]:
age_group = age_group.dropna()

In [30]:
age_group.isnull().sum()

id                    0
government_id         0
diagnosed_date        0
age                   0
gender                0
detected_city         0
detected_district     0
detected_state        0
nationality           0
current_status        0
status_change_date    0
notes                 0
dtype: int64

### CHILD 0- 16
### Young Adults  17-30
### Middle Aged Adults 31-45
### Old Age Adults above 45

In [31]:
age_group["age"] = age_group["age"].astype(int)

In [32]:
age_group

Unnamed: 0,id,government_id,diagnosed_date,age,gender,detected_city,detected_district,detected_state,nationality,current_status,status_change_date,notes
0,0,KL-TS-P1,30/01/2020,20,F,Thrissur,Thrissur,Kerala,India,Recovered,14/02/2020,Travelled from Wuhan
3,3,DL-P1,02/03/2020,45,M,East Delhi (Mayur Vihar),East Delhi,Delhi,India,Recovered,15/03/2020,"Travelled from Austria, Italy"
4,4,TS-P1,02/03/2020,24,M,Hyderabad,Hyderabad,Telangana,India,Recovered,02/03/2020,"Travelled from Dubai to Bangalore on 20th Feb,..."
28,28,DL-P2,05/03/2020,27,M,Janakpuri,South West Delhi,Delhi,India,Hospitalized,05/03/2020,"Travelled from Italy PayTm Emp,"
33,33,TN-P1,07/03/2020,45,M,Kancheepuram,Kancheepuram,Tamil Nadu,India,Recovered,07/03/2020,Travelled from Oman to Chennai Airport on 28.0...
...,...,...,...,...,...,...,...,...,...,...,...,...
19121,19122,KL-KN--P104,21/04/2020,32,F,Kottayam Malabar,Kannur,Kerala,India,Hospitalized,21/04/2020,Contact transmission
19130,19131,KL-KL-P10,21/04/2020,35,M,Kulathupuzha,Kollam,Kerala,India,Hospitalized,21/04/2020,"Travel history to Thenkashi, Tamil Nadu"
24837,24838,KL-KL-P12,25/04/2020,51,M,Kulathupuzha,Kollam,Kerala,India,Hospitalized,25/04/2020,"Contact transmission, friend of P19074"
24838,24839,KL-KL-P13,25/04/2020,47,F,Chathannoor,Kollam,Kerala,India,Hospitalized,25/04/2020,"Details awaited, ASHA Worker, random sampling"


In [33]:
child_data = age_group[age_group["age"] <= 16]

In [34]:
young_adults = age_group[(age_group["age"] >= 17) & (age_group["age"] <=30)]

In [35]:
middle_aged = age_group[(age_group["age"] >= 31) & (age_group["age"] <=45)]

In [36]:
old_age = age_group[age_group["age"] > 45]

In [37]:
child_number = child_data.shape[0]

In [38]:
yound_number = young_adults.shape[0]

In [39]:
middle_number = middle_aged.shape[0]

In [40]:
old_number = old_age.shape[0]

In [41]:
counts = [child_number,yound_number,middle_number,old_number]


In [42]:
names = ["Children","Young Adults","Middle Aged","Old Aged"]

In [118]:
plt.figure()
sns.barplot(x = counts,y = names,palette = "hls")
plt.title("Age wise Covid Cases till August")
plt.tight_layout()


<IPython.core.display.Javascript object>

### CONFIRMED/DEATH/RECOVERED

In [44]:
covid_19 = pd.read_csv("covid_19_india.csv",parse_dates = ["Date"])

In [45]:
covid_19

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
0,1,2020-01-30,6:00 PM,Kerala,1,0,0,0,1
1,2,2020-01-31,6:00 PM,Kerala,1,0,0,0,1
2,3,2020-01-02,6:00 PM,Kerala,2,0,0,0,2
3,4,2020-02-02,6:00 PM,Kerala,3,0,0,0,3
4,5,2020-03-02,6:00 PM,Kerala,3,0,0,0,3
...,...,...,...,...,...,...,...,...,...
5331,5332,2020-08-18,8:00 AM,Telengana,-,-,72202,711,93937
5332,5333,2020-08-18,8:00 AM,Tripura,-,-,5404,62,7409
5333,5334,2020-08-18,8:00 AM,Uttarakhand,-,-,8485,158,12493
5334,5335,2020-08-18,8:00 AM,Uttar Pradesh,-,-,104808,2515,158216


In [46]:
covid_19.drop(["ConfirmedIndianNational","ConfirmedForeignNational","Time","Sno"],axis = 1,inplace = True)

In [47]:
covid_19.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5336 entries, 0 to 5335
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Date                  5336 non-null   datetime64[ns]
 1   State/UnionTerritory  5336 non-null   object        
 2   Cured                 5336 non-null   int64         
 3   Deaths                5336 non-null   int64         
 4   Confirmed             5336 non-null   int64         
dtypes: datetime64[ns](1), int64(3), object(1)
memory usage: 208.6+ KB


In [48]:
# covid_19["Date"] = pd.to_datetime(covid_19["Date"])

In [49]:
covid_19

Unnamed: 0,Date,State/UnionTerritory,Cured,Deaths,Confirmed
0,2020-01-30,Kerala,0,0,1
1,2020-01-31,Kerala,0,0,1
2,2020-01-02,Kerala,0,0,2
3,2020-02-02,Kerala,0,0,3
4,2020-03-02,Kerala,0,0,3
...,...,...,...,...,...
5331,2020-08-18,Telengana,72202,711,93937
5332,2020-08-18,Tripura,5404,62,7409
5333,2020-08-18,Uttarakhand,8485,158,12493
5334,2020-08-18,Uttar Pradesh,104808,2515,158216


### 1st Dataset : COVID

In [50]:
covid

Unnamed: 0,Date,State,Cured,Deaths,Confirmed,Month
0,2020-01-30,Kerala,0,0,1,1
1,2020-01-31,Kerala,0,0,1,1
2,2020-01-02,Kerala,0,0,2,1
3,2020-02-02,Kerala,0,0,3,2
4,2020-03-02,Kerala,0,0,3,3
...,...,...,...,...,...,...
5331,2020-08-18,Telangana,72202,711,93937,8
5332,2020-08-18,Tripura,5404,62,7409,8
5333,2020-08-18,Uttarakhand,8485,158,12493,8
5334,2020-08-18,Uttar Pradesh,104808,2515,158216,8


In [51]:
covid = covid.drop(covid[covid["State"] == "Reassigned"].index,axis = 0) ### important information for usage of drop

In [52]:
covid

Unnamed: 0,Date,State,Cured,Deaths,Confirmed,Month
0,2020-01-30,Kerala,0,0,1,1
1,2020-01-31,Kerala,0,0,1,1
2,2020-01-02,Kerala,0,0,2,1
3,2020-02-02,Kerala,0,0,3,2
4,2020-03-02,Kerala,0,0,3,3
...,...,...,...,...,...,...
5331,2020-08-18,Telangana,72202,711,93937,8
5332,2020-08-18,Tripura,5404,62,7409,8
5333,2020-08-18,Uttarakhand,8485,158,12493,8
5334,2020-08-18,Uttar Pradesh,104808,2515,158216,8


In [53]:
new_data =  covid.groupby("State")["Confirmed","Deaths"].sum()

  """Entry point for launching an IPython kernel.


In [54]:
new_data["Mortality Rate (per 100)"] = np.round((100*new_data["Deaths"])/new_data["Confirmed"],2)

In [55]:
new_data.reset_index().replace({"Daman & Diu":"Daman and Diu"})

Unnamed: 0,State,Confirmed,Deaths,Mortality Rate (per 100)
0,Andaman and Nicobar Islands,35819,328,0.92
1,Andhra Pradesh,5795248,58367,1.01
2,Arunachal Pradesh,61511,141,0.23
3,Assam,1838682,4291,0.23
4,Bihar,2377066,13777,0.58
5,Chandigarh,65390,963,1.47
6,Chhattisgarh,428382,2691,0.63
7,Dadra and Nagar Haveli,186,0,0.0
8,Daman and Diu,46920,70,0.15
9,Delhi,7896076,232066,2.94


In [56]:
new_data.sort_values("Confirmed",ascending = False).style.background_gradient(cmap = "Reds",subset = ["Mortality Rate (per 100)"])\
                                                    .background_gradient(cmap = "Blues",subset = ["Confirmed"])\
                                                    .background_gradient(cmap = "Purples",subset = ["Deaths"])

Unnamed: 0_level_0,Confirmed,Deaths,Mortality Rate (per 100)
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Maharashtra,22437979,842461,3.75
Tamil Nadu,11965369,179520,1.5
Delhi,7896076,232066,2.94
Andhra Pradesh,5795248,58367,1.01
Karnataka,5142676,93988,1.83
Uttar Pradesh,4170121,86356,2.07
Gujarat,3750039,177662,4.74
West Bengal,3305083,89612,2.71
Telangana,2897368,29967,1.03
Bihar,2377066,13777,0.58


### CORRELATION

In [57]:
new_data.corr().style.background_gradient(cmap = "Reds").format("{:.3f}")

Unnamed: 0,Confirmed,Deaths,Mortality Rate (per 100)
Confirmed,1.0,0.941,0.559
Deaths,0.941,1.0,0.613
Mortality Rate (per 100),0.559,0.613,1.0


In [58]:
# import geopandas as gpd

In [59]:
# df = r"india-polygon.shp"

In [60]:
# map_df = gpd.read_file(df)

In [61]:
# map_df

In [62]:
data_df = new_data.reset_index()

In [63]:
# new_df = pd.merge(data_df,map_df,how = "left",left_on = "State",right_on="st_nm")

In [64]:
# new_df

In [65]:
# new_df.drop(["State"],axis = 1,inplace = True)

In [66]:
data_df

Unnamed: 0,State,Confirmed,Deaths,Mortality Rate (per 100)
0,Andaman and Nicobar Islands,35819,328,0.92
1,Andhra Pradesh,5795248,58367,1.01
2,Arunachal Pradesh,61511,141,0.23
3,Assam,1838682,4291,0.23
4,Bihar,2377066,13777,0.58
5,Chandigarh,65390,963,1.47
6,Chhattisgarh,428382,2691,0.63
7,Dadra and Nagar Haveli,186,0,0.0
8,Daman and Diu,46920,70,0.15
9,Delhi,7896076,232066,2.94


In [67]:
import plotly.express as px

In [120]:
fig = px.choropleth(
    data_df,
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locations='State',
    color='Confirmed',
    color_continuous_scale="Sunsetdark"
)

fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(title_text = "Covid 19 Confirmed Cases")
fig.show()
plt.tight_layout()

In [94]:
fig2 = px.choropleth(
    data_df
    ,geojson = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"
    ,featureidkey = "properties.ST_NM"
    ,locations = "State"
    ,color = "Deaths"
    ,color_continuous_scale = "Agsunset"
)

fig2.update_geos(fitbounds = "locations",visible = False)
fig2.update_layout(title_text = "Covid 19 Deaths")
fig2.show()

In [95]:
fig3 = px.choropleth(
    data_df
    ,geojson = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"
    ,featureidkey = "properties.ST_NM"
    ,locations = "State"
    ,color = "Mortality Rate (per 100)"
    ,color_continuous_scale = "YlOrRd"
)

fig3.update_geos(fitbounds = "locations",visible = False)
fig3.update_layout(title_text = "Covid 19 Mortality Rate")
fig3.show()

In [121]:
sns.pairplot(new_data,diag_kind = "kde",palette = "husl");
plt.tight_layout()

<IPython.core.display.Javascript object>

In [116]:

sns.jointplot(new_data["Confirmed"],new_data["Deaths"],kind = "kde",space = 0)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.JointGrid at 0x7f281e20d710>

In [124]:
plt.figure()
sns.barplot(x = "Mortality Rate (per 100)", y = new_data.index,data = new_data)
plt.gca().set_yticklabels(new_data.index,fontsize = 8 )
plt.tight_layout()

<IPython.core.display.Javascript object>