In [1]:
# Import the packages

import pandas as pd
import numpy as np

# Import the files

uk_cases = pd.read_csv('covid_19_uk_cases.csv')
uk_vaccinated = pd.read_csv('covid_19_uk_vaccinated.csv')
tweets = pd.read_csv('tweets.csv')

In [2]:
# View the Data
print(uk_cases.shape)
print(uk_cases.dtypes)
print(uk_vaccinated.shape)
print(uk_vaccinated.dtypes)
uk_cases

(7584, 12)
Province/State               object
Country/Region               object
Lat                         float64
Long                        float64
ISO 3166-1 Alpha 3-Codes     object
Sub-region Name              object
Intermediate Region Code      int64
Date                         object
Deaths                      float64
Cases                       float64
Recovered                   float64
Hospitalised                float64
dtype: object
(7584, 11)
Province/State               object
Country/Region               object
Lat                         float64
Long                        float64
ISO 3166-1 Alpha 3-Codes     object
Sub-region Name              object
Intermediate Region Code      int64
Date                         object
Vaccinated                    int64
First Dose                    int64
Second Dose                   int64
dtype: object


Unnamed: 0,Province/State,Country/Region,Lat,Long,ISO 3166-1 Alpha 3-Codes,Sub-region Name,Intermediate Region Code,Date,Deaths,Cases,Recovered,Hospitalised
0,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-22,0.0,0.0,0.0,0.0
1,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-23,0.0,0.0,0.0,0.0
2,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-24,0.0,0.0,0.0,0.0
3,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-25,0.0,0.0,0.0,0.0
4,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-26,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
7579,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-10,137735.0,8154306.0,0.0,378.0
7580,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-11,137763.0,8193769.0,0.0,386.0
7581,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-12,137944.0,8231437.0,0.0,386.0
7582,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-13,138080.0,8272883.0,0.0,0.0


# Identify the area that has the greatest number of people who received a first dose

In [3]:
# Sense checking the data which fields can be used as keys
print(uk_cases['ISO 3166-1 Alpha 3-Codes'].unique())

['AIA' 'BMU' 'VGB' 'CYM' 'Others' 'FLK' 'GIB' 'IMN' 'MSR' 'GBR' 'TCA']


In [4]:
# Since the province/state field looks more complete (12 entries vs 11) I will be using the province/state field
print(uk_cases['Province/State'].unique())

['Anguilla' 'Bermuda' 'British Virgin Islands' 'Cayman Islands'
 'Channel Islands' 'Falkland Islands (Malvinas)' 'Gibraltar' 'Isle of Man'
 'Montserrat' 'Saint Helena, Ascension and Tristan da Cunha'
 'Turks and Caicos Islands' 'Others']


In [5]:
# As both dataframes contain information on country, cases and vaccinations let's merge them first
# To make sure both have the region and the date we create a key for both tables on country region and date

uk_cases['key'] = uk_cases['Province/State'] + '|' + uk_cases['Date']

uk_vaccinated['key'] = uk_vaccinated['Province/State'] + '|' + uk_vaccinated['Date']

print(uk_cases['key'].head())
print(uk_vaccinated['key'].head())

uk_cases

0    Anguilla|2020-01-22
1    Anguilla|2020-01-23
2    Anguilla|2020-01-24
3    Anguilla|2020-01-25
4    Anguilla|2020-01-26
Name: key, dtype: object
0    Anguilla|2020-01-22
1    Anguilla|2020-01-23
2    Anguilla|2020-01-24
3    Anguilla|2020-01-25
4    Anguilla|2020-01-26
Name: key, dtype: object


Unnamed: 0,Province/State,Country/Region,Lat,Long,ISO 3166-1 Alpha 3-Codes,Sub-region Name,Intermediate Region Code,Date,Deaths,Cases,Recovered,Hospitalised,key
0,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-22,0.0,0.0,0.0,0.0,Anguilla|2020-01-22
1,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-23,0.0,0.0,0.0,0.0,Anguilla|2020-01-23
2,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-24,0.0,0.0,0.0,0.0,Anguilla|2020-01-24
3,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-25,0.0,0.0,0.0,0.0,Anguilla|2020-01-25
4,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-26,0.0,0.0,0.0,0.0,Anguilla|2020-01-26
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7579,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-10,137735.0,8154306.0,0.0,378.0,Others|2021-10-10
7580,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-11,137763.0,8193769.0,0.0,386.0,Others|2021-10-11
7581,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-12,137944.0,8231437.0,0.0,386.0,Others|2021-10-12
7582,Others,United Kingdom,55.3781,-3.4360,GBR,Northern Europe,0,2021-10-13,138080.0,8272883.0,0.0,0.0,Others|2021-10-13


In [6]:
# Now a merge will be attempted

covid_data = pd.merge(uk_cases, uk_vaccinated, how = 'inner', on = 'key')

covid_data.head()

Unnamed: 0,Province/State_x,Country/Region_x,Lat_x,Long_x,ISO 3166-1 Alpha 3-Codes_x,Sub-region Name_x,Intermediate Region Code_x,Date_x,Deaths,Cases,...,Country/Region_y,Lat_y,Long_y,ISO 3166-1 Alpha 3-Codes_y,Sub-region Name_y,Intermediate Region Code_y,Date_y,Vaccinated,First Dose,Second Dose
0,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-22,0.0,0.0,...,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-22,0,0,0
1,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-23,0.0,0.0,...,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-23,0,0,0
2,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-24,0.0,0.0,...,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-24,0,0,0
3,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-25,0.0,0.0,...,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-25,0,0,0
4,Anguilla,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-26,0.0,0.0,...,United Kingdom,18.2206,-63.0686,AIA,Latin America and the Caribbean,29,2020-01-26,0,0,0


In [7]:
# Create a new DF from the merged one 
covid_data_clean = covid_data[['Province/State_x', 'Country/Region_x', 'Sub-region Name_x',
                              'Date_x', 'Deaths', 'Cases',
                              'Vaccinated', 'First Dose', 'Second Dose']]

covid_data_clean

Unnamed: 0,Province/State_x,Country/Region_x,Sub-region Name_x,Date_x,Deaths,Cases,Vaccinated,First Dose,Second Dose
0,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-22,0.0,0.0,0,0,0
1,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-23,0.0,0.0,0,0,0
2,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-24,0.0,0.0,0,0,0
3,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-25,0.0,0.0,0,0,0
4,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-26,0.0,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...
7579,Others,United Kingdom,Northern Europe,2021-10-10,137735.0,8154306.0,1070,1216,1070
7580,Others,United Kingdom,Northern Europe,2021-10-11,137763.0,8193769.0,1300,1604,1300
7581,Others,United Kingdom,Northern Europe,2021-10-12,137944.0,8231437.0,1482,2027,1482
7582,Others,United Kingdom,Northern Europe,2021-10-13,138080.0,8272883.0,0,0,0


In [8]:
# Clean up the header names

covid_data_clean.rename(columns = {'Province/State_x': 'Province/State', 'Country/Region_x' : 'Country/Region',
                                   'Sub-region Name_x' : 'Sub-region', 'Date_x' : 'Date'}, inplace = True)

covid_data_clean

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_data_clean.rename(columns = {'Province/State_x': 'Province/State', 'Country/Region_x' : 'Country/Region',


Unnamed: 0,Province/State,Country/Region,Sub-region,Date,Deaths,Cases,Vaccinated,First Dose,Second Dose
0,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-22,0.0,0.0,0,0,0
1,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-23,0.0,0.0,0,0,0
2,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-24,0.0,0.0,0,0,0
3,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-25,0.0,0.0,0,0,0
4,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-26,0.0,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...
7579,Others,United Kingdom,Northern Europe,2021-10-10,137735.0,8154306.0,1070,1216,1070
7580,Others,United Kingdom,Northern Europe,2021-10-11,137763.0,8193769.0,1300,1604,1300
7581,Others,United Kingdom,Northern Europe,2021-10-12,137944.0,8231437.0,1482,2027,1482
7582,Others,United Kingdom,Northern Europe,2021-10-13,138080.0,8272883.0,0,0,0


In [9]:
# Check for unique Sub-Regions
print(covid_data_clean['Sub-region'].unique())

['Latin America and the Caribbean' 'Northern America' 'Northern Europe'
 'Southern Europe']


In [10]:
# Convert the date from obj to datetime
covid_data_clean['Date'] = pd.to_datetime(covid_data_clean['Date'])

print(covid_data_clean.dtypes)

Province/State            object
Country/Region            object
Sub-region                object
Date              datetime64[ns]
Deaths                   float64
Cases                    float64
Vaccinated                 int64
First Dose                 int64
Second Dose                int64
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_data_clean['Date'] = pd.to_datetime(covid_data_clean['Date'])


In [11]:
# Now we have a DataSet that we can work with, Let's determine the number of cases across the UK

cases_province = covid_data_clean.groupby('Province/State')

In [12]:
cases_province.sum().sort_values('Cases', ascending = False)

Unnamed: 0_level_0,Deaths,Cases,Vaccinated,First Dose,Second Dose
Province/State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Others,46987145.0,1621651000.0,2466669,2583151,2466669
Channel Islands,37130.0,1957978.0,3139385,3287646,3139385
Gibraltar,25412.0,1413853.0,5606041,5870786,5606041
Isle of Man,15051.0,887133.0,4036345,4226984,4036345
Turks and Caicos Islands,5612.0,752618.0,2915136,3052822,2915136
Bermuda,10353.0,685442.0,2690908,2817981,2690908
British Virgin Islands,3573.0,284961.0,4933315,5166303,4933315
Cayman Islands,911.0,217756.0,3363624,3522476,3363624
Anguilla,24.0,35315.0,4709072,4931470,4709072
Falkland Islands (Malvinas),0.0,20482.0,3587869,3757307,3587869


In [14]:
# Let's check the proportion of people who only got one dose of the vaccination. 
covid_data_clean['FirstDoseOnly'] = covid_data_clean['First Dose'] - covid_data_clean['Second Dose']

# The total number of people who are either first or second dose can then be calculated by second + first only
covid_data_clean['VaccinatedPopulation'] = covid_data_clean['Second Dose'] + covid_data_clean['FirstDoseOnly']

# Now I can identify the proportion of the total population who only received a fist dose 
covid_data_clean['FirstDoseProportion'] = covid_data_clean['FirstDoseOnly'] / covid_data_clean['VaccinatedPopulation']

covid_data_clean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_data_clean['FirstDoseOnly'] = covid_data_clean['First Dose'] - covid_data_clean['Second Dose']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_data_clean['VaccinatedPopulation'] = covid_data_clean['Second Dose'] + covid_data_clean['FirstDoseOnly']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-

Unnamed: 0,Province/State,Country/Region,Sub-region,Date,Deaths,Cases,Vaccinated,First Dose,Second Dose,FirstDoseOnly,VaccinatedPopulation,FirstDoseProportion
0,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-22,0.0,0.0,0,0,0,0,0,
1,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-23,0.0,0.0,0,0,0,0,0,
2,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-24,0.0,0.0,0,0,0,0,0,
3,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-25,0.0,0.0,0,0,0,0,0,
4,Anguilla,United Kingdom,Latin America and the Caribbean,2020-01-26,0.0,0.0,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
7579,Others,United Kingdom,Northern Europe,2021-10-10,137735.0,8154306.0,1070,1216,1070,146,1216,0.120066
7580,Others,United Kingdom,Northern Europe,2021-10-11,137763.0,8193769.0,1300,1604,1300,304,1604,0.189526
7581,Others,United Kingdom,Northern Europe,2021-10-12,137944.0,8231437.0,1482,2027,1482,545,2027,0.268870
7582,Others,United Kingdom,Northern Europe,2021-10-13,138080.0,8272883.0,0,0,0,0,0,


In [16]:
# Identify the area that has the greatest number of people who received a first dose but not a second one
cases_province = covid_data_clean.groupby('Province/State')
cases_province.sum().sort_values('FirstDoseOnly', ascending = False)

Unnamed: 0_level_0,Deaths,Cases,Vaccinated,First Dose,Second Dose,FirstDoseOnly,VaccinatedPopulation,FirstDoseProportion
Province/State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Gibraltar,25412.0,1413853.0,5606041,5870786,5606041,264745,5870786,-252.854055
Montserrat,539.0,9556.0,5157560,5401128,5157560,243568,5401128,-252.85049
British Virgin Islands,3573.0,284961.0,4933315,5166303,4933315,232988,5166303,-252.846322
Anguilla,24.0,35315.0,4709072,4931470,4709072,222398,4931470,-252.843137
Isle of Man,15051.0,887133.0,4036345,4226984,4036345,190639,4226984,-252.838177
Falkland Islands (Malvinas),0.0,20482.0,3587869,3757307,3587869,169438,3757307,-252.84792
Cayman Islands,911.0,217756.0,3363624,3522476,3363624,158852,3522476,-252.853196
Channel Islands,37130.0,1957978.0,3139385,3287646,3139385,148261,3287646,-252.844271
Turks and Caicos Islands,5612.0,752618.0,2915136,3052822,2915136,137686,3052822,-252.841623
Bermuda,10353.0,685442.0,2690908,2817981,2690908,127073,2817981,-252.852192


In [17]:
# To show this information on a higher level I am going to do this as well on region level
cases_province = covid_data_clean.groupby('Sub-region')
cases_province.sum().sort_values('FirstDoseOnly', ascending = False)

Unnamed: 0_level_0,Deaths,Cases,Vaccinated,First Dose,Second Dose,FirstDoseOnly,VaccinatedPopulation,FirstDoseProportion
Sub-region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Latin America and the Caribbean,10659.0,1320688.0,24666576,25831506,24666576,1164930,25831506,-1517.082688
Northern Europe,47039330.0,1624497000.0,11884820,12446091,11884820,561271,12446091,-1011.386154
Southern Europe,25412.0,1413853.0,5606041,5870786,5606041,264745,5870786,-252.854055
Northern America,10353.0,685442.0,2690908,2817981,2690908,127073,2817981,-252.852192


In [43]:
# LEt's check if there is a difference over time for the first and second dose application

cases_over_time = covid_data_clean[['Province/State','Date','FirstDoseOnly', 'FirstDoseProportion']]

cases_over_time['Month'] = cases_over_time['Date'].dt.month
cases_over_time['Year'] = cases_over_time['Date'].dt.year

cases_over_time['dateindex'] = cases_over_time['Date'].dt.to_period('M')
print(cases_over_time.dtypes)
cases_over_time

Province/State                 object
Date                   datetime64[ns]
FirstDoseOnly                   int64
FirstDoseProportion           float64
Month                           int64
Year                            int64
dateindex                   period[M]
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cases_over_time['Month'] = cases_over_time['Date'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cases_over_time['Year'] = cases_over_time['Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cases_over_time['dateindex'] = cases_over_time['Date'].dt.to_period('M')


Unnamed: 0,Province/State,Date,FirstDoseOnly,FirstDoseProportion,Month,Year,dateindex
0,Anguilla,2020-01-22,0,,1,2020,2020-01
1,Anguilla,2020-01-23,0,,1,2020,2020-01
2,Anguilla,2020-01-24,0,,1,2020,2020-01
3,Anguilla,2020-01-25,0,,1,2020,2020-01
4,Anguilla,2020-01-26,0,,1,2020,2020-01
...,...,...,...,...,...,...,...
7579,Others,2021-10-10,146,0.120066,10,2021,2021-10
7580,Others,2021-10-11,304,0.189526,10,2021,2021-10
7581,Others,2021-10-12,545,0.268870,10,2021,2021-10
7582,Others,2021-10-13,0,,10,2021,2021-10


In [44]:
cases_grouped = cases_over_time.groupby(pd.Grouper(freq = 'm', key = 'Date'))['FirstDoseProportion'].last().\
ffill().reset_index()

cases_grouped

Unnamed: 0,Date,FirstDoseProportion
0,2020-01-31,
1,2020-02-29,
2,2020-03-31,
3,2020-04-30,
4,2020-05-31,
5,2020-06-30,
6,2020-07-31,
7,2020-08-31,
8,2020-09-30,
9,2020-10-31,


In [45]:
# Pivoting the data so that we get the provinces on columns 
cases_pivot = cases_over_time.pivot(index = 'Date' , columns = 'Province/State', values = 'FirstDoseProportion')

cases_pivot.dropna()

Province/State,Anguilla,Bermuda,British Virgin Islands,Cayman Islands,Channel Islands,Falkland Islands (Malvinas),Gibraltar,Isle of Man,Montserrat,Others,"Saint Helena, Ascension and Tristan da Cunha",Turks and Caicos Islands
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-01-11,0.856824,0.856864,0.856874,0.856815,0.856819,0.856884,0.856843,0.856858,0.856869,0.856874,0.856907,0.856840
2021-01-12,0.922629,0.922632,0.922646,0.922632,0.922606,0.922651,0.922644,0.922628,0.922658,0.922599,0.922662,0.922655
2021-01-13,0.965072,0.965047,0.965063,0.965059,0.965072,0.965046,0.965068,0.965067,0.965054,0.965063,0.965082,0.965087
2021-01-14,0.983400,0.983423,0.983408,0.983412,0.983400,0.983383,0.983404,0.983405,0.983388,0.983408,0.983391,0.983386
2021-01-15,0.987667,0.987695,0.987668,0.987656,0.987653,0.987658,0.987671,0.987662,0.987669,0.987694,0.987695,0.987650
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-08,0.082657,0.082588,0.082640,0.082492,0.082507,0.082840,0.082555,0.082749,0.082602,0.082616,0.082702,0.083010
2021-10-09,-0.066444,-0.066348,-0.066319,-0.066242,-0.065969,-0.066083,-0.066514,-0.066171,-0.066482,-0.066010,-0.066242,-0.066144
2021-10-10,0.120207,0.119910,0.120066,0.120024,0.119586,0.119910,0.120159,0.120101,0.119984,0.120066,0.119457,0.120390
2021-10-11,0.190010,0.189714,0.189779,0.190128,0.190010,0.189803,0.189797,0.189714,0.189866,0.189526,0.189856,0.189873


In [47]:
# View the dataframe as a whole and remove the dates where there are no entries

pd.set_option('display.max_rows', None, 'display.max_columns', None)
print(cases_pivot.dropna())

Province/State  Anguilla   Bermuda  British Virgin Islands  Cayman Islands  \
Date                                                                         
2021-01-11      0.856824  0.856864                0.856874        0.856815   
2021-01-12      0.922629  0.922632                0.922646        0.922632   
2021-01-13      0.965072  0.965047                0.965063        0.965059   
2021-01-14      0.983400  0.983423                0.983408        0.983412   
2021-01-15      0.987667  0.987695                0.987668        0.987656   
2021-01-16      0.991156  0.991164                0.991159        0.991175   
2021-01-17      0.988227  0.988252                0.988236        0.988217   
2021-01-18      0.977646  0.977624                0.977637        0.977656   
2021-01-19      0.989038  0.989024                0.989059        0.989043   
2021-01-20      0.990620  0.990601                0.990622        0.990610   
2021-01-21      0.993261  0.993250                0.993257      