# Rhode Island Scraper from Google Sheet/CSV

In [96]:
from selenium import webdriver
from bs4 import BeautifulSoup
import zipfile
import pandas as pd
from io import StringIO
import requests
import csv
import time
import random

## Creating Dataframe by exporting GSheet as CSV

In [77]:
# original link ->https://docs.google.com/spreadsheets/d/1c2QrNMz8pIbYEKzMJL7Uh2dtThOJa2j1sSMwiDo5Gz4/edit#gid=31350783
# edited link to export as CSV

# URL is specific to the tab for demographics, so it only downloads this tab as a CSV and not the others
RhodeIslandCSVUrl = "https://docs.google.com/spreadsheets/d/1c2QrNMz8pIbYEKzMJL7Uh2dtThOJa2j1sSMwiDo5Gz4/export?format=csv&gid=31350783"

#reading from CSV
RhodeIslandDemographicsData = pd.read_csv(RhodeIslandCSVUrl)

## Cleaning the Dataframe

In [78]:
# the CSV includes data we don't need like age and sex, let's get rid of that so that our data frame only includes race
# and ethnicity data

#drop first 20 rows
RhodeIslandDemographicsData = RhodeIslandDemographicsData.iloc[20:,]

# drop rows indexed 31 to 38
RhodeIslandDemographicsData = RhodeIslandDemographicsData.drop([31,32,33,34,35,36,37])

#rename columns
RhodeIslandDemographicsData.rename(columns = {'Unnamed: 0':'Category'}, inplace = True)
RhodeIslandDemographicsData.rename(columns = {'All People Tested\n (Positive and Negative)':'All Tests'}, inplace = True)
RhodeIslandDemographicsData.rename(columns = {'Unnamed: 2':'% from Tests'}, inplace = True)
RhodeIslandDemographicsData.rename(columns = {'Cases':'Confirmed Cases'}, inplace = True)
RhodeIslandDemographicsData.rename(columns = {'Unnamed: 4':'% from Cases'}, inplace = True)
RhodeIslandDemographicsData.rename(columns = {'Unnamed: 6':'% from Hospitalizations'}, inplace = True)
RhodeIslandDemographicsData.rename(columns = {'Unnamed: 8':'% from Deaths'}, inplace = True)

# Mostly for aesthetic reasons, let's replace all 'Nan' values with '--', I find that this makes it easier to read
RhodeIslandDemographicsData = RhodeIslandDemographicsData.fillna('--')

# display dataframe
RhodeIslandDemographicsData

Unnamed: 0,Category,All Tests,% from Tests,Confirmed Cases,% from Cases,Hospitalizations,% from Hospitalizations,Deaths,% from Deaths
20,Race and Ethnicity,--,--,--,--,--,--,--,--
21,Hispanic or Latino†,28092,19%,10654,43%,949,36%,110,11%
22,American Indian or Alaska Native*,3682,3%,93,<1%,5,<1%,<5,--
23,Asian*,3575,2%,448,2%,47,2%,14,1%
24,Black or African American*,10776,7%,2865,12%,328,12%,62,6%
25,Native Hawaiian or Other Pacific Islander*,431,<1%,0,0%,0,0%,0,0%
26,White*,87100,59%,10051,41%,1249,47%,784,81%
27,Other race*,12622,9%,399,2%,53,2%,0,0%
28,Multiple race*,129,<1%,221,1%,9,<1%,0,0%
29,Declined,2976,--,276,--,9,--,0,--


## Now, let's do the same thing but to create a data frame for the case, death, and hospitalization totals

In [95]:
# Let's get the URL specific to the tab that has the totals, this is the 'Summary' tab. 
# Just like earlier, let's create our dataframe by reading this CSV

RhodeIslandTotalsUrl = "https://docs.google.com/spreadsheets/d/1c2QrNMz8pIbYEKzMJL7Uh2dtThOJa2j1sSMwiDo5Gz4/export?format=csv&gid=264100583"
RhodeIslandTotals = pd.read_csv(RhodeIslandTotalsUrl)

# The dataframe as it stands now has lots of data we don't need,
# let's clean it so we only display totals for cases, deaths, and hospitilizations

RhodeIslandTotals = RhodeIslandTotals.iloc[9:,]
RhodeIslandTotals = RhodeIslandTotals.drop([10,11,13,15,16,17,18,19,20,21,22,23,25,26])

#let's rename the rows to make it easier to read
RhodeIslandTotals.rename(index = {'Total deaths':'TOTAL DEATHS'}, inplace = True)
RhodeIslandTotals = RhodeIslandTotals.replace(['Cumulative people tested (one count per person)'],'Total People Tested')
RhodeIslandTotals = RhodeIslandTotals.replace(['Cumulative hospital admissions'],'Total Hospitalizations')
RhodeIslandTotals = RhodeIslandTotals.replace(['Cumulative people who tested positive (counts first positive lab per person)'],'Total Cases')

RhodeIslandTotals

Unnamed: 0,Last updated:,10/30/2020
9,Total Cases,32874
12,Total People Tested,432876
14,Total Hospitalizations,3309
24,Total deaths,1201
