# R-Initialization

### R-Package Download

In [18]:
library(ggplot2) # For Plot Visualizations
library(car) # For Variance Inflation Factor function -- vif()
library(corrplot) # For Correlation Visualizations
library(data.table) # For Complex Data Frame Joins

# Data Initialization

### Data Download

In [92]:
# Downloading Vaccination Data
vaccinations = read.csv(url(paste0("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")))

# Downloading Testing Data
testing = read.csv(url(paste0("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/testing/covid-testing-all-observations.csv")))

# Downloading Hospitalization Data
hospitalization = read.csv(url(paste0("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/hospitalizations/covid-hospitalizations.csv")))

# Downloading Hospitalization Data
deaths = read.csv(url(paste0("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/jhu/new_deaths_per_million.csv")))
# deaths = read.csv(url(paste0("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/jhu/full_data.csv")))



In [93]:
head(vaccinations)
head(testing)
head(hospitalization)
head(deaths)

location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,daily_vaccinations_per_million,daily_people_vaccinated,daily_people_vaccinated_per_hundred
Afghanistan,AFG,2021-02-22,0.0,0.0,,,,,0.0,0.0,,,,,
Afghanistan,AFG,2021-02-23,,,,,,1367.0,,,,,33.0,1367.0,0.003
Afghanistan,AFG,2021-02-24,,,,,,1367.0,,,,,33.0,1367.0,0.003
Afghanistan,AFG,2021-02-25,,,,,,1367.0,,,,,33.0,1367.0,0.003
Afghanistan,AFG,2021-02-26,,,,,,1367.0,,,,,33.0,1367.0,0.003
Afghanistan,AFG,2021-02-27,,,,,,1367.0,,,,,33.0,1367.0,0.003


Entity,ISO.code,Date,Source.URL,Source.label,Notes,Cumulative.total,Daily.change.in.cumulative.total,Cumulative.total.per.thousand,Daily.change.in.cumulative.total.per.thousand,X7.day.smoothed.daily.change,X7.day.smoothed.daily.change.per.thousand,Short.term.positive.rate,Short.term.tests.per.case
Afghanistan - tests performed,AFG,2022-01-29,http://www.emro.who.int/images/stories/coronavirus/covid-sitrep-28.pdf,WHO Regional Office for the Eastern Mediterranean,,853003.0,,21.272,,,,,
Afghanistan - tests performed,AFG,2022-01-30,,,,,,,,,,,
Afghanistan - tests performed,AFG,2022-01-31,,,,,,,,,,,
Afghanistan - tests performed,AFG,2022-02-01,,,,,,,,,,,
Afghanistan - tests performed,AFG,2022-02-02,,,,,,,,,,,
Afghanistan - tests performed,AFG,2022-02-03,,,,,,,,,,,


entity,iso_code,date,indicator,value
Algeria,DZA,2020-07-17,Daily ICU occupancy,62.0
Algeria,DZA,2020-07-17,Daily ICU occupancy per million,1.381
Algeria,DZA,2020-07-18,Daily ICU occupancy,67.0
Algeria,DZA,2020-07-18,Daily ICU occupancy per million,1.492
Algeria,DZA,2020-07-20,Daily ICU occupancy,64.0
Algeria,DZA,2020-07-20,Daily ICU occupancy per million,1.425


date,World,Afghanistan,Africa,Albania,Algeria,Andorra,Angola,Anguilla,Antigua.and.Barbuda,...,Uruguay,Uzbekistan,Vanuatu,Vatican,Venezuela,Vietnam,Wallis.and.Futuna,Yemen,Zambia,Zimbabwe
2020-01-22,0.0,,,,,,,,,...,,,,,,,,,,
2020-01-23,0.0,,,,,,,,,...,,,,,,,,,,
2020-01-24,0.001,,,,,,,,,...,,,,,,,,,,
2020-01-25,0.002,,,,,,,,,...,,,,,,,,,,
2020-01-26,0.002,,,,,,,,,...,,,,,,,,,,
2020-01-27,0.003,,,,,,,,,...,,,,,,,,,,


### Data Cleanup

In [133]:
countries = colnames(deaths[-1])
dates = deaths[1]
n_countries = length(countries)
n_dates = length(dates[,1])

v = rep(dates[,1],n_countries)
data.frame(date=v,location=rep(countries,each=n_dates),)

length(v)


# M = matrix(NA,ncol=3,nrow =n_dates*n_countries); #check the help file so that you know what this does!

# for (i in 1:n_dates*n_countries){
#     #YOUR CODE HERE
#     M[]
#     M[i] = i
# }

# for (i in range(n_countries))

# length(dates[,1])
# for (i in range(nrow(deaths))) {
#     print(deaths[i,1])
# }

date,location
2020-01-22,World
2020-01-23,World
2020-01-24,World
2020-01-25,World
2020-01-26,World
2020-01-27,World
2020-01-28,World
2020-01-29,World
2020-01-30,World
2020-01-31,World


In [66]:
testing <- transform(testing,location=sub(" .*", "", Entity)) # Creates location column, using regex on Entity column
colnames(testing)[colnames(testing)=="Date"]="date" # Renames Date column to date
colnames(hospitalization)[colnames(hospitalization)=="entity"]="location" # Renames Date column to date
# Restructure deaths

head(testing)
head(hospitalization)
# hospitalization

Entity,ISO.code,date,Source.URL,Source.label,Notes,Cumulative.total,Daily.change.in.cumulative.total,Cumulative.total.per.thousand,Daily.change.in.cumulative.total.per.thousand,X7.day.smoothed.daily.change,X7.day.smoothed.daily.change.per.thousand,Short.term.positive.rate,Short.term.tests.per.case,location
Afghanistan - tests performed,AFG,2022-01-29,http://www.emro.who.int/images/stories/coronavirus/covid-sitrep-28.pdf,WHO Regional Office for the Eastern Mediterranean,,853003.0,,21.272,,,,,,Afghanistan
Afghanistan - tests performed,AFG,2022-01-30,,,,,,,,,,,,Afghanistan
Afghanistan - tests performed,AFG,2022-01-31,,,,,,,,,,,,Afghanistan
Afghanistan - tests performed,AFG,2022-02-01,,,,,,,,,,,,Afghanistan
Afghanistan - tests performed,AFG,2022-02-02,,,,,,,,,,,,Afghanistan
Afghanistan - tests performed,AFG,2022-02-03,,,,,,,,,,,,Afghanistan


location,iso_code,date,indicator,value
Algeria,DZA,2020-07-17,Daily ICU occupancy,62.0
Algeria,DZA,2020-07-17,Daily ICU occupancy per million,1.381
Algeria,DZA,2020-07-18,Daily ICU occupancy,67.0
Algeria,DZA,2020-07-18,Daily ICU occupancy per million,1.492
Algeria,DZA,2020-07-20,Daily ICU occupancy,64.0
Algeria,DZA,2020-07-20,Daily ICU occupancy per million,1.425


# Data Analyses

### Method 1:

We are looking purely at the deaths as a function of everything else. This being `Date`, `Location`, `Daily Testing`, `Daily ICU Occipancy`. We are looking at this as a density, to allow for better comparison between countries of different population magnitudes. We merge our data into a set:

In [89]:
data.method1 = setDT(deaths)[setDT(vaccinations),on=c("location","date")] # Merges deaths & vaccinations data on Location and Date
data.method1 = setDT(data.method1)[setDT(testing),on=c("location","date")] # Merges testing data on Location and Date
data.method1 = setDT(data.method1)[setDT(hospitalization),on=c("location","date")] # Merges hospitalization data on Location and Date
# data.method1 = data.method1["indicator"]=="Daily ICU occupancy per million"
# head(data.method1[colnames(data.method1)=="indicator"])
# data.method1
head(data.method1)
# head(data.method1[,c(-7:-10,-11)])
colnames(data.method1)
data.method1 <- data.frame(date=data.method1$date,location=data.method1$location,
                           new_deaths=data.method1$new_deaths,new_tests=data.method1$Daily.change.in.cumulative.total)
head(data.method1)
length(data.method1[2])


date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths,...,Daily.change.in.cumulative.total,Cumulative.total.per.thousand,Daily.change.in.cumulative.total.per.thousand,X7.day.smoothed.daily.change,X7.day.smoothed.daily.change.per.thousand,Short.term.positive.rate,Short.term.tests.per.case,i.iso_code,indicator,value
2020-07-17,Algeria,,,,,,,,,...,,,,,,,,DZA,Daily ICU occupancy,62.0
2020-07-17,Algeria,,,,,,,,,...,,,,,,,,DZA,Daily ICU occupancy per million,1.381
2020-07-18,Algeria,,,,,,,,,...,,,,,,,,DZA,Daily ICU occupancy,67.0
2020-07-18,Algeria,,,,,,,,,...,,,,,,,,DZA,Daily ICU occupancy per million,1.492
2020-07-20,Algeria,,,,,,,,,...,,,,,,,,DZA,Daily ICU occupancy,64.0
2020-07-20,Algeria,,,,,,,,,...,,,,,,,,DZA,Daily ICU occupancy per million,1.425


date,location,new_deaths,new_tests
2020-07-17,Algeria,,
2020-07-17,Algeria,,
2020-07-18,Algeria,,
2020-07-18,Algeria,,
2020-07-20,Algeria,,
2020-07-20,Algeria,,


In [88]:
data.method1 = merge(x=deaths,y=vaccinations,by=c("location","date")) # Merges deaths & vaccinations data on Location and Date
data.method1 = merge(x=data.method1,y=testing,by=c("location","date")) # Merges testing data on Location and Date
data.method1 = merge(x=data.method1,y=hospitalization,by=c("location","date")) # Merges hospitalization data on Location and Date
data.method1 = data.method1[data.method1$indicator=="Daily ICU occupancy per million",]

# data.method1 = data.method1[colnames(data.method1)=='i']

data.method1 <- data.frame(date=data.method1$date,location=data.method1$location,
                           new_deaths=data.method1$new_deaths,new_tests=data.method1$Daily.change.in.cumulative.total,
                           ICU_per_mil=data.method1$value)

head(data.method1)
# data.method1
# merge(x = df1, y = df2, by = "CustomerId", all = TRUE)


date,location,new_deaths,new_tests,ICU_per_mil
2022-03-07,Algeria,2,,0.245
2020-12-29,Argentina,150,47852.0,73.5
2020-12-30,Argentina,145,52023.0,75.587
2021-01-02,Argentina,56,30025.0,75.543
2021-01-03,Argentina,107,25735.0,75.433
2021-01-04,Argentina,152,50483.0,76.95
