# Using the Computational Tools of R to Map the Spread of COVID-19 on a County by County Basis
## David Akinyemi, Margaret Smith, Nicholas Beati
### April 30, 2020

In [1]:
# Packages required for this notebook (attached in this order, so tidyverse
# functions mask same-named plyr functions)
packages_to_check <- c("lubridate", "igraph", "plyr", "tidyverse", "RColorBrewer")

# Names of every package already installed
packages <- as.character(installed.packages()[, "Package"])

# Required packages that are not installed yet
need_to_install <- setdiff(packages_to_check, packages)

# Install whatever is missing; install.packages() accepts the whole vector at once
if (length(need_to_install) > 0) {
    install.packages(need_to_install)
}

# Attach the required packages. library() stops with an error if a package is
# unavailable, whereas require() would only return FALSE and let later cells fail
invisible(lapply(packages_to_check, library, character.only = TRUE))

Loading required package: lubridate

Attaching package: ‘lubridate’

The following object is masked from ‘package:base’:

    date

Loading required package: igraph

Attaching package: ‘igraph’

The following objects are masked from ‘package:lubridate’:

    %--%, union

The following objects are masked from ‘package:stats’:

    decompose, spectrum

The following object is masked from ‘package:base’:

    union

Loading required package: plyr

Attaching package: ‘plyr’

The following object is masked from ‘package:lubridate’:

    here

Loading required package: tidyverse
── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──
[32m✔[39m [34mggplot2[39m 3.2.1     [32m✔[39m [34mpurrr  [39m 0.3.3
[32m✔[39m [34mtibble [39m 3.0.1     [32m✔[39m [34mdplyr  [39m 0.8.4
[32m✔[39m [34mtidyr  [39m 1.0.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.5.0
“package ‘dplyr’ was built

In [2]:
## STEP 1: Coerces NYT COVID data into one row per county per week

# Read in the COVID data
COVID <- read.csv("us-counties.csv")

# Keep the date, fips and cases columns
keeps <- c("date", "fips", "cases")
COVID <- COVID[keeps]

# Drop rows with a missing value in any kept column
# (rows without a fips code read in with fips = NA)
COVID <- COVID[complete.cases(COVID), ]

# Convert dates (which read in as strings) to a date-time format readable by R
COVID$date <- as.POSIXct(COVID$date)

# Round every date up to the week boundary that closes its week
COVID$week <- ceiling_date(COVID$date, unit = "weeks")

# The daily date is no longer needed once the week is known
COVID$date <- NULL

# If the same count is reported for the same county in the same week, keep one copy
COVID <- unique(COVID)

# Lookup table of week-ending dates and their week numbers.
# Sorting makes the numbering chronological even if the file is not sorted by date
week_ends <- sort(unique(COVID$week))
weeks <- data.frame(week_number = seq_along(week_ends), weeks = week_ends)

# Convert each week-ending date to its week number (earliest week becomes 1, the next 2, etc.)
COVID$week_number <- match(as.numeric(COVID$week), as.numeric(weeks$weeks))

# Keep only the highest count reported for each county in each week.
# aggregate() returns rows ordered by week, then by fips within each week
highest_count <- aggregate(cases ~ fips + week_number, data = COVID, FUN = max)
highest_count <- highest_count[, c("fips", "cases", "week_number")]
head(highest_count)

Unnamed: 0_level_0,fips,cases,week_number
Unnamed: 0_level_1,<int>,<int>,<int>
1,4013,1,1
2,6037,1,1
3,6059,1,1
4,17031,1,1
5,53061,1,1
6,4013,1,2


In [3]:
## STEP 2: Builds the table of neighboring-county pairs

# Load the county adjacency list
NEIGHBORS <- read.csv("neighborcounties.csv")

# Retain a pair only when both of its counties occur in the COVID data
has_origin <- NEIGHBORS$orgfips %in% COVID$fips
has_adjacent <- NEIGHBORS$adjfips %in% COVID$fips
NEIGHBORS <- NEIGHBORS[has_origin & has_adjacent, ]

In [4]:
## STEP 3: Creates a dataframe containing cumulative cases for each county each week

# Store the total number of weeks in a variable
totalweeks <- max(weeks$week_number)

# Every county code that appears in the weekly counts, in ascending order
all_county_codes <- sort(unique(highest_count$fips))

# County-by-week matrix of cumulative counts: one row per county (row name = fips),
# one column per week (column name = week number), initialised to 0.
# NOTE(review): a county with no report in some week keeps 0 for that week
cumulative <- matrix(0, nrow = length(all_county_codes), ncol = totalweeks,
                     dimnames = list(as.character(all_county_codes),
                                     as.character(seq_len(totalweeks))))

# Write every (county, week) count in one step by indexing with a two-column
# (row, column) matrix instead of looping over weeks and rows
cell_index <- cbind(match(highest_count$fips, all_county_codes), highest_count$week_number)
cumulative[cell_index] <- as.numeric(highest_count$cases)

# Data-frame view of the cumulative counts
CC_matrix <- as.data.frame(cumulative)
colnames(CC_matrix) <- colnames(cumulative)

# New cases each week: week 1 keeps its cumulative count, every later week is
# that week's cumulative count minus the previous week's
CC_matrix_diff <- cumulative
if (totalweeks > 1) {
    CC_matrix_diff[, -1] <- cumulative[, -1, drop = FALSE] -
        cumulative[, -totalweeks, drop = FALSE]
}

In [5]:
## STEP 4: Produces an edgelist showing whether transmission happened in a two-week period

# New-case counts for the first two weeks (columns 1 and 2 of the weekly-difference matrix)
first_2_weeks <- CC_matrix_diff[, c(1, 2)]

# Only the two fips columns of the neighbor table are needed from here on
NEIGHBORS <- NEIGHBORS[, c("orgfips", "adjfips")]

# Working copy of the neighbor pairs with an empty column that will record
# whether transmission happened for each pair
COVID_pairs <- as.data.frame(NEIGHBORS)
COVID_pairs[["transmission"]] <- ""

In [6]:
# Flag every neighbor pair that meets both of these conditions:
# 1) the original county (orgfips) had an increase in the number of cases of COVID in week 1
# 2) the adjacent county (adjfips) had an increase in the number of cases of COVID in week 2

# Row of first_2_weeks holding each county; match() does the lookup for all
# pairs at once and yields NA for a county that is not in the matrix
origin_row <- match(as.character(COVID_pairs$orgfips), row.names(first_2_weeks))
adjacent_row <- match(as.character(COVID_pairs$adjfips), row.names(first_2_weeks))

# New cases in the original county in week 1 and in the adjacent county in week 2
new_cases_w1 <- as.numeric(first_2_weeks[origin_row, 1])
new_cases_w2 <- as.numeric(first_2_weeks[adjacent_row, 2])

# TRUE when both counties had new cases in their respective week
COVID_pairs$transmission <- new_cases_w1 > 0 & new_cases_w2 > 0

# Keep only the pairs with transmission; which() also drops pairs whose result
# is NA because one of the counties was not found
COVID_pairs <- COVID_pairs[which(COVID_pairs$transmission), ]
COVID_pairs <- COVID_pairs[, c("orgfips", "adjfips")]
COVID_pairs <- as.matrix(COVID_pairs)

In [7]:
##Now, if you want a function that does all of that, here is the structure

# Build the edge list of county pairs with apparent transmission between two weeks.
#
# A neighbor pair (orgfips -> adjfips) is kept when the original county had new
# cases in `first_week` and the adjacent county had new cases in `second_week`.
# Reads the globals CC_matrix_diff (new cases; row names = fips, columns = weeks)
# and NEIGHBORS (columns orgfips and adjfips).
#
# first_week:  column index of the earlier week in CC_matrix_diff
# second_week: column index of the later week in CC_matrix_diff
# Returns a two-column matrix (orgfips, adjfips) without row names.
COVID_week <- function(first_week, second_week) {

    # Select exactly the two requested weeks, so that column 2 is second_week
    # even when the weeks are not adjacent
    week_pair <- CC_matrix_diff[, c(first_week, second_week), drop = FALSE]

    COVID_pairs <- as.data.frame(NEIGHBORS)

    # Row of week_pair for each county; NA when the county is not in the matrix
    origin_row <- match(as.character(COVID_pairs$orgfips), row.names(week_pair))
    adjacent_row <- match(as.character(COVID_pairs$adjfips), row.names(week_pair))

    new_cases_w1 <- as.numeric(week_pair[origin_row, 1])
    new_cases_w2 <- as.numeric(week_pair[adjacent_row, 2])
    transmission <- new_cases_w1 > 0 & new_cases_w2 > 0

    # which() drops FALSE and NA, so pairs with an unknown county are excluded
    COVID_pairs <- COVID_pairs[which(transmission), c("orgfips", "adjfips"), drop = FALSE]
    COVID_pairs <- as.matrix(COVID_pairs)
    row.names(COVID_pairs) <- NULL
    COVID_pairs
}

In [8]:
# Number of county pairs with apparent transmission (rows of the edge list) in
# three two-week periods, chosen to compare how the spread changes over time
nrow(COVID_week(5,6))
nrow(COVID_week(10,11))
nrow(COVID_week(13,14))

In [9]:
# Plot the transmission network for one two-week period and save it as a PNG.
#
# COVID_week: two-column edge list (orgfips, adjfips), e.g. the result of COVID_week()
# filename:   path of the PNG to write; defaults to the expression passed as
#             `COVID_week` followed by ".png" (week_5_6 -> "week_5_6.png")
# Returns the file name, invisibly.
plot_the_week_period <- function(COVID_week, filename = NULL) {
    # Derive the default file name before touching the argument, while the
    # unevaluated call expression is still available to substitute()
    if (is.null(filename)) {
        filename <- paste0(deparse(substitute(COVID_week))[1], ".png")
    }

    # Character edge list so that igraph names each vertex by its fips code
    edges <- matrix(as.character(COVID_week), nrow = nrow(COVID_week), ncol = ncol(COVID_week))
    COVID_week.g <- graph_from_edgelist(edges, directed = TRUE)

    # Label vertices with county names when a lookup table is available.
    # NOTE(review): `thenames` (columns fips, county_state) is not defined in
    # this notebook - confirm where it comes from. Falls back to fips codes.
    labels <- V(COVID_week.g)$name
    if (exists("thenames")) {
        county_names <- as.character(
            thenames$county_state[match(labels, as.character(thenames$fips))]
        )
        labels <- ifelse(is.na(county_names), labels, county_names)
    }

    # Scale edge widths by weight when the graph has one; otherwise use width 1
    e.wt <- edge_attr(COVID_week.g, "weight")
    edge_width <- if (is.null(e.wt)) 1 else e.wt * 0.001

    layout_for_g <- layout_with_lgl(COVID_week.g)

    png(filename = filename, width = 4000, height = 4000)
    # Close the device even if plotting fails
    on.exit(dev.off(), add = TRUE)
    plot(COVID_week.g, layout = layout_for_g, vertex.size = 1,
         edge.width = edge_width, vertex.label = labels)

    invisible(filename)
}

In [10]:
# Builds the transmission edge list for every pair of consecutive weeks from
# 5-6 through 13-14 and stores each one as week_<first>_<second>
week_starts <- 5:13
edge_lists <- lapply(week_starts, function(start) COVID_week(start, start + 1))
names(edge_lists) <- paste0("week_", week_starts, "_", week_starts + 1)

# Copy each named edge list into the current environment as its own variable
invisible(list2env(edge_lists, envir = environment()))

In [11]:
# Produces a directed graph for each two-week block
# Starting with week 5 to 6 because there were not new cases in proximate counties before that

# Convert an edge list to character before building the graph. igraph reads a
# numeric edge list as vertex ids (creating one vertex per integer up to the
# largest fips code), but reads a character edge list as vertex names.
edges_to_graph <- function(edge_list) {
    named_edges <- matrix(as.character(edge_list), nrow = nrow(edge_list), ncol = ncol(edge_list))
    graph_from_edgelist(named_edges, directed = TRUE)
}

week_5_6.g <- edges_to_graph(week_5_6)
week_6_7.g <- edges_to_graph(week_6_7)
week_7_8.g <- edges_to_graph(week_7_8)
week_8_9.g <- edges_to_graph(week_8_9)
week_9_10.g <- edges_to_graph(week_9_10)
week_10_11.g <- edges_to_graph(week_10_11)
week_11_12.g <- edges_to_graph(week_11_12)
week_12_13.g <- edges_to_graph(week_12_13)
week_13_14.g <- edges_to_graph(week_13_14)

In [12]:
# Lookup table with one row per distinct (state, fips) combination in the NYT
# file, sorted by fips, with fips stored as text
county_states <- unique(read.csv("us-counties.csv", header = TRUE)[, c("state", "fips")])
county_states <- county_states[order(county_states$fips), ]
county_states$fips <- as.character(county_states$fips)

In [13]:
# Creates a vector of 50 individual colors (for 50 U.S. states)
# Each time you run this cell, you will get a different sample of 50 colors

# All Color Brewer palettes that are qualitative
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]

# Every color from every qualitative palette
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))

# Drop hex values that occur in more than one palette so that no two states
# can be given the same color
col_vector <- unique(col_vector)

# Sample 50 of the remaining colors (without replacement)
col_vector <- sample(col_vector, 50)

In [14]:
# Re-encode the week 5-6 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_5_6)
week_5_6 <- matrix(m, nrow = nrow(week_5_6), ncol = ncol(week_5_6))
week_5_6.g <- graph_from_edgelist(week_5_6, directed = TRUE)

In [15]:
# Re-encode the week 6-7 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_6_7)
week_6_7 <- matrix(m, nrow = nrow(week_6_7), ncol = ncol(week_6_7))
week_6_7.g <- graph_from_edgelist(week_6_7, directed = TRUE)

In [16]:
# Re-encode the week 7-8 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_7_8)
week_7_8 <- matrix(m, nrow = nrow(week_7_8), ncol = ncol(week_7_8))
week_7_8.g <- graph_from_edgelist(week_7_8, directed = TRUE)

In [17]:
# Re-encode the week 8-9 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_8_9)
week_8_9 <- matrix(m, nrow = nrow(week_8_9), ncol = ncol(week_8_9))
week_8_9.g <- graph_from_edgelist(week_8_9, directed = TRUE)

In [18]:
# Re-encode the week 9-10 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_9_10)
week_9_10 <- matrix(m, nrow = nrow(week_9_10), ncol = ncol(week_9_10))
week_9_10.g <- graph_from_edgelist(week_9_10, directed = TRUE)

In [19]:
# Re-encode the week 10-11 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_10_11)
week_10_11 <- matrix(m, nrow = nrow(week_10_11), ncol = ncol(week_10_11))
week_10_11.g <- graph_from_edgelist(week_10_11, directed = TRUE)

In [20]:
# Re-encode the week 11-12 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_11_12)
week_11_12 <- matrix(m, nrow = nrow(week_11_12), ncol = ncol(week_11_12))
week_11_12.g <- graph_from_edgelist(week_11_12, directed = TRUE)

In [21]:
# Re-encode the week 12-13 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_12_13)
week_12_13 <- matrix(m, nrow = nrow(week_12_13), ncol = ncol(week_12_13))
week_12_13.g <- graph_from_edgelist(week_12_13, directed = TRUE)

In [22]:
# Re-encode the week 13-14 edge list as character so that igraph names each
# vertex by its fips code, then rebuild the directed graph
m <- as.character(week_13_14)
week_13_14 <- matrix(m, nrow = nrow(week_13_14), ncol = ncol(week_13_14))
week_13_14.g <- graph_from_edgelist(week_13_14, directed = TRUE)

In [23]:
# Tags every vertex (county) of each weekly graph with the state it belongs to.
#
# match() gives, for each vertex name (a fips code), the position of that code
# in county_states$fips; indexing county_states$state with those positions
# turns them into state names. V() returns the vertices (nodes) of a network.
state_of_vertices <- function(graph) {
    positions <- match(V(graph)$name, county_states$fips)
    as.character(county_states$state[positions])
}

V(week_5_6.g)$state <- state_of_vertices(week_5_6.g)
V(week_6_7.g)$state <- state_of_vertices(week_6_7.g)
V(week_7_8.g)$state <- state_of_vertices(week_7_8.g)
V(week_8_9.g)$state <- state_of_vertices(week_8_9.g)
V(week_9_10.g)$state <- state_of_vertices(week_9_10.g)
V(week_10_11.g)$state <- state_of_vertices(week_10_11.g)
V(week_11_12.g)$state <- state_of_vertices(week_11_12.g)
V(week_12_13.g)$state <- state_of_vertices(week_12_13.g)
V(week_13_14.g)$state <- state_of_vertices(week_13_14.g)

In [24]:
# Lookup table pairing each state name with one color.
# state.name is R's built-in vector of the 50 U.S. state names; the colors are
# the ones sampled into col_vector above
state_colors <- data.frame(state = state.name, stringsAsFactors = FALSE)
state_colors$color <- col_vector

In [25]:
# Now, we can match the states in each time-slice graph with a color, using the same structure we used above.
#
# state_colors only covers the 50 names in state.name, so a vertex whose state
# is not in that table would get an NA color, which plots with no fill.
# Such vertices receive a neutral fallback color instead so they stay visible.
color_of_vertices <- function(graph, fallback = "grey70") {
    vertex_colors <- as.character(state_colors$color[match(V(graph)$state, state_colors$state)])
    vertex_colors[is.na(vertex_colors)] <- fallback
    vertex_colors
}

V(week_5_6.g)$color <- color_of_vertices(week_5_6.g)
V(week_6_7.g)$color <- color_of_vertices(week_6_7.g)
V(week_7_8.g)$color <- color_of_vertices(week_7_8.g)
V(week_8_9.g)$color <- color_of_vertices(week_8_9.g)
V(week_9_10.g)$color <- color_of_vertices(week_9_10.g)
V(week_10_11.g)$color <- color_of_vertices(week_10_11.g)
V(week_11_12.g)$color <- color_of_vertices(week_11_12.g)
V(week_12_13.g)$color <- color_of_vertices(week_12_13.g)
V(week_13_14.g)$color <- color_of_vertices(week_13_14.g)

In [68]:
# Draw the week 5-6 network into a PNG file and add the state color legend
png(filename = "week_5-6.png", width = 2000, height = 2000)
plot(
    week_5_6.g,
    layout = layout_with_kk,
    vertex.size = 2,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 20
)
legend(
    x = "bottomleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = 2,
    bty = "n",
    ncol = 1
)
dev.off()

In [53]:
# Draw the week 6-7 network into a PNG file and add the state color legend
png(filename = "week_6_7.png", width = 4000, height = 4000)
plot(
    week_6_7.g,
    main = "Week 6-7",
    layout = layout_with_kk,
    vertex.size = 1,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = .8,
    bty = "n",
    ncol = 1
)
dev.off()

In [28]:
# Draw the week 7-8 network into a PNG file and add the state color legend
png(filename = "week_7_8.png", width = 4000, height = 4000)
plot(
    week_7_8.g,
    main = "Week 7-8",
    layout = layout_with_kk,
    vertex.size = 0.5,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = .8,
    bty = "n",
    ncol = 1
)
dev.off()

In [29]:
# Draw the week 8-9 network into a PNG file and add the state color legend
png(filename = "week_8_9.png", width = 4000, height = 4000)
plot(
    week_8_9.g,
    main = "Week 8-9",
    layout = layout_with_kk,
    vertex.size = 0.5,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = .8,
    bty = "n",
    ncol = 1
)
dev.off()

In [30]:
# Draw the week 9-10 network into a PNG file and add the state color legend
png(filename = "week_9_10.png", width = 4000, height = 4000)
plot(
    week_9_10.g,
    main = "Week 9-10",
    layout = layout_with_kk,
    vertex.size = 0.5,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = .8,
    bty = "n",
    ncol = 1
)
dev.off()

In [73]:
# Draw the week 10-11 network into a PNG file and add the state color legend
png(filename = "week_10_11.png", width = 2000, height = 2000)
plot(
    week_10_11.g,
    layout = layout_with_kk,
    vertex.size = 2,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = 2,
    bty = "n",
    ncol = 1
)
dev.off()

In [32]:
# Draw the week 11-12 network into a PNG file and add the state color legend
png(filename = "week_11_12.png", width = 4000, height = 4000)
plot(
    week_11_12.g,
    main = "Week 11-12",
    layout = layout_with_kk,
    vertex.size = 0.5,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = .8,
    bty = "n",
    ncol = 1
)
dev.off()

In [33]:
# Draw the week 12-13 network into a PNG file and add the state color legend
png(filename = "week_12_13.png", width = 4000, height = 4000)
plot(
    week_12_13.g,
    main = "Week 12-13",
    layout = layout_with_kk,
    vertex.size = 0.5,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = .8,
    bty = "n",
    ncol = 1
)
dev.off()

In [74]:
# Draw the week 13-14 network into a PNG file and add the state color legend
png(filename = "week_13_14.png", width = 2000, height = 2000)
plot(
    week_13_14.g,
    layout = layout_with_kk,
    vertex.size = 2,
    vertex.label = NA,
    vertex.frame.color = NA,
    edge.arrow.size = 0.1,
    cex.main = 5
)
legend(
    x = "topleft",
    legend = state_colors$state,
    pch = 21,
    col = "#777777",
    pt.bg = state_colors$color,
    pt.cex = 2,
    cex = 2,
    bty = "n",
    ncol = 1
)
dev.off()

In [35]:
# Draw all nine weekly networks on one 3 x 3 page, each panel framed and
# carrying its own state color legend, under a shared page title
png("all_weeks.png", width = 4000, height = 4000)
par(mfrow = c(3, 3), oma = c(0, 0, 10, 0), mar = c(10, 20, 10, 10), cex.main = 5)

# Panel title -> graph, in the order the panels fill the page
panels <- list(
    "Week 5-6" = week_5_6.g,
    "Week 6-7" = week_6_7.g,
    "Week 7-8" = week_7_8.g,
    "Week 8-9" = week_8_9.g,
    "Week 9-10" = week_9_10.g,
    "Week 10-11" = week_10_11.g,
    "Week 11-12" = week_11_12.g,
    "Week 12-13" = week_12_13.g,
    "Week 13-14" = week_13_14.g
)

for (panel_title in names(panels)) {
    plot(panels[[panel_title]], main = panel_title, cex.main = 5, vertex.label = NA,
         vertex.size = 1, vertex.frame.color = NA, edge.arrow.size = 0.1,
         layout = layout_with_kk)
    box(col = "black")
    legend("topleft", inset = c(-0.15, 0), state_colors$state, pch = 21, col = "#777777",
           pt.bg = state_colors$color, pt.cex = 2, cex = 2, bty = "n", ncol = 1)
}

mtext("The Network Spread of COVID-19", outer = TRUE, cex = 6)
dev.off()

# Week 5-6 Summary

In [36]:
# Whole-network summary statistics for the week 5-6 transmission graph

# gsize: the number of edges in the network
gsize(week_5_6.g)

# vcount: the number of vertices (nodes, i.e. counties) in the network
vcount(week_5_6.g)

# transitivity: the number of closed triples compared to the number of potential closed triples
transitivity(week_5_6.g)

# diameter: the length of the longest shortest path between any two vertices
diameter(week_5_6.g)

# edge_density: the number of edges in the network compared to the number of possible edges
edge_density(week_5_6.g)

# components: the connected subgroups of the network; within a component every
# vertex is connected to every other, directly or indirectly
# NOTE(review): edge direction is presumably ignored by default here - confirm
components(week_5_6.g)

In [37]:
## Betweenness

# Betweenness of each county in the week 5-6 network: how often a county lies
# on the shortest paths between other counties, i.e. how often it can act as a
# link or gateway between them.
# (The variable keeps its historical name btwn.wk10 even though this is week 5-6.)
btwn.wk10 <- betweenness(week_5_6.g)

# Label each score with the county's fips code (the vertex name). Assigning
# V(graph) itself would label the scores with vertex positions instead.
names(btwn.wk10) <- V(week_5_6.g)$name

# Positions of the scores in decreasing order of betweenness
ind <- order(-btwn.wk10)

# The ten counties with the highest betweenness (fewer if the graph is smaller)
head(btwn.wk10[ind], 10)

In [38]:
## Centrality

# Eigenvector centrality of each county in the week 5-6 network.
# eigen_centrality() is the current name of the deprecated evcent()
cent.eig <- eigen_centrality(week_5_6.g)

# Label each score with the county's fips code (the vertex name). Assigning
# V(graph) itself would label the scores with vertex positions instead.
names(cent.eig$vector) <- V(week_5_6.g)$name

# Positions of the scores in decreasing order of centrality
ind <- order(-cent.eig$vector)

# The ten counties with the highest centrality (fewer if the graph is smaller)
head(cent.eig$vector[ind], 10)

# Week 10-11 Summary

In [39]:
# Whole-network summary statistics for the week 10-11 transmission graph

# gsize: the number of edges in the network
gsize(week_10_11.g)

# vcount: the number of vertices (nodes, i.e. counties) in the network
vcount(week_10_11.g)

# transitivity: the number of closed triples compared to the number of potential closed triples
transitivity(week_10_11.g)

# diameter: the length of the longest shortest path between any two vertices
diameter(week_10_11.g)

# edge_density: the number of edges in the network compared to the number of possible edges
edge_density(week_10_11.g)

# components: the connected subgroups of the network; within a component every
# vertex is connected to every other, directly or indirectly
# NOTE(review): edge direction is presumably ignored by default here - confirm
components(week_10_11.g)

In [40]:
## Betweenness

# Betweenness of each county in the week 10-11 network: how often a county lies
# on the shortest paths between other counties, i.e. how often it can act as a
# link or gateway between them.
btwn.wk10 <- betweenness(week_10_11.g)

# Label each score with the county's fips code (the vertex name). Assigning
# V(graph) itself would label the scores with vertex positions instead.
names(btwn.wk10) <- V(week_10_11.g)$name

# Positions of the scores in decreasing order of betweenness
ind <- order(-btwn.wk10)

# The ten counties with the highest betweenness (fewer if the graph is smaller)
head(btwn.wk10[ind], 10)

In [49]:
# Look up the vertex at position 777 of the week 10-11 graph to see which
# county it is (the printed value is the vertex name, a fips code).
# NOTE(review): 777 is presumably a position read off the betweenness ranking - confirm
V(week_10_11.g)[777]

+ 1/2363 vertex, named, from 26b213f:
[1] 18173

In [50]:
## Centrality

# Eigenvector centrality of each county in the week 10-11 network.
# eigen_centrality() is the current name of the deprecated evcent()
cent.eig <- eigen_centrality(week_10_11.g)

# Label each score with the county's fips code (the vertex name). Assigning
# V(graph) itself would label the scores with vertex positions instead.
names(cent.eig$vector) <- V(week_10_11.g)$name

# Positions of the scores in decreasing order of centrality
ind <- order(-cent.eig$vector)

# The ten counties with the highest centrality (fewer if the graph is smaller)
head(cent.eig$vector[ind], 10)

In [51]:
# Look up the vertex at position 1880 of the week 10-11 graph to see which
# county it is (the printed value is the vertex name, a fips code).
# NOTE(review): 1880 is presumably a position read off the centrality ranking - confirm
V(week_10_11.g)[1880]

+ 1/2363 vertex, named, from 26b213f:
[1] 42107

# Week 13-14 Summary

In [42]:
# Whole-network summary statistics for the week 13-14 transmission graph

# gsize: the number of edges in the network
gsize(week_13_14.g)

# vcount: the number of vertices (nodes, i.e. counties) in the network
vcount(week_13_14.g)

# transitivity: the number of closed triples compared to the number of potential closed triples
transitivity(week_13_14.g)

# diameter: the length of the longest shortest path between any two vertices
diameter(week_13_14.g)

# edge_density: the number of edges in the network compared to the number of possible edges
edge_density(week_13_14.g)

# components: the connected subgroups of the network; within a component every
# vertex is connected to every other, directly or indirectly
# NOTE(review): edge direction is presumably ignored by default here - confirm
components(week_13_14.g)

In [43]:
## Betweenness

# Betweenness of each county in the week 13-14 network: how often a county lies
# on the shortest paths between other counties, i.e. how often it can act as a
# link or gateway between them.
# (The variable keeps its historical name btwn.wk10 even though this is week 13-14.)
btwn.wk10 <- betweenness(week_13_14.g)

# Label each score with the county's fips code (the vertex name). Assigning
# V(graph) itself would label the scores with vertex positions instead.
names(btwn.wk10) <- V(week_13_14.g)$name

# Positions of the scores in decreasing order of betweenness
ind <- order(-btwn.wk10)

# The ten counties with the highest betweenness (fewer if the graph is smaller)
head(btwn.wk10[ind], 10)

In [45]:
## Centrality

# Eigenvector centrality of each county in the week 13-14 network.
# eigen_centrality() is the current name of the deprecated evcent()
cent.eig <- eigen_centrality(week_13_14.g)

# Label each score with the county's fips code (the vertex name). Assigning
# V(graph) itself would label the scores with vertex positions instead.
names(cent.eig$vector) <- V(week_13_14.g)$name

# Positions of the scores in decreasing order of centrality
ind <- order(-cent.eig$vector)

# The ten counties with the highest centrality (fewer if the graph is smaller)
head(cent.eig$vector[ind], 10)

# ISSUES

Indexing the graph objects so that results are reported by the proper county fips codes rather than by vertex position.