In [1]:
library("network")
library("tidyverse")

# Return Named Edge List
name_edgelist <- function(graph_object){

  # Return the edge list of a network object with each endpoint given
  # by its "vertex.names" value instead of its vertex ID.
  #
  # Args:
  #   graph_object: network object to read the edges and the
  #                 "vertex.names" vertex attribute from.
  #
  # Returns:
  #   A data frame with one row per edge; the first endpoint is in
  #   column "PaperId" and the second in column "JournalId".

  vertex_names <- graph_object %v% "vertex.names"

  edges <- as.data.frame(as.edgelist(graph_object))

  # Edge list endpoints are vertex IDs counted from 1, so they can
  # index the name vector directly. A lookup table built with
  # 1:network.size(...) is avoided because 1:0 is c(1, 0), which
  # breaks on an empty network.
  edges[[1]] <- vertex_names[edges[[1]]]
  edges[[2]] <- vertex_names[edges[[2]]]

  names(edges)[1:2] <- c("PaperId", "JournalId")

  edges
}


‘network’ 1.17.1 (2021-06-12), part of the Statnet Project
* ‘news(package="network")’ for changes since last version
* ‘citation("network")’ for citation information
* ‘https://statnet.org’ for help, support, and other information


Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──

[32m✔[39m [34mggplot2[39m 3.1.1       [32m✔[39m [34mpurrr  [39m 0.3.4  
[32m✔[39m [34mtibble [39m 3.1.5       [32m✔[39m [34mdplyr  [39m 0.8.0.[31m1[39m
[32m✔[39m [34mtidyr  [39m 0.8.3       [32m✔[39m [34mstringr[39m 1.4.0  
[32m✔[39m [34mreadr  [39m 1.3.1       [32m✔[39m [34mforcats[39m 0.4.0  

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34

### Load and Initialize Two-Mode Network 

Load the papers-to-journals edge list, which contains all the papers by the probable matches between the MAG and the network data.

In [2]:
# Read the papers-to-journals edge list from disk.
papers2journals <- read_csv("/home/timothyelder/Documents/data/disambig/edge_list.csv")

# Keep only the two edge columns, selected by name so the result does
# not depend on where the unnamed index column sits in the file.
papers2journals <- papers2journals[, c("PaperId", "JournalId")]

# Hand network() a plain data frame rather than the tibble from read_csv().
papers2journals <- as.data.frame(papers2journals)

head(papers2journals)

# Build the two-mode (bipartite) network from the edge list.
bi_net <- network(papers2journals, bipartite = TRUE)

“Missing column names filled in: 'X1' [1]”
Parsed with column specification:
cols(
  X1 = [32mcol_double()[39m,
  PaperId = [32mcol_double()[39m,
  JournalId = [32mcol_double()[39m
)



PaperId,JournalId
1991579188,179701344
2028721363,131590804
2239830555,2754179166
2949276763,201989124
2325118576,130611943
3116694480,16484115


“If `bipartite` is `TRUE`, edges are interpreted as undirected.”


In [None]:
# Display the bipartite network as a matrix.
# NOTE(review): the edge list references tens of thousands of nodes,
# so a dense matrix may be very large — confirm this is wanted.
as.matrix(bi_net)

In [131]:
# The projection needs JournalId in the first column so that journals
# become the nodes of the projected network and shared papers become
# its edges. name_edgelist() returns PaperId first, hence the swap.
el <- name_edgelist(bi_net)[, 2:1]

head(el)

JournalId,PaperId
179701344,1991579188
131590804,2028721363
2754179166,2239830555
201989124,2949276763
130611943,2325118576
16484115,3116694480


In [127]:
# Convert the edge list to tnet's binary two-mode format, overwriting `el`.
el <- tnet::as.tnet(el, type="binary two-mode tnet")
# Display the converted edge list.
el

i,p
58878,1
58878,1970
58878,39724
58878,48537
58879,2
58879,672
58879,1386
58879,3236
58879,4071
58879,6477


“The network might be undirected. If this is the case, each tie should be mention twice. The symmetrise-function can be used to include reverse version of each tie.”
“no non-missing arguments to min; returning Inf”


i,j,w


In [None]:

# Build a directed network object from the `new_net` edge list
# (intended to be the weighted graph object; the original note here
# was cut off).
# NOTE(review): `new_net` is not assigned anywhere above this point in
# the visible file — confirm it exists before this runs.
g2 <- network::as.network(as.matrix(new_net),
                          matrix.type = "edgelist",
                          directed = TRUE)

# assign vertex names
# NOTE(review): `cosponsors_binary` is not defined in the visible part
# of this file — confirm where it comes from.
network.vertex.names(g2) <- row.names(cosponsors_binary)

plot.network(g2, # our network object
             vertex.cex = (degree(g2) / 5), # size nodes by their degree
             displaylabels = TRUE, # show the node names
             label.pos = 5, # display the names directly over nodes
             usearrows = FALSE,
             pad = .00001
)

# creating dataframe and calculating network statistics
names <- g2 %v% "vertex.names"
# One ID per vertex, derived from the network instead of hard-coded.
numbers <- seq_len(network.size(g2))
degree <- degree(g2)
eigen <- evcent(g2)

# data.frame() keeps degree and eigen numeric. Going through cbind()
# turns every column into text first, and as.numeric() on a factor
# column yields level codes rather than the stored values.
df <- data.frame(names = names, degree = degree, eigen = eigen,
                 stringsAsFactors = FALSE)
# mean degree
sum(df$degree) / nrow(df)


# lookup table pairing each vertex name with its ID
df <- as.data.frame(cbind(names, numbers))

# TODO: wrap the steps below in a function that returns a named edge
# list from the vertex names; it should warn when the vertex.names are
# not characters and check that they always come back as characters.

n <- as.data.frame(as.edgelist(g2))

# Swap the numeric node IDs in both endpoint columns for vertex names.
new_net$i <- plyr::mapvalues(new_net$i, from = numbers, to = names)
new_net$j <- plyr::mapvalues(new_net$j, from = numbers, to = names)

# Relabel the endpoint columns.
new_net <- rename(new_net, "source" = "i", "target" = "j")

#write.csv(new_net, "/Users/timothyelder/Documents/dissertation/data/quant/prim2prim.csv", row.names = FALSE)

# Now for the subspecialty to subspecialty network
# Binary one-mode projection
# Reverse the two columns of the edge list. The swap is done by
# position so it works whatever the columns are called (as.tnet()
# output is labelled i/p rather than V1/V2).
el <- as.data.frame(el)
el <- el[, c(2, 1)]
el <- as.matrix(el)

# Project the two-mode edge list to one mode and relabel the weight column.
new_net <- tnet::projecting_tm(el, method="sum")
new_net <- new_net %>% rename("weight" = "w")
#new_net <- new_net %>% select(-"weight")

# Build a directed network object from the projected `new_net` edge
# list (intended to be the weighted graph object; the original note
# here was cut off).
g2 <- as.network(new_net,
                 matrix.type = "edgelist",
                 directed = TRUE, binary = FALSE)

g2

# assign vertex names
# NOTE(review): `cosponsors_binary` is not defined in the visible part
# of this file — confirm where it comes from.
network.vertex.names(g2) <- colnames(cosponsors_binary)

plot.network(g2, # our network object
             vertex.cex = (degree(g2) / 5), # size nodes by their degree
             displaylabels = TRUE, # show the node names
             label.pos = 5, # display the names directly over nodes
             usearrows = FALSE,
             pad = .00001
)

# creating dataframe and calculating network statistics
names <- g2 %v% "vertex.names"
# NOTE(review): hard-coded ID range, presumably the IDs of the second
# node set — confirm it has one entry per vertex name in g2.
numbers <- 20:39
degree <- degree(g2)
eigen <- evcent(g2)

# data.frame() keeps degree and eigen numeric. Going through cbind()
# turns every column into text first, and as.numeric() on a factor
# column yields level codes rather than the stored values.
df <- data.frame(names = names, degree = degree, eigen = eigen,
                 stringsAsFactors = FALSE)
# mean degree
sum(df$degree) / nrow(df)


# lookup table pairing each vertex name with its ID
df <- as.data.frame(cbind(names, numbers))

# TODO: wrap the steps below in a function that returns a named edge
# list from the vertex names; it should warn when the vertex.names are
# not characters and check that they always come back as characters.

n <- as.data.frame(as.edgelist(g2))

# Swap the numeric node IDs in both endpoint columns for vertex names.
new_net$i <- plyr::mapvalues(new_net$i, from = numbers, to = names)
new_net$j <- plyr::mapvalues(new_net$j, from = numbers, to = names)

# Relabel the endpoint columns.
new_net <- rename(new_net, "source" = "i", "target" = "j")

# Save the named edge list as CSV without row names.
write.csv(
  new_net,
  "/Users/timothyelder/Documents/dissertation/data/quant/sub2sub.csv",
  row.names = FALSE
)
