In [1]:
library(dplyr)
library(igraph)
library(ggplot2)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Attaching package: ‘igraph’

The following objects are masked from ‘package:dplyr’:

    as_data_frame, groups, union

The following objects are masked from ‘package:stats’:

    decompose, spectrum

The following object is masked from ‘package:base’:

    union



In [2]:
# Load the DIPCON 3.0 dyad table from the author's local Dropbox folder.
df <- read.csv(file = "/home/polichinel/Dropbox/KU/9.semester/SNA/data/DIPCON_3.0_Dyads.csv")

In [3]:
# Preview the first three dyads to check the column layout.
head(df, n = 3)

X,ccode1,abbrev1,ccode2,abbrev2,dipcon1970,dipcon1975,dipcon1980,dipcon1985,dipcon1990,dipcon1995,dipcon2000,dipcon2005,dipcon2010
0,2,USA,20,CAN,1.0,1,1,1,1,1,1,1,1
1,2,USA,31,BHM,,0,1,1,1,1,1,1,1
2,2,USA,40,CUB,0.0,0,0,0,0,0,0,1,1


In [65]:
#' Build one directed diplomatic-contact graph per survey year.
#'
#' For every year, the rows of `df` whose `dipcon<year>` column is greater
#' than zero are kept and turned into a directed edge list
#' (`abbrev1` -> `abbrev2`). Rows where the column is NA are dropped by
#' `filter()`.
#'
#' @param df Dyad data frame with columns `abbrev1`, `abbrev2` and one
#'   `dipcon<year>` column per requested year.
#' @param years Years to build graphs for. Defaults to the nine waves
#'   1970, 1975, ..., 2010.
#' @return A list of igraph graphs named `G_<year>`, in the order of `years`.
get_graph_lists <- function(df, years = c(1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010)){

    # Fail early with a readable message if a requested wave is not in the data.
    feature_names <- paste0("dipcon", years)
    missing_cols <- feature_names[!feature_names %in% names(df)]
    if (length(missing_cols) > 0) {
        stop("Missing column(s) in df: ", paste(missing_cols, collapse = ", "),
             call. = FALSE)
    }

    list_of_graphs <- list()

    for (year in years) {

        feature_name <- paste0("dipcon", year)
        graph_name <- paste0("G_", year)

        # Year-specific edge list. `.data[[...]]` looks the column up in the
        # piped data and yields a plain vector, instead of the one-column
        # matrix that `df[feature_name] > 0` produces.
        edge_list <- df %>%
            filter(.data[[feature_name]] > 0) %>%
            select(abbrev1, abbrev2)

        list_of_graphs[[graph_name]] <- graph_from_data_frame(edge_list, directed = TRUE)

    }

    return(list_of_graphs)

}

### Så nu skal du lave 9 datasæt med alle relevante variabler:

- Degree (normalized)
- Degree centrality (normalized)
- Betweenness centrality (normalized?)

Men du kan lige så godt også få de aggregerede variabler ind der:

- graph_mean_degree
- graph_mean_degree_centrality
- graph_mean_betweenness_centrality
- graph_density
- graph_degree_centralization
- graph_betweenness_centralization

Herfra burde du kunne lave de relevante features, plots og estimationer i Python.

In [94]:
#' Build one node-level metrics table per yearly graph.
#'
#' Calls `get_graph_lists(df)` and, for every graph it returns, removes
#' self-loops and computes vertex-level degree and betweenness scores together
#' with graph-level degree/betweenness centralization and density.
#'
#' @param df Dyad data frame; passed unchanged to `get_graph_lists()`.
#' @return A list named `df_<year>` with one data frame per graph and one row
#'   per vertex. Each `centr_degree()` / `centr_betw()` result is a list
#'   (`res`, `centralization`, `theoretical_max`), so `data.frame()` expands
#'   it into three `<name>.<element>` columns. Column names, including the
#'   "cantr" spelling, are kept unchanged so existing consumers of the
#'   exported table keep working.
get_df_list <- function(df){

    list_of_graphs <- get_graph_lists(df)

    # Take the year from each "G_<year>" name instead of a second hard-coded
    # year vector, so the two functions cannot drift out of sync.
    graph_names <- names(list_of_graphs)
    years <- as.numeric(sub("^G_", "", graph_names))

    list_of_df <- vector("list", length(list_of_graphs))
    names(list_of_df) <- sub("^G_", "df_", graph_names)

    for (i in seq_along(list_of_graphs)) {

        # Drop self-loops but keep parallel edges.
        G <- simplify(list_of_graphs[[i]], remove.loops = TRUE, remove.multiple = FALSE)

        df_G <- data.frame(country = V(G)$name,
                      year = years[i],
                      indegree_norm = degree(G, mode = "in", normalized = TRUE),
                      indegree = degree(G, mode = "in", normalized = FALSE),
                      outdegree_norm = degree(G, mode = "out", normalized = TRUE),
                      outdegree = degree(G, mode = "out", normalized = FALSE),
                      degree_norm = degree(G, mode = "all", normalized = TRUE),
                      degree = degree(G, mode = "all", normalized = FALSE),
                      betweenness_dir = betweenness(G, directed = TRUE, normalized = FALSE),
                      betweenness = betweenness(G, directed = FALSE, normalized = FALSE),
                      betweenness_dir_norm = betweenness(G, directed = TRUE, normalized = TRUE),
                      betweenness_norm = betweenness(G, directed = FALSE, normalized = TRUE),
                      graph_deg_cantr_in = centr_degree(G, normalized = TRUE, mode = "in"),
                      graph_deg_cantr_out = centr_degree(G, normalized = TRUE, mode = "out"),
                      graph_deg_cantr_all = centr_degree(G, normalized = TRUE, mode = "all"),
                      graph_betw_cantr_in = centr_betw(G, normalized = TRUE),
                      graph_dens = edge_density(G))

        list_of_df[[i]] <- df_G

    }

    return(list_of_df)

}

In [100]:
# Smoke test: build the per-year node tables and preview the first and last wave.
list_of_df <- get_df_list(df)

head(list_of_df[["df_1970"]], n = 3)
head(list_of_df[["df_2010"]], n = 3)

Unnamed: 0,country,year,indegree_norm,indegree,outdegree_norm,outdegree,degree_norm,degree,betweenness_dir,betweenness,⋯,graph_deg_cantr_out.res,graph_deg_cantr_out.centralization,graph_deg_cantr_out.theoretical_max,graph_deg_cantr_all.res,graph_deg_cantr_all.centralization,graph_deg_cantr_all.theoretical_max,graph_betw_cantr_in.res,graph_betw_cantr_in.centralization,graph_betw_cantr_in.theoretical_max,graph_dens
USA,USA,1970,0.780303,103,0.719697,95,1.5,198,1993.2812,955.85127,⋯,95,0.6099909,17556,198,0.5840794,34848,1993.2812,0.1098925,2282544,0.2081909
CAN,CAN,1970,0.4545455,60,0.4469697,59,0.9015152,119,227.6034,98.96435,⋯,59,0.6099909,17556,119,0.5840794,34848,227.6034,0.1098925,2282544,0.2081909
CUB,CUB,1970,0.219697,29,0.2045455,27,0.4242424,56,31.4925,14.59579,⋯,27,0.6099909,17556,56,0.5840794,34848,31.4925,0.1098925,2282544,0.2081909


Unnamed: 0,country,year,indegree_norm,indegree,outdegree_norm,outdegree,degree_norm,degree,betweenness_dir,betweenness,⋯,graph_deg_cantr_out.res,graph_deg_cantr_out.centralization,graph_deg_cantr_out.theoretical_max,graph_deg_cantr_all.res,graph_deg_cantr_all.centralization,graph_deg_cantr_all.theoretical_max,graph_betw_cantr_in.res,graph_betw_cantr_in.centralization,graph_betw_cantr_in.theoretical_max,graph_dens
USA,USA,2010,0.97354497,184,0.8042328,152,1.77777778,336,3736.4020213,1887.5063956,⋯,152,0.616235,35910,336,0.6620475,71442,3736.4020213,0.1014659,6715548,0.2303258
CAN,CAN,2010,0.68783069,130,0.48677249,92,1.17460317,222,578.2783077,318.5855304,⋯,92,0.616235,35910,222,0.6620475,71442,578.2783077,0.1014659,6715548,0.2303258
BHM,BHM,2010,0.02645503,5,0.03174603,6,0.05820106,11,0.4044652,0.1233333,⋯,6,0.616235,35910,11,0.6620475,71442,0.4044652,0.1014659,6715548,0.2303258


### Concatenate dfs:

In [97]:
# Stack the nine yearly tables (1970, 1975, ..., 2010) in chronological order.
# The list is unnamed before binding so rbind() builds row names exactly as it
# would for positional arguments.
wave_keys <- paste0("df_", seq(1970, 2010, by = 5))
df_full <- do.call(rbind, unname(list_of_df[wave_keys]))
dim(df_full)

### Save for Python:

In [None]:
# Export the stacked table to the working directory; row names are written as
# the first column (write.csv default).
write.csv(x = df_full, file = "diplodata_network_full.csv")