In [21]:
library(tidyverse)
library(countrycode)
library(magrittr)


Attaching package: ‘magrittr’

The following object is masked from ‘package:purrr’:

    set_names

The following object is masked from ‘package:tidyr’:

    extract



## Cálculo del RCA

In [17]:
#' Compute the revealed comparative advantage (RCA) by year, reporter and
#' product.
#'
#' For every year the function computes
#'   prop      = share of a product in the reporter's total `value`,
#'   mean_prop = share of the same product in the total `value` of ALL
#'               reporters present in `data` (computed before any filtering),
#'   RCA       = prop / mean_prop.
#'
#' @param data Data frame with the columns `year`, `reporter`, `rep_iso`,
#'   `SITC` and `value`.
#' @param country_filt Optional vector of `rep_iso` codes. When supplied, only
#'   those reporters appear in the result. `NA` (the default), `NULL` or an
#'   empty vector keep every reporter.
#' @param digits Number of leading SITC characters to aggregate to. With 5 or
#'   more the SITC codes are used as they come.
#' @return A tibble grouped by `year` and `rep_iso` with the columns `year`,
#'   `SITC`, `rep_iso`, `reporter`, `value`, `prop`, `mean_prop` and `RCA`.
rel_comp_adv <- function(data, country_filt = NA, digits = 4) {

  # With 5 digits the data is already at its original level, so the
  # aggregation is skipped.
  if (digits < 5) {
    data <- data %>%
      mutate(SITC = substr(as.character(SITC), 1, digits)) %>%
      group_by(year, reporter, rep_iso, SITC) %>%
      # as.numeric() avoids integer overflow on large totals and na.rm = TRUE
      # matches the NA handling used by the aggregations below.
      summarise(value = sum(as.numeric(value), na.rm = TRUE)) %>%
      ungroup()
  }

  # The world distribution (denominator of the RCA) uses every reporter.
  mean_dist_SITC <- data %>%
    group_by(year, SITC) %>%
    summarise(value = sum(as.numeric(value), na.rm = TRUE)) %>%
    group_by(year) %>%
    mutate(mean_prop = value / sum(value, na.rm = TRUE))

  # Optional filter: report results only for the selected reporters.
  # all(is.na(.)) keeps the scalar NA default working while still accepting a
  # vector with several country codes.
  has_filter <- !is.null(country_filt) && !all(is.na(country_filt))
  if (has_filter) {
    data <- data %>%
      filter(rep_iso %in% country_filt)
  }

  # Product shares inside each reporter-year (computed after the filter).
  data <- data %>%
    group_by(year, SITC, rep_iso, reporter) %>%
    summarise(value = sum(as.numeric(value), na.rm = TRUE)) %>%
    group_by(year, rep_iso) %>%
    mutate(prop = value / sum(value, na.rm = TRUE))

  data %>%
    left_join(
      mean_dist_SITC %>% select(year, SITC, mean_prop),
      by = c("year", "SITC")
    ) %>%
    mutate(RCA = prop / mean_prop)
}


In [26]:
# Load the raw export data; SITC is forced to character
data <- read_csv(
  file = "../dataset/Export_World_directo.csv",
  col_types = cols(SITC = col_character())
)

# Keep only the rows whose product code has 5 characters
data <- filter(data, nchar(SITC) == 5)

In [27]:
# RCA for every reporter, with products aggregated to 4 SITC digits
RCA <- data %>% rel_comp_adv(digits = 4)

# Persist the result as a comma-delimited text file
write_delim(
  RCA,
  "results/RCA_mundo4d.txt",
  delim = ","
)

# Cálculo de la similitud

In [28]:
#' Symmetrise a square matrix by keeping, for every pair (i, j), the smaller
#' of `M[i, j]` and `M[j, i]`.
#'
#' NOTE: despite its name the function returns the element-wise MINIMUM of
#' `M` and `t(M)`; this is what the original implementation computed and it is
#' kept unchanged. NA/NaN cells propagate to the result instead of raising
#' "NAs are not allowed in subscripted assignments".
#'
#' @param M A square numeric matrix.
#' @return A symmetric matrix with the dimensions and dimnames of `M`.
symmetric_max <- function(M) {
  # pmin() copies dim/dimnames from its first argument.
  pmin(M, t(M))
}

In [29]:
#' Build a symmetric product-by-product similarity matrix from RCA values.
#'
#' A reporter "has" a product when its RCA is above 1. For products i and j
#' the function counts the reporters that have both, divides the count by the
#' number of reporters that have i, and symmetrises the result with
#' `symmetric_max()`.
#'
#' @param RCA Data frame with exactly one row per (`SITC`, `reporter`) pair
#'   and the columns `SITC`, `reporter` and `RCA`.
#' @return A square numeric matrix whose row and column names are the SITC
#'   codes. The diagonal is 0.
similarity <- function(RCA) {
  stopifnot(all(c("SITC", "reporter", "RCA") %in% names(RCA)))

  # Binary indicator: 1 when RCA > 1, 0 otherwise. A missing RCA becomes NA
  # here and is replaced by 0 through `fill` below.
  cualitative_RCA <- RCA %>%
    ungroup() %>% # drop any grouping before reshaping
    mutate(RCA = as.integer(case_when(RCA > 1 ~ 1,
                                      RCA <= 1 ~ 0)))

  # One row per product, one column per reporter.
  w <- cualitative_RCA %>%
    spread(reporter, RCA, fill = 0)

  SITC <- w$SITC
  # Select the reporter columns by name rather than by position.
  mat <- as.matrix(w[, setdiff(names(w), "SITC"), drop = FALSE])

  # v[i, j] = number of reporters with RCA > 1 in both product i and j.
  v <- mat %*% t(mat)
  diag(v) <- 0
  dimnames(v) <- list(SITC, SITC)

  # Number of reporters with RCA > 1 in each product.
  totales <- rowSums(mat)

  # Row i is divided by totales[i] (column-major recycling).
  probabilities <- v / totales
  # A product nobody has RCA > 1 in gives 0 / 0 = NaN; treat it as sharing
  # nothing with any other product.
  probabilities[totales == 0, ] <- 0

  symmetric_max(probabilities)
}

In [30]:
# Reload the stored RCA table; SITC is forced to character
RCA <- read_csv(
  "results/RCA_mundo4d.txt",
  col_types = cols(SITC = col_character())
)

In [31]:
# Quick look at the columns and types of the RCA table
glimpse(RCA)

Observations: 1,976,058
Variables: 8
$ year      <int> 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 199...
$ SITC      <chr> "1110", "1110", "1110", "1110", "1110", "1110", "1110", "...
$ rep_iso   <chr> "ALB", "AND", "ARG", "AUS", "AUT", "BEL", "BGR", "CAN", "...
$ reporter  <chr> "Albania", "Andorra", "Argentina", "Australia", "Austria"...
$ value     <dbl> 6278, 1890590, 16736860, 32270112, 209871220, 367487383, ...
$ prop      <dbl> 3.054278e-05, 4.077423e-02, 9.772752e-04, 6.681516e-04, 3...
$ mean_prop <dbl> 0.0009233057, 0.0009233057, 0.0009233057, 0.0009233057, 0...
$ RCA       <dbl> 0.033079813, 44.161134546, 1.058452460, 0.723651520, 3.83...


In [32]:
# RCA for 2016 only, dropping every column named in the select() below
RCA_2016 <- RCA %>%
  filter(year == 2016) %>%
  select(-c(year, rep_iso, value, prop, mean_prop))


In [53]:
# Product similarity matrix based on the 2016 RCA values
symmetric_proba <- similarity(RCA = RCA_2016)

# Tibble version of the matrix with the product code as first column.
# as_tibble() replaces the deprecated as_data_frame(). The matrix is square
# with identical row and column names, so the column names label the rows.
# NOTE(review): the export cell writes data.frame(symmetric_proba) rather than
# this tibble - confirm which one is intended.
symmetric_proba_df <- as_tibble(symmetric_proba)
symmetric_proba_df$SITC <- names(symmetric_proba_df)
symmetric_proba_df <- select(symmetric_proba_df, SITC, everything())

In [54]:
# Export the 2016 similarity matrix.
# NOTE(review): data.frame() applies check.names, so column names that are
# not syntactic (e.g. codes starting with a digit) get altered, and write_csv()
# does not write row names - confirm downstream readers expect this layout.
write_csv(
  data.frame(symmetric_proba),
  "results/similitud_4d_2016.csv"
)

Similitud con el RCA promedio

In [55]:
# Mean RCA of each reporter-product pair over the years present in RCA
RCA_promedio <- RCA %>%
  group_by(reporter, SITC) %>%
  summarise(RCA = mean(RCA))

In [56]:
# Product similarity matrix based on the mean RCA of each reporter-product
symmetric_proba <- similarity(RCA = RCA_promedio)

# Tibble version of the matrix with the product code as first column.
# as_tibble() replaces the deprecated as_data_frame(). The matrix is square
# with identical row and column names, so the column names label the rows.
symmetric_proba_df <- as_tibble(symmetric_proba)
symmetric_proba_df$SITC <- names(symmetric_proba_df)
symmetric_proba_df <- select(symmetric_proba_df, SITC, everything())

# NOTE(review): the file is written from the bare matrix, not from
# symmetric_proba_df, so it carries no SITC column - confirm this is intended.
write_csv(
  data.frame(symmetric_proba),
  "results/similitud_4d_mean.csv"
)

In [59]:
# Show the dimensions (rows, columns) of the current symmetric_proba matrix
dim(symmetric_proba) 