In [13]:
library(tidyverse)
library(countrycode)
library(magrittr)
library(glue)


Attaching package: ‘glue’

The following object is masked from ‘package:dplyr’:

    collapse



## Cálculo del RCA

In [2]:
# Compute the comparative-advantage index (RCA) of every reporter in every
# product and year.
#
# RCA = prop / mean_prop, where
#   prop      = share of the product in the reporter's total value that year
#   mean_prop = share of the product in the total value of ALL reporters
#               that year (computed before any country filter is applied).
#
# Args:
#   data:         data frame with the columns year, reporter, rep_iso, SITC
#                 and value.
#   country_filt: optional vector of rep_iso codes. When supplied, only those
#                 reporters are kept in the result. NA (the default), NULL or
#                 an all-NA vector keep every reporter.
#   digits:       number of leading SITC characters to aggregate to. With 5
#                 or more the data are used as supplied (the original data
#                 are 5-digit, so the re-aggregation is skipped).
#
# Returns:
#   A tibble, still grouped by year and rep_iso, with one row per
#   year / SITC / rep_iso / reporter and the columns value, prop, mean_prop
#   and RCA.
rel_comp_adv <- function(data, country_filt = NA, digits = 4) {

  if (digits < 5) {
    # Truncate the product code and re-aggregate. The sum is done on doubles
    # and ignores NAs, exactly like the sums further down, so that a single
    # missing 5-digit record cannot wipe out a whole aggregated product.
    data <- data %>%
      mutate(SITC = substr(SITC, 1, digits)) %>%
      group_by(year, reporter, rep_iso, SITC) %>%
      summarise(value = sum(as.numeric(value), na.rm = TRUE))
  }

  # Denominator: distribution of products over all reporters, per year.
  mean_dist_SITC <- data %>%
    group_by(year, SITC) %>%
    summarise(value = sum(as.numeric(value), na.rm = TRUE)) %>%
    group_by(year) %>%
    mutate(mean_prop = value / sum(value, na.rm = TRUE)) %>%
    select(year, SITC, mean_prop)

  # Optional restriction of the output to a selection of reporters. The test
  # must yield a single TRUE/FALSE even when several codes are passed.
  keep_all <- is.null(country_filt) || all(is.na(country_filt))
  if (!keep_all) {
    data <- data %>%
      filter(rep_iso %in% country_filt)
  }

  # Numerator: distribution of products inside each reporter, per year.
  data <- data %>%
    group_by(year, SITC, rep_iso, reporter) %>%
    summarise(value = sum(as.numeric(value), na.rm = TRUE)) %>%
    group_by(year, rep_iso) %>%
    mutate(prop = value / sum(value, na.rm = TRUE))

  data <- data %>%
    left_join(mean_dist_SITC, by = c("year", "SITC")) %>%
    mutate(RCA = prop / mean_prop)

  data
}


In [4]:
# Read the export data, parsing the SITC code as text.
data <- read_csv(
  file = "../dataset/Export_World_directo.csv",
  col_types = cols(SITC = col_character())
)
# Only the products coded with 5 digits are kept.
data <- filter(data, nchar(SITC) == 5)

“1 parsing failure.
row # A tibble: 1 x 5 col      row col   expected  actual    file                                expected    <int> <chr> <chr>     <chr>     <chr>                               actual 1 977509 <NA>  5 columns 4 columns '../dataset/country_yr_sitc_4d.csv' file # A tibble: 1 x 5
”

In [27]:
# RCA aggregated to 4-digit SITC codes, stored as comma-delimited text.
RCA <- data %>% rel_comp_adv(digits = 4)
write_delim(RCA, "results/RCA_mundo4d.txt", delim = ",")

-----------

#### Largo plazo

Tengo que cambiar algunos detalles de la función para los datos de largo plazo (LP).

In [7]:
# Compute the comparative-advantage index (RCA) of every reporter in every
# product and year, for the long-term data set (values are read from the
# export_value column and no re-aggregation of the SITC code is done).
#
# RCA = prop / mean_prop, where
#   prop      = share of the product in the reporter's total value that year
#   mean_prop = share of the product in the total value of ALL reporters
#               that year (computed before any country filter is applied).
#
# Args:
#   data:         data frame with the columns year, reporter, rep_iso, SITC
#                 and export_value.
#   country_filt: optional vector of rep_iso codes. When supplied, only those
#                 reporters are kept in the result. NA (the default), NULL or
#                 an all-NA vector keep every reporter.
#
# Returns:
#   A tibble, still grouped by year and rep_iso, with one row per
#   year / SITC / rep_iso / reporter and the columns value, prop, mean_prop
#   and RCA.
rel_comp_adv <- function(data, country_filt = NA) {

  # Denominator: distribution of products over all reporters, per year.
  mean_dist_SITC <- data %>%
    group_by(year, SITC) %>%
    summarise(value = sum(as.numeric(export_value), na.rm = TRUE)) %>%
    group_by(year) %>%
    mutate(mean_prop = value / sum(value, na.rm = TRUE)) %>%
    select(year, SITC, mean_prop)

  # Optional restriction of the output to a selection of reporters. The test
  # must yield a single TRUE/FALSE even when several codes are passed.
  keep_all <- is.null(country_filt) || all(is.na(country_filt))
  if (!keep_all) {
    data <- data %>%
      filter(rep_iso %in% country_filt)
  }

  # Numerator: distribution of products inside each reporter, per year.
  data <- data %>%
    group_by(year, SITC, rep_iso, reporter) %>%
    summarise(value = sum(as.numeric(export_value), na.rm = TRUE)) %>%
    group_by(year, rep_iso) %>%
    mutate(prop = value / sum(value, na.rm = TRUE))

  data <- data %>%
    left_join(mean_dist_SITC, by = c("year", "SITC")) %>%
    mutate(RCA = prop / mean_prop)

  data
}

In [5]:
# Read the long-term data set, parsing the SITC code as text.
data <- read_csv(
  file = "../dataset/country_yr_sitc_4d.csv",
  col_types = cols(SITC = col_character())
)


In [10]:
# RCA for the long-term data, stored as comma-delimited text.
RCA <- data %>% rel_comp_adv()

write_delim(RCA, "results/RCA_LP.txt", delim = ",")

----------------

# Cálculo de la similitud

In [26]:
# Make a square matrix symmetric by keeping, for every pair (i, j), the
# SMALLER of M[i, j] and M[j, i].
#
# NOTE(review): despite the name, the result is the element-wise minimum of
# M and its transpose; the name is kept because callers rely on it.
#
# Args:
#   M: square numeric matrix.
#
# Returns:
#   A matrix with the dimensions and dimnames of M. Pairs in which either
#   value is NA/NaN come back as missing instead of raising an error.
symmetric_max <- function(M) {
  stopifnot(is.matrix(M), nrow(M) == ncol(M))
  # pmin() takes its attributes (dim, dimnames) from the first argument and
  # propagates missing values, which logical-subscript assignment cannot do.
  pmin(M, t(M))
}

In [29]:
# Product-by-product similarity built from co-occurrence of comparative
# advantage across reporters.
#
# Args:
#   RCA: data frame with the columns SITC, reporter and RCA. Any other column
#        is kept as an identifier by spread() and would end up in the
#        matrix, so callers are expected to drop them first.
#
# Returns:
#   A square numeric matrix with SITC codes as row and column names and a
#   zero diagonal. Entry (i, j) is the number of reporters with RCA > 1 in
#   both products divided by the number of reporters with RCA > 1 in one of
#   them, made symmetric with symmetric_max().
similarity <- function(RCA) {

  # 1 when the reporter has RCA > 1 in the product, 0 otherwise. Grouping is
  # dropped first so that grouped input reshapes like plain input.
  presence <- RCA %>%
    ungroup() %>%
    mutate(RCA = as.integer(RCA > 1)) %>%
    spread(reporter, RCA, fill = 0)

  product_codes <- presence$SITC
  # Drop the code column by name rather than by position.
  mat <- as.matrix(select(presence, -SITC))

  # v[i, j]: reporters with RCA > 1 in both product i and product j.
  v <- tcrossprod(mat)
  diag(v) <- 0
  dimnames(v) <- list(product_codes, product_codes)

  # totales[i]: reporters with RCA > 1 in product i. Dividing the matrix by
  # this vector divides row i by totales[i].
  totales <- rowSums(mat)
  probabilities <- v / totales
  # A product with no such reporter would give 0/0 = NaN on its whole row;
  # it co-occurs with nothing, so its similarity is set to 0.
  probabilities[totales == 0, ] <- 0

  symmetric_proba <- symmetric_max(probabilities)
  symmetric_proba
}

In [30]:
# Reload the 4-digit RCA table, parsing the SITC code as text.
RCA <- read_csv(
  "results/RCA_mundo4d.txt",
  col_types = cols(SITC = col_character())
)

In [31]:
# Inspect the columns of the RCA table.
glimpse(RCA)

Observations: 1,976,058
Variables: 8
$ year      <int> 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 1996, 199...
$ SITC      <chr> "1110", "1110", "1110", "1110", "1110", "1110", "1110", "...
$ rep_iso   <chr> "ALB", "AND", "ARG", "AUS", "AUT", "BEL", "BGR", "CAN", "...
$ reporter  <chr> "Albania", "Andorra", "Argentina", "Australia", "Austria"...
$ value     <dbl> 6278, 1890590, 16736860, 32270112, 209871220, 367487383, ...
$ prop      <dbl> 3.054278e-05, 4.077423e-02, 9.772752e-04, 6.681516e-04, 3...
$ mean_prop <dbl> 0.0009233057, 0.0009233057, 0.0009233057, 0.0009233057, 0...
$ RCA       <dbl> 0.033079813, 44.161134546, 1.058452460, 0.723651520, 3.83...


In [60]:
# Rows of 2016 only, without the columns that similarity() does not use.
RCA_2016 <- RCA %>%
  filter(year == 2016) %>%
  select(-c(year, rep_iso, value, prop, mean_prop))


In [61]:
# Similarity matrix for 2016, converted to a table whose first column holds
# the SITC code of each row.
symmetric_proba <- similarity(RCA = RCA_2016)
# as_tibble() replaces the deprecated as_data_frame().
symmetric_proba_df <- as_tibble(symmetric_proba)
# Row labels come from the matrix itself; rows and columns share the same
# SITC codes.
symmetric_proba_df$SITC <- rownames(symmetric_proba)
symmetric_proba_df <- symmetric_proba_df %>% select(SITC, everything())

In [62]:
# Save the 2016 similarity table.
write_csv(
  symmetric_proba_df,
  "results/similitud_4d_2016.csv"
)

Similitud con el RCA promedio

In [63]:
# Mean RCA of each reporter in each product over all rows of the RCA table.
RCA_promedio <- summarise(
  group_by(RCA, reporter, SITC),
  RCA = mean(RCA)
)

In [64]:
# Similarity matrix based on the mean RCA, converted to a table whose first
# column holds the SITC code of each row, then saved.
symmetric_proba <- similarity(RCA = RCA_promedio)
# as_tibble() replaces the deprecated as_data_frame().
symmetric_proba_df <- as_tibble(symmetric_proba)
# Row labels come from the matrix itself; rows and columns share the same
# SITC codes.
symmetric_proba_df$SITC <- rownames(symmetric_proba)
symmetric_proba_df <- symmetric_proba_df %>% select(SITC, everything())

write_csv(symmetric_proba_df, "results/similitud_4d_mean.csv")

In [66]:
# Display the similarity table.
symmetric_proba_df

SITC,1110,1121,1122,1123,1124,1211,1212,1213,1221,⋯,8991,8992,8993,8994,8996,8997,8998,8999,9610,9710
1110,0.0000000,0.26315789,0.48684211,0.61842105,0.43421053,0.27631579,0.30263158,0.28947368,0.4342105,⋯,0.22368421,0.03947368,0.42105263,0.10526316,0.17105263,0.30263158,0.14473684,0.13157895,0.34210526,0.27631579
1121,0.2631579,0.00000000,0.30158730,0.25373134,0.30357143,0.18965517,0.21276596,0.19672131,0.1666667,⋯,0.10256410,0.03333333,0.19607843,0.06666667,0.10000000,0.11363636,0.16666667,0.06451613,0.25490196,0.12698413
1122,0.4868421,0.30158730,0.00000000,0.53731343,0.44444444,0.30158730,0.25396825,0.34920635,0.3174603,⋯,0.20634921,0.01587302,0.34920635,0.04761905,0.14285714,0.26984127,0.20634921,0.20634921,0.28571429,0.33333333
1123,0.6184211,0.25373134,0.53731343,0.00000000,0.52238806,0.32835821,0.31343284,0.32835821,0.4179104,⋯,0.26865672,0.02985075,0.31343284,0.05970149,0.13432836,0.31343284,0.10447761,0.14925373,0.26865672,0.28358209
1124,0.4342105,0.30357143,0.44444444,0.52238806,0.00000000,0.24137931,0.16071429,0.22950820,0.4464286,⋯,0.17857143,0.01785714,0.30357143,0.05357143,0.16071429,0.26785714,0.16071429,0.07142857,0.32142857,0.23809524
1211,0.2763158,0.18965517,0.30158730,0.32835821,0.24137931,0.00000000,0.62068966,0.67213115,0.3103448,⋯,0.22413793,0.05172414,0.32758621,0.03448276,0.06896552,0.25862069,0.12068966,0.32758621,0.24137931,0.42857143
1212,0.3026316,0.21276596,0.25396825,0.31343284,0.16071429,0.62068966,0.00000000,0.59016393,0.2708333,⋯,0.29787234,0.02127660,0.33333333,0.04255319,0.06382979,0.27659574,0.10638298,0.29787234,0.27450980,0.31746032
1213,0.2894737,0.19672131,0.34920635,0.32835821,0.22950820,0.67213115,0.59016393,0.00000000,0.2950820,⋯,0.24590164,0.04918033,0.27868852,0.08196721,0.06557377,0.27868852,0.13114754,0.26229508,0.22950820,0.41269841
1221,0.4342105,0.16666667,0.31746032,0.41791045,0.44642857,0.31034483,0.27083333,0.29508197,0.0000000,⋯,0.22916667,0.04166667,0.31372549,0.04166667,0.18750000,0.33333333,0.12500000,0.18750000,0.25490196,0.17460317
1222,0.5476190,0.21428571,0.38095238,0.47619048,0.38095238,0.39285714,0.30952381,0.40476190,0.3452381,⋯,0.20238095,0.02380952,0.29761905,0.09523810,0.09523810,0.30952381,0.13095238,0.22619048,0.27380952,0.36904762


----------------

#### Largo plazo 

In [None]:
# Reload the long-term RCA table, parsing the SITC code as text.
RCA <- read_csv(
  "results/RCA_LP.txt",
  col_types = cols(SITC = col_character())
)

In [16]:
# Rows of 2016 only; the year and rep_iso columns are kept because the
# reporter-year id is built from them.
RCA_2016 <- RCA %>%
  filter(year == 2016) %>%
  select(-c(reporter, value, prop, mean_prop))


In [17]:
# Inspect the columns of the 2016 subset.
glimpse(RCA_2016)

Observations: 112,082
Variables: 4
$ year    <int> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,...
$ SITC    <chr> "0011", "0011", "0011", "0011", "0011", "0011", "0011", "00...
$ rep_iso <chr> "AND", "ANS", "ARE", "ARG", "ARM", "AUS", "AUT", "AZE", "BE...
$ RCA     <dbl> 0.028455351, 0.044012543, 0.022429795, 0.022640748, 1.77333...


In [24]:
# Product-by-product similarity built from co-occurrence of comparative
# advantage. In this version the unit of observation is an id made of
# reporter and year (rep_iso_year), so several years can be pooled.
#
# Args:
#   RCA: data frame with the columns rep_iso, year, SITC and RCA.
#
# Returns:
#   A square numeric matrix with SITC codes as row and column names and a
#   zero diagonal. Entry (i, j) is the number of ids with RCA > 1 in both
#   products divided by the number of ids with RCA > 1 in one of them, made
#   symmetric with symmetric_max().
similarity <- function(RCA) {

  # 1 when the reporter-year has RCA > 1 in the product, 0 otherwise; one
  # column per reporter-year id after reshaping.
  presence <- RCA %>%
    ungroup() %>%
    mutate(
      RCA = as.integer(RCA > 1),
      id = paste0(rep_iso, "_", year)
    ) %>%
    select(id, SITC, RCA) %>%
    spread(id, RCA, fill = 0)

  product_codes <- presence$SITC
  # Drop the code column by name rather than by position.
  mat <- as.matrix(select(presence, -SITC))

  # v[i, j]: ids with RCA > 1 in both product i and product j.
  v <- tcrossprod(mat)
  diag(v) <- 0
  dimnames(v) <- list(product_codes, product_codes)

  # totales[i]: ids with RCA > 1 in product i. Dividing the matrix by this
  # vector divides row i by totales[i].
  totales <- rowSums(mat)
  probabilities <- v / totales
  # A product with no such id would give 0/0 = NaN on its whole row; it
  # co-occurs with nothing, so its similarity is set to 0.
  probabilities[totales == 0, ] <- 0

  symmetric_proba <- symmetric_max(probabilities)
  symmetric_proba
}

In [28]:
# Long-term similarity matrix for 2016, converted to a table whose first
# column holds the SITC code of each row.
symmetric_proba <- similarity(RCA = RCA_2016)
# as_tibble() replaces the deprecated as_data_frame().
symmetric_proba_df <- as_tibble(symmetric_proba)
# Row labels come from the matrix itself; rows and columns share the same
# SITC codes.
symmetric_proba_df$SITC <- rownames(symmetric_proba)
symmetric_proba_df <- symmetric_proba_df %>% select(SITC, everything())

In [30]:
# Save the long-term 2016 similarity table.
write_csv(
  symmetric_proba_df,
  "results/similitud_LP_2016.csv"
)

Utilizando todos los países y años

In [None]:
# Every reporter and year, without the columns that similarity() does not use.
RCA_all <- RCA %>%
  select(-c(reporter, value, prop, mean_prop))
