1. had to change chains to 2 when compiling the models

In [1]:
outputfile <- 'stan-fit-US2.rdata'
samplesfile <- 'samples-US2.rdata'
resultsfile <- 'results-elites-US2.rdata'

In [1]:
source('Barbera/functions.R')

## change the following lines to run this Rscript for other countries
matrixfile <- 'Barbera/adj-matrix-US.rdata'
#outputfile <- 'temp/stan-fit-US.rdata'
#samplesfile <- 'output/samples-US.rdata'
#resultsfile <- 'output/results-elites-US.rdata'
country <- 'US'

# parameters for Stan model
n.iter <- 1000
n.warmup <- 100
thin <- 2 ## this will give up to 200 effective samples for each chain and par

# loading data
load(matrixfile)

## starting values for elites (for identification purposes)
load("Barbera/elites-data.Rdata")

# US:
if (country=="US"){
	us <- elites.data[['US']]
	parties <- merge(
		data.frame(screen_name = colnames(y), stringsAsFactors=F),
		us[,c("screen_name", "party")], sort=FALSE, all.x=TRUE)$party
	start.phi <- rep(0, length(parties))
	start.phi[parties=="D"] <- -1
	start.phi[parties=="R"] <- 1
}

J <- dim(y)[1]

# choosing a sample of 10,000 "informative" users who follow 10 or more
# politicians, and then subsetting politicians followed by >200 of these

if (J>100){
  J <- 100
  inform <- which(rowSums(y)>10)
  set.seed(12345)
  subset.i <- sample(inform, J)
  y <- y[subset.i, ]
  start.phi <- start.phi[which(colSums(y)>2)]
  y <- y[,which(colSums(y)>2)]
}

## data for model
J <- dim(y)[1]
K <- dim(y)[2]
N <- J * K
jj <- rep(1:J, times=K)
kk <- rep(1:K, each=J)

stan.data <- list(J=J, K=K, N=N, jj=jj, kk=kk, y=c(as.matrix(y)))

## rest of starting values
colK <- colSums(y)
rowJ <- rowSums(y)
normalize <- function(x){ (x-mean(x))/sd(x) }

# set the initial parameters for the model. Currently set at 2 sets of inits. 1 init = 1 chain
inits <- rep(list(list(alpha=normalize(log(colK+0.0001)), 
	beta=normalize(log(rowJ+0.0001)),
  theta=rnorm(J), phi=start.phi,mu_beta=0, sigma_beta=1, 
  gamma=abs(rnorm(1)), mu_phi=0, sigma_phi=1, sigma_alpha=1)),4)


Loading required package: Matrix



In [11]:
inits

In [2]:
library(rstan)

Loading required package: StanHeaders

Loading required package: ggplot2

rstan (Version 2.19.3, GitRev: 2e1f913d3ca3)

For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores()).
To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)



In [3]:
#bilinear
stan.code <- '
data {
  int<lower=1> J; // number of twitter users
  int<lower=1> K; // number of elite twitter accounts
  int<lower=1> N; // N = J x K
  int<lower=1,upper=J> jj[N]; // twitter user for observation n
  int<lower=1,upper=K> kk[N]; // elite account for observation n
  int<lower=0,upper=1> y[N]; // dummy if user i follows elite j
}
parameters {
  vector[K] alpha;
  vector[K] phi;
  vector[J] theta;
  vector[J] beta;
  real mu_beta;
  real<lower=0.1> sigma_beta;
  real mu_phi;
  real<lower=0.1> sigma_phi;
  real<lower=0.1> sigma_alpha;
  real gamma;
}
model {
  alpha ~ normal(0, sigma_alpha);
  beta ~ normal(mu_beta, sigma_beta);
  phi ~ normal(mu_phi, sigma_phi);
  theta ~ normal(0, 1); 
  for (n in 1:N)
    y[n] ~ bernoulli_logit( alpha[kk[n]] + beta[jj[n]] - 
      gamma * ( theta[jj[n]] * phi[kk[n]] ) );
}
'

## compiling model
stan.model <- stan(model_code=stan.code, 
    data = stan.data, init=inits, iter=1, warmup=0, chains=4)


SAMPLING FOR MODEL '583aab2d546e43b9f077bfd713049a38' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.003156 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 31.56 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1:          performed for num_warmup < 20
Chain 1: 
Chain 1: Iteration: 1 / 1 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 2e-06 seconds (Warm-up)
Chain 1:                0.003327 seconds (Sampling)
Chain 1:                0.003329 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL '583aab2d546e43b9f077bfd713049a38' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.001955 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 19.55 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2:          performed for num_warmup < 20
Chain 2: 
Chain 2: Iteration: 1 / 1 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 1e-06 seconds

“There were 4 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
“Examine the pairs() plot to diagnose sampling problems
”
“The largest R-hat is NA, indicating chains have not mixed.
Running the chains for more iterations may help. See
“Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
“Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See


In [4]:
# run bilinear with benchmark
start <- Sys.time()

stan.fit <- stan(fit=stan.model, data = stan.data, 
	iter=n.iter, warmup=n.warmup, chains=4, 
  	thin=thin, cores=4)

end <- Sys.time()
end - start

“There were 143 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
“There were 3 chains where the estimated Bayesian Fraction of Missing Information was low. See
“Examine the pairs() plot to diagnose sampling problems
”
“The largest R-hat is 2.22, indicating chains have not mixed.
Running the chains for more iterations may help. See
“Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
“Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See


Time difference of 10.43751 mins

In [4]:
# euclidean
stan.code <- '
data {
  int<lower=1> J; // number of twitter users
  int<lower=1> K; // number of elite twitter accounts
  int<lower=1> N; // N = J x K
  int<lower=1,upper=J> jj[N]; // twitter user for observation n
  int<lower=1,upper=K> kk[N]; // elite account for observation n
  int<lower=0,upper=1> y[N]; // dummy if user i follows elite j
}
parameters {
  vector[K] alpha;
  vector[K] phi;
  vector[J] theta;
  vector[J] beta;
  real mu_beta;
  real<lower=0.1> sigma_beta;
  real mu_phi;
  real<lower=0.1> sigma_phi;
  real<lower=0.1> sigma_alpha;
  real gamma;
}
model {
  alpha ~ normal(0, sigma_alpha);
  beta ~ normal(mu_beta, sigma_beta);
  phi ~ normal(mu_phi, sigma_phi);
  theta ~ normal(0, 1); 
  for (n in 1:N)
    y[n] ~ bernoulli_logit( alpha[kk[n]] + beta[jj[n]] - 
      gamma * square( theta[jj[n]] - phi[kk[n]] ) );
}
'

## compiling model
stan.model <- stan(model_code=stan.code, 
    data = stan.data, init=inits, iter=1, warmup=0, chains=2)


SAMPLING FOR MODEL '8992c9a87bed3e79f326f940366064d3' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.003112 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 31.12 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1:          performed for num_warmup < 20
Chain 1: 
Chain 1: Iteration: 1 / 1 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 1e-06 seconds (Warm-up)
Chain 1:                0.003181 seconds (Sampling)
Chain 1:                0.003182 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL '8992c9a87bed3e79f326f940366064d3' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.001831 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 18.31 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2:          performed for num_warmup < 20
Chain 2: 
Chain 2: Iteration: 1 / 1 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 2e-06 seconds

“There were 2 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
“Examine the pairs() plot to diagnose sampling problems
”
“The largest R-hat is NA, indicating chains have not mixed.
Running the chains for more iterations may help. See
“Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
“Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See


In [5]:
## running modle
# run euclidean with benchmark
# removed inits=inits
start <- Sys.time()

stan.fit <- stan(fit=stan.model, data = stan.data, 
	iter=n.iter, warmup=n.warmup, chains=2, 
  	thin=thin, cores=2)

end <- Sys.time()
end - start

“The largest R-hat is 1.88, indicating chains have not mixed.
Running the chains for more iterations may help. See
“Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
“Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See


Time difference of 1.535225 mins

In [5]:
samples <- extract(stan.fit, pars=c("alpha", "phi", "gamma", "mu_beta",
	"sigma_beta", "sigma_alpha"))

#save(samples, file=samplesfile)

In [6]:
results <- data.frame(
#	screen_name = samples$m.names,
	phi = apply(samples$phi, 2, mean),
	phi.sd = apply(samples$phi, 2, sd),
	alpha = apply(samples$alpha, 2, mean),
	alpha.sd = apply(samples$alpha, 2, sd),
	stringsAsFactors=F)
#save(results, file=resultsfile)

In [8]:
# Euclidean
results

phi,phi.sd,alpha,alpha.sd
<dbl>,<dbl>,<dbl>,<dbl>
0.038355195,1.0569436,4.0524835,0.4764157
0.029936621,0.6927476,2.7081662,0.2954601
0.006607209,0.3917197,-1.4618269,0.5301285
0.008799804,1.2335606,0.4742407,0.3808591
0.040677668,1.3991245,3.8765033,0.5783118
0.017042826,0.8804932,2.5184064,0.3701355
0.005840355,0.6000157,3.0990775,0.3005255
0.022404930,1.2832352,2.1293867,0.3885177
0.017983559,0.5606334,0.1334322,0.3948742
0.009400506,1.0140280,0.2223656,0.3329977


In [19]:
# bilinear
results

phi,phi.sd,alpha,alpha.sd
<dbl>,<dbl>,<dbl>,<dbl>
1.3583951,4.3778187,3.67930165,0.3872703
0.6531002,2.0983158,2.96247062,0.2786529
-0.0372242,1.2735909,-0.86099012,0.5747135
1.1448355,3.7636736,0.03925463,0.5175641
1.9949969,6.2668191,2.56028872,0.4266187
-1.1933432,3.5360072,2.09844679,0.3502107
-0.9000870,2.8076200,3.10086362,0.2843031
1.5900341,4.9399020,1.12213895,0.4686907
0.4286227,1.6274987,0.62806027,0.3888749
-1.1919851,3.5978872,-0.23460101,0.5019171


In [7]:
# bilinear 2
results

phi,phi.sd,alpha,alpha.sd
<dbl>,<dbl>,<dbl>,<dbl>
0.84616260,3.1947928,3.67533639,0.3586244
0.43213642,1.4823500,2.94566389,0.2626226
0.04306556,0.8330382,-0.85499274,0.5261412
0.70163681,2.7627028,0.02268334,0.5087080
1.31187525,4.5267238,2.53829510,0.4289468
-0.74899666,2.5069959,2.07089082,0.3472275
-0.61114630,1.9529559,3.09039389,0.3033879
0.92991021,3.6245365,1.14041744,0.4574240
0.29242056,1.1270830,0.63372328,0.3806208
-0.69905154,2.6227804,-0.26164692,0.5417013
