-
Notifications
You must be signed in to change notification settings - Fork 0
/
simulation_m2.m
132 lines (110 loc) · 6.72 KB
/
simulation_m2.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
function [inivertices, vertices, edges, edgelist] = simulation_m2(nrows, ncols, ncolors, shape, originalrows, shiftedrows, lambda, mu, simstep_max, max_vertices, max_edges, totalvertices, totaledgelocations, rplabelled_colors, rplabelled_edges)
% SIMULATION_M2.m creates the seeding, the contamination edges and the resulting
% synthetic dataset by simulation.
%
% This uses method 2, that is, random permutations.
%
% EXPLANATION OF VARIABLES
%
% inivertices Size: (nrows, ncols, 1+ncolors, simstep_max), essentially
% simstep_max independent random realizations of the seeding,
% before contamination would have taken place. Primary use
% is in visualization. See also vertices.
% vertices Size: (nrows, ncols, 1+ncolors, simstep_max), essentially
% simstep_max independent random realizations of the dataset.
% They store the synthetic dataset, similarly to how
% experiment_matrix stores the experimental data.
% vertices(:,:,1,simstep) are indicators of the area of interest.
% (These are identical for simstep=1:simstep_max.)
% vertices(i,j,2:ncolors+1) represent the presence
% of individual colors in the dataset in the given vertex (i,j).
% edges Size (nrows,ncols,3,simstep_max) matrix whose entries
% (i,j,k,simstep) are, for every fixed simstep, indicator
% variables of open edges between well (i,j) and its neighbor
% to the (k==1) right, (k==2) right down, (k==3) left down.
% edgelist List of the edges (connected neighbors) from the first of
% simstep_max simulations (simstep==1). It has size (4, no.
% of open edges). Each column stores an edge (v_1, v_2) as
% [rowindex(v_1); colindex(v_1); rowindex(v_2); colindex(v_2)].
%
% lambda Row vector of seeding rates for the ncolors colors
% mu Contamination rate (probability of any given
% undirected edge being open)
% simstep_max Total number of simulations
% simstep Index of current simulation in 1:simstep_max.
%
% totalvertices The total number of vertices in the lattice that belong to
% the area of interest. Defined as the sum of all entries of
% experiment_matrix(:,:,1), which is identical to max_vertices.
% threshvertices Row vector of length ncolors, the numbers of seeds for
% individual colors that will be put into vertices(:,:,:,simstep).
% totaledgelocations The total number of edges, whether open or closed,
% whose both ends belong to the area of interest. Defined
% as the sum of all entries of max_edges.
% threshedges Number of edges that will be put into edges(:,:,:,simstep).
% These are distributed randomly across edges(:,:,:,simstep)
% for every fixed simstep.
% rplabelled_colors Matrix of size (nrows, ncols, ncolors, simstep_max) whose
% entries are 0 outside the area of interest (i.e. where
% max_vertices==0) and a random permutation of (1, 2, ...,
% totalvertices) for every fixed i and simstep in
% rplabelled_colors(:,:,i,simstep).
% rplabelled_edges Matrix of size (nrows, ncols, 3, simstep_max) whose entries
% are 0 where there can be no edge (i.e. where max_edges==0)
% and a random permutation of (1, 2, ..., totaledgelocations)
% for every fixed simstep in rplabelled_edges(:,:,:,simstep).
%
% components1{i} Matrix which contains in one row the elements of the
% i'th connected component (linear indexing). Its purpose
% is that each vertex within a connected component will
% ultimately have the same color.
%
% Felix Beck, Bence Melykuti (University of Freiburg, Germany)
% 16/7/2015, 2/2/2017
vertices=cat(3, max_vertices, zeros(nrows,ncols,ncolors));
vertices=repmat(vertices, [1,1,1,simstep_max]);
edges=zeros(nrows,ncols,3,simstep_max);
threshvertices=round(lambda*totalvertices);
% We use the random permutations rplabelled_colors so that the proportions
% of seeded vertices are very accurately the respective parameters.
% We add a seed of color i if rplabelled_colors is both non-zero (greater
% than 0) and it is at most threshvertices(i).
vertices(:,:,2:1+ncolors,:) = (0<rplabelled_colors) .* (rplabelled_colors<=repmat(reshape(threshvertices,[1,1,ncolors,1]),[nrows,ncols,1,simstep_max]));
inivertices=vertices; % seeding state of vertices saved before contamination
threshedges=round(mu*totaledgelocations);
% We use the random permutations rplabelled_edges so that the proportion of
% open edges is very accurately the respective parameter.
% We add an edge if rplabelled_edges is both non-zero (greater than 0) and
% it is at most threshedges.
edges = (0<rplabelled_edges) .* (rplabelled_edges<=threshedges);
for simstep=1:simstep_max
% Find all the contaminated vertices
if simstep==1 % For later visualization of the first simulation, we store edgelist.
[edgelist, components1]=contamination(edges(:,:,:,simstep), nrows, ncols, originalrows, shiftedrows);
else
[~, components1]=contamination(edges(:,:,:,simstep), nrows, ncols, originalrows, shiftedrows);
end
if length(components1{1})>0 % Only do the following if there is at least one edge, one nontrivial component. Otherwise there is a size mismatch between (0,0) and (0,2) defining comp.
% Transform vertices for each simulation into 3D for linear indexing
vertices3d=vertices(:,:,:,simstep);
for i=1:size(components1,2) % Loop for all connected components of contamination
% for j=1:ncolors, j*nrows*ncols is justified by the dimensions of the
% matrix vertices; this way it 'jumps' into the (:,:,2:ncolors+1) matrices
% which represent all the colors
%{
% More transparent, original version applied contamination for connected
% component i and color j:
for j=1:ncolors
comp=components1{i}+j*nrows*ncols;
vertices3d(comp)=max(vertices3d(comp));
end
%}
% We pair elements of components1{i} with 1:ncolors in all possible ways.
comp=repmat(components1{i}',[1, ncolors])+repmat(1:ncolors, [length(components1{i}), 1])*nrows*ncols;
% Apply contamination for connected component i and color j for j=1:ncolors in one go
vertices3d(comp)=repmat(max(vertices3d(comp),[],1),length(components1{i}),1);
end
vertices(:,:,:,simstep)=vertices3d; % Transform vertices back into 4D
end % of if length(components1{1})>0
end % of for simstep
end