In [None]:
import random

In [None]:
class TRIEST_BASE:
    """Estimate the global triangle count of an edge stream from a fixed-size sample.

    The instance keeps at most ``M`` edges in ``self.S`` (reservoir sampling)
    together with the number of triangles currently present in that sample
    (``global_counter``) and, per node, the number of sampled triangles the
    node takes part in (``local_counter``).

    Edges are indexable pairs ``(u, v)`` and are treated as undirected when
    neighbourhoods are built: an endpoint is matched in either position.
    The counters are only meaningful if every undirected edge appears at
    most once in the stream.

    Note: ``update_counters`` scans the whole sample on every call, so the
    cost of processing one stream element is proportional to ``len(self.S)``.
    """

    def __init__(self, M = 100):
        """Create an empty estimator whose sample holds at most ``M`` edges."""
        self.M = M
        self.S = set()            # current edge sample
        self.global_counter = 0   # triangles fully contained in the sample
        self.local_counter = {}   # node -> sampled triangles containing it
        self.t = 0                # number of stream elements seen so far

    def sample_edge(self, Edge, t):
        """Decide whether the ``t``-th stream element ``Edge`` enters the sample.

        While ``t <= M`` every edge is accepted. Afterwards the edge is
        accepted with probability ``M / t``; in that case one edge chosen
        uniformly at random is evicted from ``self.S`` and the counters are
        decremented for the triangles that evicted edge was closing.

        Returns True when the caller should insert ``Edge`` into the sample,
        False otherwise. ``Edge`` itself is never inserted here.
        """
        if t <= self.M:
            return True
        if random.random() < (self.M / t):
            # random.sample() no longer accepts a set (TypeError on 3.11+),
            # so draw the victim from a sequence snapshot of the sample.
            evicted_edge = random.choice(tuple(self.S))
            self.S.remove(evicted_edge)
            # The counters must shrink for the edge that LEFT the sample,
            # not for the incoming one.
            self.update_counters('-', evicted_edge)
            return True
        return False

    def _bump_local(self, node, delta):
        """Add ``delta`` to ``node``'s local counter, dropping non-positive entries."""
        updated = self.local_counter.get(node, 0) + delta
        if updated > 0:
            self.local_counter[node] = updated
        else:
            self.local_counter.pop(node, None)

    def update_counters(self, operation, edge):
        """Adjust the triangle counters for ``edge`` against the current sample.

        For every node adjacent (within ``self.S``) to both endpoints of
        ``edge`` the global counter and the local counters of the three
        triangle corners are changed by +1 (``operation == '+'``) or -1
        (``operation == '-'``). Local counters that reach zero are removed.

        Raises:
            ValueError: if ``operation`` is neither ``'+'`` nor ``'-'``.
        """
        if operation == '+':
            delta = 1
        elif operation == '-':
            delta = -1
        else:
            raise ValueError("operation must be '+' or '-', got %r" % (operation,))

        # edge E = (u,v)
        u = edge[0]
        v = edge[1]
        neighborhood_of_u = set()
        neighborhood_of_v = set()
        # One pass over the sample collects both neighbourhoods.
        for sampled in self.S:
            first, second = sampled[0], sampled[1]
            if first == u:
                neighborhood_of_u.add(second)
            if second == u:
                neighborhood_of_u.add(first)
            if first == v:
                neighborhood_of_v.add(second)
            if second == v:
                neighborhood_of_v.add(first)

        # Each common neighbour c closes one triangle (u, v, c).
        shared_neighborhood = neighborhood_of_u & neighborhood_of_v
        for c in shared_neighborhood:
            self.global_counter += delta
            for node in (c, u, v):
                self._bump_local(node, delta)

    def run_triest_base(self, streams):
        """Consume ``streams`` and return the estimated global triangle count.

        Every element advances ``self.t``; accepted edges are added to the
        sample and the counters are updated. The sample count is then scaled
        by ``max(1, t(t-1)(t-2) / (M(M-1)(M-2)))``. The scaling factor is
        printed before the estimate is returned.

        A sample of fewer than three edges can never contain a triangle, so
        for ``M < 3`` the factor is taken as 1 instead of dividing by zero.
        """
        for element in streams:
            self.t += 1
            if self.sample_edge(element, self.t):
                self.S.add(element)
                self.update_counters('+', element)

        denominator = self.M * (self.M - 1) * (self.M - 2)
        if denominator > 0:
            eps = max(1, (self.t * (self.t - 1) * (self.t - 2)) / denominator)
        else:
            eps = 1
        print('Epsilon is ', eps)
        # estimation for the global triangle count
        est_gc = eps * self.global_counter
        return est_gc

In [None]:
# import dataset
# Optional cap on the number of distinct edges to load (handy for quick
# experiments); None reads the whole file.
MAX_EDGES = None

streams = set()
with open("web-Stanford.txt") as f:
    for line in f:
        fields = line.split()
        # Skip blank/short lines and '#' comment lines instead of crashing on them.
        if len(fields) < 2 or fields[0].startswith('#'):
            continue
        u, v = fields[0], fields[1]
        if u == v:
            # self-loops cannot be part of a triangle
            continue
        # The file lists directed links, but triangle counting treats edges as
        # undirected. Store each pair in one canonical orientation so that
        # (u, v) and (v, u) collapse into a single edge and are not counted twice.
        streams.add((u, v) if u < v else (v, u))
        if MAX_EDGES is not None and len(streams) >= MAX_EDGES:
            break

# Number of distinct edges actually kept (duplicates are not counted).
size_stream = len(streams)

print('The amount of edges of Data Stream contains', len(streams))


In [None]:
# Sample size used for the approximate run and seed for the sampler.
SAMPLE_SIZE = 80000
RANDOM_SEED = 42

# Iterating a set of string tuples yields a different order in every kernel
# (string hashes are randomised per process), so fix the stream order to make
# the seeded run repeatable after Restart & Run All.
edge_stream = sorted(streams)

# Approximate count: the sample holds only SAMPLE_SIZE edges.
random.seed(RANDOM_SEED)
triest_base = TRIEST_BASE(SAMPLE_SIZE)
print('the value of M is', triest_base.M)
glo_tri_counter = triest_base.run_triest_base(edge_stream)
print('Estimation for the global triangle count is', glo_tri_counter)

# Reference count: with M equal to the stream size no edge is ever evicted,
# so the sample counter is the count over the whole stream.
triest_base = TRIEST_BASE(size_stream)
print('the value of M is', size_stream)
glo_tri_counter = triest_base.run_triest_base(edge_stream)
print('Actual global triangle count is', glo_tri_counter)
