In [10]:
import os
import unittest
from time import time, ctime, sleep

from notify_run import Notify
# CONSTANT
# Root folder of the datasets; every dataset path below is derived from it.
DATA_ROOT = "./data"
DBLP_PATH = DATA_ROOT + "/dblp"
AMAZON_PATH = DATA_ROOT + "/amazon"
FACEBOOK_PATH = DATA_ROOT + "/facebook"
# notify.run channel URL. Anyone holding this URL can push to the channel, so
# it can be supplied through the NOTIFY_ENDPOINT environment variable instead
# of living in the notebook. The literal is kept only as a backward-compatible
# fallback for when the variable is unset.
NOTIFY_ENDPOINT = os.environ.get(
    "NOTIFY_ENDPOINT", r"https://notify.run/O6EfLmG6Tof1s5DljYB7"
)

def test():
    testRunner = unittest.TextTestRunner()
    suite = unittest.defaultTestLoader.discover("./test/")
    testRunner.run(suite)

#import logging

#logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

import pandas as pd
import networkx as nx
from copy import deepcopy

from package.tag import Tagger, TagRevenue, TagActiveNode
from package.model import DiffusionModel
from package.topic import TopicModel
from package.social_graph import SN_Graph
from package.itemset import ItemsetFlyweight, ItemRelation
from package.utils import getItemsPrice, read_items
from package.algorithm import Algorithm
from package.cluster_graph import ClusterGraph

# Number of topics; it is passed to TopicModel and selects which
# "*_with_<N>_topic.csv" files are read further down.
NUM_TOPICS = 5

# Small hand-written fixtures: per-node and per-item topic vectors.
# NOTE(review): these vectors have 3 components although NUM_TOPICS is 5, and
# none of TOPICS / PRICES / RELATION is referenced by the cells visible here.
# Confirm they are still needed.
TOPICS = {
    "Node": {
        "0": [0.9, 0.1, 0.0],
        "1": [0.2, 0.8, 0.0],
        "2": [0.8, 0.2, 0.0],
        "3": [0.2, 0.4, 0.4],
    },
    "Item": {
        "iPhone": [0.7, 0.0, 0.3],
        "AirPods": [0.9, 0.0, 0.1],
        "Galaxy": [0.0, 0.8, 0.2],
    },
}

# Price of each fixture item.
PRICES = dict(iPhone=50, AirPods=5, Galaxy=60)

# Pairwise item relation table: outer keys become columns, inner keys become
# rows, and a pair that is not listed (an item with itself) is NaN.
RELATION = pd.DataFrame({
    "iPhone": {"AirPods": 10, "Galaxy": -5},
    "AirPods": {"iPhone": 1, "Galaxy": 0},
    "Galaxy": {"iPhone": -8, "AirPods": 1},
})

# The same sample-items CSV feeds the item list, the item relation and the prices.
sample_items_csv = AMAZON_PATH + "/sample_items.csv"
items = read_items(sample_items_csv)

# Topic files are chosen by NUM_TOPICS: nodes from the Facebook folder,
# items from the Amazon folder.
topic_suffix = "_with_{}_topic.csv".format(NUM_TOPICS)
topicModel = TopicModel(NUM_TOPICS)
topicModel.read_topics(
    node_file=FACEBOOK_PATH + "/nodes" + topic_suffix,
    items_file=AMAZON_PATH + "/items" + topic_suffix,
)

# Social graph built from the Facebook edge list.
graph = SN_Graph.construct(FACEBOOK_PATH + "/edges", topicModel, located=False)

# Item relation and the itemset built from prices, topics and that relation.
relation = ItemRelation()
relation.construct(sample_items_csv)
item_prices = getItemsPrice(sample_items_csv)
itemset = ItemsetFlyweight(item_prices, topicModel, relation)

# NOTE(review): the model is labelled "amazon in dblp" although the graph above
# is read from FACEBOOK_PATH. Confirm the label is intended.
model = DiffusionModel(graph, itemset, threshold=10**(-5), name="amazon in dblp")

# The seed count is capped by both the itemset size and the number of graph nodes.
seed_size = min(itemset.size, graph.number_of_nodes())
seeds = model.selectSeeds(seed_size)

# Every itemset entry keyed in itemset.PRICE is handed to allocate() with the seeds.
all_items = [itemset[asin] for asin in itemset.PRICE.keys()]
model.allocate(seeds, all_items)
algo = Algorithm(model, 20, depth=0)


# Sample a subgraph by BFS from the model's seeds; the size argument comes
# from the algorithm's (private) _max_expected_len.
subgraph = graph.bfs_sampling(algo._max_expected_len, roots=model.getSeeds())

# Copy every attribute of each seed node from the full graph onto the sample.
for seed in seeds:
    for key, val in graph.nodes[seed].items():
        subgraph.nodes[seed][key] = val

# Node count, then edge count, one per line.
print(len(subgraph), len(subgraph.edges), sep="\n")

# test
# Ad-hoc experiment: add a node '2543_same' that shares the topic-model entry
# of the first neighbor of '2543', and link it to '2543'.
anchor_id, clone_id = '2543', '2543_same'
out_neighbors = list(subgraph.neighbors(anchor_id))
neighbor = out_neighbors[0]
# The clone points at the very same mapping entry as the chosen neighbor.
topicModel._mappingNode[clone_id] = topicModel._mappingNode[neighbor]
subgraph.add_node(clone_id)
# Presumably fills in the new node's attributes (e.g. 'topic') from the topic
# model. TODO confirm against SN_Graph._initNode.
subgraph._initNode(clone_id)
subgraph.add_edge(anchor_id, clone_id, weight=1)
# end test

# Build the cluster graph from the sampled subgraph and the selected seeds.
# NOTE(review): the meaning of depth and theta is defined by ClusterGraph,
# which is not visible here. theta looks like a similarity threshold; confirm.
cluster_graph = ClusterGraph(
    graph=subgraph,
    seeds=seeds,
    located=False,
    depth=1,
    theta=0.95,
)

Constructing graph...
Connecting the edges...Done
396
40944


In [31]:
import numpy as np
from numpy.linalg import norm

# Topic vectors of node '2407' and of the cloned node '2543_same'.
# NOTE(review): '2407' is hard-coded. Presumably it is the neighbor the clone
# was copied from; verify.
u = subgraph.nodes['2407']['topic']
v = subgraph.nodes['2543_same']['topic']

# Cosine similarity of the two vectors. Identical vectors give 1 only up to
# floating-point rounding (e.g. 0.9999999999999999), so compare with a
# tolerance rather than with `>= 1`.
cosine = np.dot(u, v) / (norm(u) * norm(v))
np.isclose(cosine, 1.0)

True

In [12]:
# Union of the nodes in the depth-1 BFS tree of every seed (each tree also
# contains the seed itself).
nodes_tree = set()
for seed in seeds:
    one_hop_tree = nx.bfs_tree(subgraph, seed, depth_limit=1)
    nodes_tree.update(one_hop_tree.nodes)
len(nodes_tree)

232

In [3]:
# Show the attribute dict stored on cluster-graph node '2407'.
# NOTE(review): this cell's execution count (3) is lower than that of the cell
# that builds cluster_graph (10), so the saved output may be stale. Re-run on
# a fresh kernel to confirm.
cluster_graph.nodes['2407']

{'desired_set': None,
 'adopted_set': None,
 'nodes': {'2407'},
 'topic': [0.31636895949932764,
  0.28505262427644096,
  0.03970837254940788,
  0.07894647312801878,
  0.27992357054680467],
 'adopted_records': []}

In [13]:
# Number of nodes in the cluster graph.
len(cluster_graph.nodes)

97