In [40]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import datetime

In [41]:
# Load the dataset: a text file with one JSON object per line.
#
# The file is streamed line by line instead of being materialised with
# readlines(), and blank lines (for example a trailing newline at the end of
# the file) are skipped so json.loads is never handed an empty string, which
# would raise json.JSONDecodeError.
# The encoding is pinned to UTF-8 (the standard encoding for JSON text) so the
# result does not depend on the platform's default locale encoding.
with open('../../Dataset/build_dataset.txt', encoding='utf-8') as f:
    # `data` ends up as a list of dicts, one per non-blank line.
    data = [json.loads(line) for line in f if line.strip()]

In [42]:
def get_readers_by_document(data, doc_uuid):
    """Return the visitor UUIDs of all records that refer to ``doc_uuid``.

    Args:
        data: Iterable of record dicts. Records may lack either the
            'subject_doc_id' or the 'visitor_uuid' key; such records are
            skipped rather than raising ``KeyError``.
        doc_uuid: Value compared against each record's 'subject_doc_id'.

    Returns:
        list: The 'visitor_uuid' of every matching record, in input order.
        A visitor appears once per matching record (duplicates are kept).
    """
    return [
        record['visitor_uuid']
        for record in data
        # Both keys are checked explicitly: a record that names the document
        # but carries no visitor cannot contribute a reader.
        if 'subject_doc_id' in record
        and 'visitor_uuid' in record
        and record['subject_doc_id'] == doc_uuid
    ]


In [43]:
def get_document_by_readers(data, visitor_uuid):
    """List the documents recorded for a single visitor.

    Args:
        data: Iterable of record dicts.
        visitor_uuid: Value compared against each record's 'visitor_uuid'.

    Returns:
        list: The 'subject_doc_id' of every record whose 'visitor_uuid'
        equals ``visitor_uuid`` and which has a 'subject_doc_id' key, in
        input order (duplicates are kept).
    """
    return [
        entry['subject_doc_id']
        for entry in data
        if 'visitor_uuid' in entry
        and entry['visitor_uuid'] == visitor_uuid
        and 'subject_doc_id' in entry
    ]


In [44]:
def also_likes(data, doc_uuid, visitor_uuid=None, sorting_function=None, top_n=10):
    """Rank the documents read by the readers of ``doc_uuid``.

    Args:
        data: Re-iterable collection (e.g. a list) of record dicts. Only
            records that contain both 'subject_doc_id' and 'visitor_uuid'
            are considered. The collection is traversed twice.
        doc_uuid: Document whose readers seed the recommendation.
        visitor_uuid: Optional visitor to leave out of the reader set. It is
            silently ignored when that visitor is not a reader of
            ``doc_uuid``.
        sorting_function: Key function applied to ``(doc_id, info)`` pairs,
            where ``info`` is ``{'count': int, 'readers': set}``. Results are
            sorted by this key in descending order. Defaults to the reader
            count.
        top_n: Maximum number of entries returned (default 10). ``None``
            returns every entry.

    Returns:
        list: Up to ``top_n`` ``(doc_id, {'count': ..., 'readers': ...})``
        tuples. 'count' is the number of distinct qualifying readers of that
        document and 'readers' is the set of those readers. ``doc_uuid``
        itself is included whenever at least one qualifying reader remains.
        Entries with equal keys keep the order in which the documents first
        appear in ``data``.
    """
    if sorting_function is None:
        sorting_function = lambda item: item[1]['count']

    # Pass 1: every visitor that has a record for the requested document.
    all_readers = {
        record['visitor_uuid']
        for record in data
        if 'subject_doc_id' in record
        and 'visitor_uuid' in record
        and record['subject_doc_id'] == doc_uuid
    }

    # discard() rather than remove(): the given visitor may never have read
    # this document, which must not raise KeyError.
    if visitor_uuid is not None:
        all_readers.discard(visitor_uuid)

    # Pass 2: one sweep over the data collects, per document, the distinct
    # qualifying readers. This replaces a full rescan of `data` per reader.
    liked_documents = {}
    for record in data:
        if 'visitor_uuid' not in record or 'subject_doc_id' not in record:
            continue
        reader = record['visitor_uuid']
        if reader not in all_readers:
            continue
        info = liked_documents.setdefault(
            record['subject_doc_id'], {'count': 0, 'readers': set()}
        )
        # Count each reader once per document, however many records exist.
        if reader not in info['readers']:
            info['readers'].add(reader)
            info['count'] += 1

    sorted_docs = sorted(liked_documents.items(), key=sorting_function, reverse=True)

    return sorted_docs[:top_n]

Example document UUID: 100713205147-2ee05a98f1794324952eea5ca678c026

Example visitor UUID: 489c02f3e258c199

In [45]:
# Example inputs: the document to start from and the visitor to exclude.
doc_uuid = "100713205147-2ee05a98f1794324952eea5ca678c026"
visitor_uuid = "489c02f3e258c199"

# Fetch the ranked 'also likes' entries and print them one per line.
top_liked_docs = also_likes(data, doc_uuid=doc_uuid, visitor_uuid=visitor_uuid)
for doc in top_liked_docs:
    print(doc)


('100713205147-2ee05a98f1794324952eea5ca678c026', {'count': 3, 'readers': {'76175bb1ea9805a1', 'cee42a0927c5f2da', '232eeca785873d35'}})
('131202094202-a4ae3185bc84368f14bff266d276eb4b', {'count': 3, 'readers': {'76175bb1ea9805a1', 'cee42a0927c5f2da', '232eeca785873d35'}})
('140218233015-c848da298ed6d38b98e18a85731a83f4', {'count': 3, 'readers': {'76175bb1ea9805a1', 'cee42a0927c5f2da', '232eeca785873d35'}})
('131218101426-7fe24377c762b8fe53d21b65fcfa9b25', {'count': 2, 'readers': {'76175bb1ea9805a1', 'cee42a0927c5f2da'}})
('131105193559-dbac395e3cc43fc2b0077eaf789183bb', {'count': 1, 'readers': {'cee42a0927c5f2da'}})
('140101075322-b9180e4eddbece0371da647a6ca0e939', {'count': 1, 'readers': {'76175bb1ea9805a1'}})
('140128221921-4c304a111b9e0a3425f419563ad6e29a', {'count': 1, 'readers': {'232eeca785873d35'}})
('130902223509-8fed6b88ae0937c1c43fb30cb9f87ad8', {'count': 1, 'readers': {'232eeca785873d35'}})
('140228083520-000000008d3679dbb78286526bd8c14b', {'count': 1, 'readers': {'232eeca7

In [46]:
from graphviz import Digraph

# Assuming the also_likes function and other necessary functions are defined
# Here's an implementation of the graph generation:

def generate_graph(data, doc_uuid, visitor_uuid = None, sorting_function=None):
    """Build and render the 'also likes' graph for a document.

    Documents are drawn as ellipses and readers as boxes, each labelled with
    the last four characters of its identifier. The input document (and the
    input visitor, when given) is filled green.

    Args:
        data: Records passed through to ``also_likes``.
        doc_uuid: Document to build the graph around.
        visitor_uuid: Optional visitor to highlight. When omitted (or empty)
            no visitor node is drawn.
        sorting_function: Optional key function forwarded to ``also_likes``;
            ``None`` lets ``also_likes`` apply its default ordering.

    Returns:
        None. The DOT source is printed and the graph is rendered to a PNG
        via ``dot.render('also_likes_graph', format='png', cleanup=True)``.

    NOTE(review): node names are the last four characters only, so two
    identifiers sharing a suffix would collapse into one node. Confirm this
    truncation is intended.
    """
    # Create a Digraph object
    dot = Digraph(comment='Also Likes Graph')

    # Highlight the input document
    mainDocId = doc_uuid[-4:]
    dot.node(mainDocId, mainDocId, style='filled', fillcolor='green')

    # Highlight the input visitor. The identifier is sliced only inside the
    # guard so the default visitor_uuid=None does not raise TypeError.
    if visitor_uuid:
        mainVisitorId = visitor_uuid[-4:]
        dot.attr('node', shape='box')
        dot.node(mainVisitorId, mainVisitorId, style='filled', fillcolor='green')
        dot.attr('node', shape='ellipse')
        # Add an edge from the visitor to the document
        dot.edge(mainVisitorId, mainDocId)

    # Forward the caller's sorting_function so a custom ordering is actually
    # honoured. also_likes substitutes its own default when it is None.
    also_likes_docs = also_likes(data, doc_uuid, visitor_uuid, sorting_function)

    # For each "also likes" document, add its node and one edge per reader
    for doc, info in also_likes_docs:
        docId = doc[-4:]
        dot.node(docId, docId)

        for reader in info['readers']:
            readerId = reader[-4:]
            # Readers are boxes; switch the default shape back afterwards so
            # the next document node is drawn as an ellipse.
            dot.attr('node', shape='box')
            dot.node(readerId, readerId)
            dot.attr('node', shape='ellipse')
            # Add an edge from reader to the document
            dot.edge(readerId, docId)

    # Print the DOT source, then render the graph as a PNG file
    print(dot.source)
    dot.render('also_likes_graph', format='png', cleanup=True)


# Render the graph for the example document, highlighting the example visitor.
generate_graph(
    data,
    doc_uuid="100713205147-2ee05a98f1794324952eea5ca678c026",
    visitor_uuid="489c02f3e258c199",
)


// Also Likes Graph
digraph {
	c026 [label=c026 fillcolor=green style=filled]
	node [shape=box]
	c199 [label=c199 fillcolor=green style=filled]
	node [shape=ellipse]
	c199 -> c026
	c026 [label=c026]
	node [shape=box]
	"05a1" [label="05a1"]
	node [shape=ellipse]
	"05a1" -> c026
	node [shape=box]
	f2da [label=f2da]
	node [shape=ellipse]
	f2da -> c026
	node [shape=box]
	"3d35" [label="3d35"]
	node [shape=ellipse]
	"3d35" -> c026
	eb4b [label=eb4b]
	node [shape=box]
	"05a1" [label="05a1"]
	node [shape=ellipse]
	"05a1" -> eb4b
	node [shape=box]
	f2da [label=f2da]
	node [shape=ellipse]
	f2da -> eb4b
	node [shape=box]
	"3d35" [label="3d35"]
	node [shape=ellipse]
	"3d35" -> eb4b
	"83f4" [label="83f4"]
	node [shape=box]
	"05a1" [label="05a1"]
	node [shape=ellipse]
	"05a1" -> "83f4"
	node [shape=box]
	f2da [label=f2da]
	node [shape=ellipse]
	f2da -> "83f4"
	node [shape=box]
	"3d35" [label="3d35"]
	node [shape=ellipse]
	"3d35" -> "83f4"
	"9b25" [label="9b25"]
	node [shape=box]
	"05a1" [label="05a