In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import datetime

In [2]:
# Load the dataset: a text file that holds one JSON object per line.
# The file is streamed line by line instead of being read with readlines(),
# so only the parsed records are kept in memory. Blank lines (for example a
# trailing newline at the end of the file) are skipped, because json.loads('')
# would raise JSONDecodeError.
with open('../../Dataset/build_dataset.txt', encoding='utf-8') as f:
    # `data` ends up as a list with one parsed JSON value per non-blank line.
    data = [json.loads(line) for line in f if line.strip()]

In [3]:
def get_readers_by_document(data, doc_uuid):
    """Return the visitor UUIDs of every record that refers to ``doc_uuid``.

    Args:
        data: Iterable of record dicts.
        doc_uuid: Value compared against each record's ``'subject_doc_id'``.

    Returns:
        A list of ``'visitor_uuid'`` values in input order, with one entry per
        matching record (a visitor appears once for every matching record).
        Records that lack either ``'subject_doc_id'`` or ``'visitor_uuid'``
        are skipped.
    """
    return [
        record['visitor_uuid']
        for record in data
        # Both keys are checked before use: a matching record without a
        # 'visitor_uuid' used to raise KeyError; it is now ignored, which
        # mirrors the guards in get_document_by_readers.
        if 'subject_doc_id' in record
        and 'visitor_uuid' in record
        and record['subject_doc_id'] == doc_uuid
    ]


In [4]:
def get_document_by_readers(data, visitor_uuid):
    """Collect the document ids found in the records of one visitor.

    Args:
        data: Iterable of record dicts.
        visitor_uuid: Value compared against each record's ``'visitor_uuid'``.

    Returns:
        A list of ``'subject_doc_id'`` values in input order, one per matching
        record. Records without a ``'visitor_uuid'`` or without a
        ``'subject_doc_id'`` contribute nothing.
    """
    return [
        entry['subject_doc_id']
        for entry in data
        if 'visitor_uuid' in entry
        and entry['visitor_uuid'] == visitor_uuid
        and 'subject_doc_id' in entry
    ]


In [5]:
def also_likes(data, doc_uuid, visitor_uuid = None, sorting_function=None, top_n=10):
    """Rank the documents read by the readers of ``doc_uuid``.

    A "reader" is any ``'visitor_uuid'`` that occurs in a record whose
    ``'subject_doc_id'`` equals ``doc_uuid``. Every document that at least one
    such reader appears with is collected, together with the set of those
    readers. The input document itself is part of the result whenever it has
    readers.

    The dataset is scanned twice in total, instead of once per reader, so the
    running time grows linearly with the number of records.

    Args:
        data: Sequence of record dicts. It is iterated twice, so it must be
            re-iterable (a list, not a one-shot generator).
        doc_uuid: Document id whose readers seed the search.
        visitor_uuid: Accepted for interface compatibility; the current
            implementation does not use it.
        sorting_function: Key function applied to ``(doc_id, info)`` pairs,
            where ``info`` is ``{'count': int, 'readers': set}``. Defaults to
            the reader count. Results are sorted in descending key order.
        top_n: Maximum number of entries returned (default 10). ``None``
            returns every entry.

    Returns:
        A list of ``(doc_id, {'count': int, 'readers': set})`` tuples, best
        first. Entries with equal keys keep the order in which their document
        first appears in ``data``.
    """
    if sorting_function is None:
        sorting_function = lambda item: item[1]['count']

    # Pass 1: every visitor that read the input document.
    readers_of_doc = {
        record['visitor_uuid']
        for record in data
        if 'subject_doc_id' in record
        and 'visitor_uuid' in record
        and record['subject_doc_id'] == doc_uuid
    }

    # Pass 2: for each of those readers, note every document they appear with.
    liked_documents = {}
    for record in data:
        if 'visitor_uuid' not in record or 'subject_doc_id' not in record:
            continue
        reader = record['visitor_uuid']
        if reader not in readers_of_doc:
            continue
        doc = record['subject_doc_id']
        if doc not in liked_documents:
            liked_documents[doc] = {'count': 0, 'readers': set()}
        info = liked_documents[doc]
        # Count each reader once per document, however many records they have.
        if reader not in info['readers']:
            info['readers'].add(reader)
            info['count'] += 1

    # sorted() is stable even with reverse=True, so ties keep insertion order.
    sorted_docs = sorted(liked_documents.items(), key=sorting_function, reverse=True)

    return sorted_docs[:top_n]

In [6]:
doc_uuid = "100713205147-2ee05a98f1794324952eea5ca678c026"  # Sample document UUID used for the demo query
visitor_uuid = "8fd99d4cbfb9b8d8"  # Sample visitor UUID; defined here but not passed to also_likes below

# Query the 'also likes' ranking for the sample document (also_likes returns
# at most 10 entries) and print each (doc_id, info) tuple on its own line.
top_liked_docs = also_likes(data, doc_uuid)
for doc in top_liked_docs:
    print(doc)


KeyboardInterrupt: 

In [8]:
from graphviz import Digraph

# generate_graph relies on also_likes (defined in an earlier cell), so that
# cell must have been run before this one.

def generate_graph(data, doc_uuid, visitor_uuid = None):
    """Build and render the "also likes" graph for a document.

    Nodes are named by the last four characters of the corresponding UUID.
    Documents use the default ellipse shape and readers are drawn as boxes.
    The input document (and the input visitor, when one is given) is filled
    green. An edge runs from every reader to each document it is listed
    under in the result of ``also_likes(data, doc_uuid)``.

    Side effects: prints the DOT source and renders the graph to
    ``also_likes_graph.png`` (the intermediate DOT file is removed through
    ``cleanup=True``).

    Args:
        data: Record dicts, forwarded unchanged to ``also_likes``.
        doc_uuid: UUID of the document to highlight and query.
        visitor_uuid: Optional UUID of a visitor to highlight. When omitted
            (or empty) no visitor node is highlighted.

    Returns:
        None.
    """
    # Create a Digraph object
    dot = Digraph(comment='Also Likes Graph')

    # Highlight the input document
    mainDocId = doc_uuid[-4:]
    dot.node(mainDocId, mainDocId, style='filled', fillcolor='green')

    # Highlight the input visitor. The slice lives inside the guard because
    # visitor_uuid defaults to None and slicing None raises TypeError.
    if visitor_uuid:
        mainVisitorId = visitor_uuid[-4:]
        dot.attr('node', shape='box')
        dot.node(mainVisitorId, mainVisitorId, style='filled', fillcolor='green')
        dot.attr('node', shape='ellipse')

    # Get the list of "also likes" documents using the also_likes function
    also_likes_docs = also_likes(data, doc_uuid)

    # For each "also likes" document, get the readers and create edges
    for doc, info in also_likes_docs:

        docId = doc[-4:]
        # Add the document node
        dot.node(docId, docId)

        readers = info['readers']

        for reader in readers:
            readerId = reader[-4:]
            # Reader nodes are boxes: switch the node default, add the node,
            # then restore the ellipse default for the document nodes.
            dot.attr('node', shape='box')
            dot.node(readerId, readerId)
            dot.attr('node', shape='ellipse')
            # Add an edge from reader to the document
            dot.edge(readerId, docId)

    # Print the DOT source, then render the graph as a PNG file
    print(dot.source)
    dot.render('also_likes_graph', format='png', cleanup=True)


# Render the graph for a sample document UUID and a sample visitor UUID.
# generate_graph prints the DOT source and writes also_likes_graph.png.
generate_graph(data, "100713205147-2ee05a98f1794324952eea5ca678c026", "489c02f3e258c199")


KeyboardInterrupt: 