In [1]:
import rdflib
import json

import requests

as_ld = requests.get("https://www.w3.org/ns/activitystreams", headers={"accept": "application/ld+json"}).json()

ctx = [ "https://www.w3.org/ns/activitystreams"]

    
frame = {
  "@context": "https://www.w3.org/ns/activitystreams",
  "object": {}
}

# The following is necessary due to the pyld document loader being broken:
#
# See "# FIXME: only if html5lib loaded?" on 6573 jsonld.py


def loader(*args, **kwargs):
    return  {
        "contentType": "application/ld+json",
        "contextUrl": None,
        "documentUrl": "https://www.w3.org/ns/activitystreams",
        "document": as_ld,
    }              

from pyld import jsonld
jsonld.set_document_loader(loader)

from glob import glob

sample_files = glob('data/*.json')

from ipywidgets import interact

In [2]:
def sanitize_activity(activity):
    # parsing and reserializing removes nodes not in the context
    G = rdflib.Graph()
    G.parse(data=activity, format='json-ld')
    result = json.loads(G.serialize(format='json-ld', 
                      auto_compact=False, 
                      use_native_types=True,
                      context=ctx))
    # reframing ensures the format is as desired
    return jsonld.frame(result, frame)

def display_sample(filename):

    with open(filename) as f:
        activity = json.load(f)

    print("--- Original JSON ---")
    print(json.dumps(activity, indent=2))
        
    print("--- Sanitized JSON ---")
    print(json.dumps(sanitize_activity(activity), indent=2))

display_sample(sample_files[3])

--- Original JSON ---
{
  "@context": "https://www.w3.org/ns/activitystreams",
  "actor": "https://my_domain/activitypub/munchingcow",
  "cc": [
    "https://my_domain/activitypub/munchingcow/followers"
  ],
  "id": "https://my_domain/activitypub/munchingcow/9c1d3b44-c6f6-4310-9113-a0c3d3208cac/activity",
  "object": {
    "@context": "https://www.w3.org/ns/activitystreams",
    "attributedTo": "https://my_domain/activitypub/munchingcow",
    "cc": [
      "https://my_domain/activitypub/munchingcow/followers"
    ],
    "content": "<p>TEST</p>",
    "id": "https://my_domain/activitypub/munchingcow/9c1d3b44-c6f6-4310-9113-a0c3d3208cac",
    "inReplyTo": null,
    "published": "2023-01-25T16:00:39Z",
    "source": {
      "content": "TEST ",
      "mediaType": "text/markdown"
    },
    "to": [
      "https://www.w3.org/ns/activitystreams#Public"
    ],
    "type": "Note"
  },
  "published": "2023-01-25T16:00:39Z",
  "to": [
    "https://www.w3.org/ns/activitystreams#Public"
  ],
  "type

In [41]:
interact(display_sample, filename=sample_files)

interactive(children=(Dropdown(description='filename', options=('data/mastodon_delete_actor_1.json', 'data/buf…

<function __main__.display_sample(filename)>