In [72]:
#get credentials

import os
from dotenv import load_dotenv
from notion_client import Client

# Load variables from a local .env file into the process environment so the
# os.getenv() calls below can see them.
load_dotenv()

# Notion integration token and the ids of the three databases used by the
# helper functions in this notebook. os.getenv() returns None for any variable
# that is not set, so a missing .env entry only surfaces later as an API error.
NOTION_API_TOKEN = os.getenv('NOTION_SENSENETS_TOKEN')
URLS_DATABASE_ID = os.getenv('NOTION_URLS_DATABASE_ID')
POSTS_DATABASE_ID = os.getenv('NOTION_POSTS_DATABASE_ID')
PROFILES_DATABASE_ID =os.getenv('NOTION_PROFILES_DATABASE_ID')
# Raw HTTP headers for calling the Notion REST API directly.
# NOTE(review): no cell visible in this notebook references HEADERS (all calls
# go through the `notion` client below) - confirm whether it is still needed.
HEADERS = {
    'Authorization': f'Bearer {NOTION_API_TOKEN}',
    'Content-Type': 'application/json',
    'Notion-Version': '2022-06-28'
}
# Shared notion_client instance used by every Notion helper defined below.
notion = Client(auth=NOTION_API_TOKEN)


In [97]:
#functions for mapping json to Notion profile table
def search_profile_in_notion(twitterId):
    """Look up a profile page in the Notion profiles database by Twitter id.

    Queries PROFILES_DATABASE_ID for pages whose "Twitter id" rich-text
    property equals ``twitterId`` exactly.

    Returns:
        The id of the first matching page, or None when nothing matches.
        A status line is printed in both cases.
    """
    twitter_id_filter = {
        "property": "Twitter id",  # property name as it appears in Notion
        "rich_text": {"equals": twitterId},  # exact match only
    }
    matches = notion.databases.query(
        database_id=PROFILES_DATABASE_ID,
        filter=twitter_id_filter,
    )["results"]

    # Guard clause: nothing matched.
    if not matches:
        print(f"No profile found for authorId: {twitterId}")
        return None

    print(f"Profile found for authorId: {twitterId}")
    return matches[0]["id"]

def build_base_notion_profile(profile:dict):
    """Create a page in the Notion profiles database for a Twitter author.

    Reads ``profile["generic"]["author"]`` (keys ``name``, ``username``,
    ``id``) and ``profile["authorId"]``, then creates a page under
    PROFILES_DATABASE_ID with the properties "Name", "Twitter Handle",
    "authorId", "Twitter id" and a "Platforms" multi-select fixed to 'Twitter'.

    Returns:
        The id of the newly created Notion page.

    Raises:
        KeyError: if any of the keys listed above is missing from ``profile``.
    """
    def as_rich_text(content):
        # Wrap a plain string in Notion's rich_text property payload.
        return {"rich_text": [{"text": {"content": content}}]}

    print("in build profile page")
    author_info = profile["generic"]["author"]
    display_name = author_info['name']
    # TODO (carried over): make the handle modular so Mastodon can be included.
    twitter_handle = author_info["username"]
    author_id = profile["authorId"]
    twitter_id = author_info['id']

    page_properties = {
        "Name": {"title": [{"text": {"content": display_name}}]},
        "Twitter Handle": as_rich_text(twitter_handle),
        "authorId": as_rich_text(author_id),
        "Twitter id": as_rich_text(twitter_id),
        "Platforms": {"multi_select": [{"name": 'Twitter'}]},
    }
    created_page = notion.pages.create(
        parent={"database_id": PROFILES_DATABASE_ID},
        properties=page_properties,
    )

    print(f"Created new page for profile with name: {display_name}")
    return created_page["id"]

    

In [86]:
#testing functions 
import json
# Load the sample file into the notebook-level `data` variable used by the
# test calls in the following cells.
# NOTE(review): the recorded print(data) output shows a single dict here, while
# process_json() iterates the same path as a list of posts - the file contents
# presumably changed between runs; confirm which shape is current.
with open('data/test_json.json') as f:
        data = json.load(f)


In [98]:
build_base_notion_profile(data)

in build profile page
Created new page for profile with name: Jay


'10396ae9-0655-81af-85d8-e25fb47f61f8'

In [90]:
search_profile_in_notion('333')

Profile found for authorId: 333


'10396ae9-0655-81c9-a245-f2e1ca23f735'

In [87]:
print(data)

{'id': 'SD6jPYKAOe1WuScg5cIP', 'createdAtMs': 1720217724000, 'republishedStatus': 'pending', 'origin': 'twitter', 'authorId': 'nanopub:0xe475Fa8b885bC709A5820313EFC1d4509ad2CDeE', 'generic': {'author': {'name': 'Jay', 'id': '123641282', 'platformId': 'twitter', 'username': 'metasdl'}}}


In [79]:
search_profile_in_notion('123')

Profile found for authorId: 123


'ba52db95-af69-4704-bd09-95a7c87ee781'

In [51]:
import json


# Function to search for a URL in the URLs database
def search_url_in_database(url):
    """Find the page for ``url`` in the Notion URLs database.

    Queries URLS_DATABASE_ID for pages whose "URL" property equals ``url``.

    Returns:
        The id of the first matching page, or None when the URL is not stored
        yet. Progress messages are printed along the way.
    """
    print('in search url')
    url_filter = {"property": "URL", "url": {"equals": url}}
    found_pages = notion.databases.query(
        database_id=URLS_DATABASE_ID,
        filter=url_filter,
    )["results"]

    # Guard clause: the URL has no page yet.
    if not found_pages:
        print(f"URL not found, will create a new page: {url}")
        return None

    print(f"URL found in database: {url}")
    return found_pages[0]["id"]

# Function to create a new page in the URLs database
def create_url_page(url):
    """Add a page for ``url`` to the Notion URLs database.

    The URL string is stored both as the page's "Name" title and as its "URL"
    property. A message is printed before and after the API call.

    Returns:
        The id of the newly created page.
    """
    print("in create page")
    title_property = {"title": [{"text": {"content": url}}]}
    new_page = notion.pages.create(
        parent={"database_id": URLS_DATABASE_ID},
        properties={"Name": title_property, "URL": {"url": url}},
    )

    print(f"Created new page for URL: {url}")
    return new_page["id"]

# Function to search for a post in the Posts database by title
def search_post_in_database(post_title):
    """Find a post page in the Notion Posts database by its exact title.

    Queries POSTS_DATABASE_ID for pages whose "Name" title equals
    ``post_title``.

    Returns:
        The id of the first matching page, or None when no post has that
        title. A status line is printed in both cases.
    """
    title_filter = {"property": "Name", "title": {"equals": post_title}}
    hits = notion.databases.query(
        database_id=POSTS_DATABASE_ID,
        filter=title_filter,
    )["results"]

    if hits:
        print(f"Post '{post_title}' found in the database.")
        return hits[0]["id"]

    print(f"Post '{post_title}' not found, will create a new page.")
    return None

# Function to create a new page in the Posts database
def create_post_page(post_title, recommendation_ids,post_text,post_creators_account):
    """Create a page in the Notion Posts database.

    Args:
        post_title: text for the page's "Name" title.
        recommendation_ids: iterable of Notion page ids to link through the
            "Recommends" relation.
        post_text: content for the "Text" rich-text property.
        post_creators_account: content for the "Creators handle" rich-text
            property.

    Returns:
        The id of the newly created page.
    """
    recommended_pages = []
    for rec_id in recommendation_ids:
        recommended_pages.append({"id": rec_id})

    post_properties = {
        "Name": {"title": [{"text": {"content": post_title}}]},
        "Recommends": {"relation": recommended_pages},
        "Text": {"rich_text": [{"text": {"content": post_text}}]},
        "Creators handle": {
            "rich_text": [{"text": {"content": post_creators_account}}]
        },
    }
    created = notion.pages.create(
        parent={"database_id": POSTS_DATABASE_ID},
        properties=post_properties,
    )
    print(f"Created new post page for '{post_title}'.")
    return created["id"]

# Function to update the recommendations field in the Posts database
def update_post_relations(post_id, recommendation_ids,post_text,post_creators_account):
    """Overwrite the relation and text fields of an existing post page.

    Sets the "Recommends" relation to exactly ``recommendation_ids`` and
    replaces the "Text" and "Creators handle" rich-text properties on the page
    identified by ``post_id``. Prints a confirmation; returns nothing.
    """
    linked_pages = []
    for url_page_id in recommendation_ids:
        linked_pages.append({"id": url_page_id})

    updated_properties = {
        "Recommends": {"relation": linked_pages},
        "Text": {"rich_text": [{"text": {"content": post_text}}]},
        "Creators handle": {
            "rich_text": [{"text": {"content": post_creators_account}}]
        },
    }
    notion.pages.update(page_id=post_id, properties=updated_properties)
    print(f"Updated post {post_id} with new recommendations.")

# Step 3: Process the JSON file
def process_json(json_file):
    """Sync every post in a JSON file into the Notion Posts and URLs databases.

    The file must contain a list of post objects with 'User' and 'Content'
    entries and an optional 'Recommendations' list of URL strings. For each
    post:

    1. a title is derived with create_post_name();
    2. every recommended URL is looked up in the URLs database and created
       there if it is missing;
    3. the post page is updated if a page with that title already exists,
       otherwise a new page is created.

    Args:
        json_file: path of the JSON file to read.

    Raises:
        KeyError: if a post lacks 'User' or 'Content'.
    """
    # Read the whole file first so the handle is closed before the (slow)
    # sequence of Notion API calls starts.
    with open(json_file) as f:
        data = json.load(f)

    for post in data:
        post_name = create_post_name(post)
        # `or []` also covers an explicit null in the JSON.
        recommendations = post.get('Recommendations') or []
        # First, search if the post already exists.
        post_id = search_post_in_database(post_name)
        post_text = post['Content']
        post_creators_account = post["User"]

        print(f"Processing Post ID: {post_id}")

        # Start from an empty list for every post. Previously the list was only
        # assigned when the post had recommendations, so a post without any
        # either raised NameError or silently reused the previous post's ids.
        recommendation_ids = []
        for url in recommendations:
            url = url.strip()

            # Reuse the existing URL page, or create one if it is missing.
            url_page_id = search_url_in_database(url)
            if not url_page_id:
                url_page_id = create_url_page(url)

            if url_page_id:
                recommendation_ids.append(url_page_id)

        # If the post exists, update it; otherwise create a new post.
        if post_id:
            update_post_relations(post_id, recommendation_ids,post_text,post_creators_account)
        else:
            create_post_page(post_name, recommendation_ids,post_text,post_creators_account)

def create_profile_from_post(post:dict,post_notion_id:str):
    """Create a page in the Notion profiles database from a post record.

    The page title is taken from ``post['author_name']``.

    NOTE(review): this function looks unfinished and should not be called as
    written:
      * ``recommendation_ids``, ``post_text`` and ``post_creators_account`` are
        neither parameters nor locals, so the call raises NameError unless
        notebook globals with those names happen to exist.
      * ``post_notion_id`` is accepted but never used; presumably it was meant
        to populate the "Posts" relation - confirm.
      * "Text" and "Creators handle" mirror the Posts database payload in
        create_post_page(); confirm the profiles database has these properties.
      * The API response is assigned but not returned.
    """
    response = notion.pages.create(
        **{
            "parent": {"database_id": PROFILES_DATABASE_ID},
            "properties": {
                "Name": {
                    "title": [
                        {
                            "text": {"content": post['author_name']}
                        }
                    ]
                },
                "Posts": {
                    # NOTE(review): undefined name in this scope (see docstring).
                    "relation": [{"id": rec_id} for rec_id in recommendation_ids]
                },
                "Text": {
                    "rich_text": [
                        {
                            # NOTE(review): undefined name in this scope.
                            "text": {"content": post_text}
                        }
                    ]
                },
                "Creators handle": {
                    "rich_text": [
                        {
                            # NOTE(review): undefined name in this scope.
                            "text": {"content": post_creators_account}
                        }
                    ]
                }
            }
        }
    )


#Create post name
def create_post_name(post, max_length=60):
    """Build the display title for a post: "<User>: <start of Content>".

    Args:
        post: mapping with string entries 'User' and 'Content'.
        max_length: number of leading characters of the content to keep.
            Defaults to 60, the value that was previously hard-coded (the old
            comments incorrectly said 10).

    Returns:
        The title string, e.g. ``"John Doe: Artificial intelligence is ..."``.

    Raises:
        KeyError: if 'User' or 'Content' is missing from ``post``.
    """
    user_name = post['User']
    short_content = post['Content'][:max_length]
    return f"{user_name}: {short_content}"

In [52]:
# Call the function to process the JSON file
process_json('data/test_json.json')

Post 'John Doe: Artificial intelligence is revolutionizing industries with i' found in the database.
Processing Post ID: 0ff96ae9-0655-8179-ac5f-d7f18cc1c3a7
in search url
URL found in database: https://example.com/ai1
in search url
URL found in database: https://example.com/ai2
Updated post 0ff96ae9-0655-8179-ac5f-d7f18cc1c3a7 with new recommendations.
Post 'Jane Smith: Data science helps uncover insights from vast amounts of dat' found in the database.
Processing Post ID: 0ff96ae9-0655-8178-910b-df13842c8bc2
in search url
URL found in database: https://example.com/ds1
Updated post 0ff96ae9-0655-8178-910b-df13842c8bc2 with new recommendations.
Post 'Alice Johnson: Machine learning algorithms are key to developing intelligen' found in the database.
Processing Post ID: 0ff96ae9-0655-813a-bba1-c9c206ea30f5
in search url
URL found in database: https://example.com/ml1
in search url
URL found in database: https://example.com/ml2
Updated post 0ff96ae9-0655-813a-bba1-c9c206ea30f5 with new rec

In [23]:
#Testing access to Notion databases 

# Function to read all entries from the Posts database
def read_posts_database():
    """Print the id and title of every page in the Notion Posts database.

    Follows Notion's cursor pagination (``has_more`` / ``next_cursor``) so that
    databases larger than one result page are listed completely, and prints
    "(untitled)" for pages whose "Name" title is empty instead of failing on
    them. Any error is reported with a printed message rather than raised,
    because this is a connectivity smoke test.
    """
    try:
        results = []
        query_args = {"database_id": POSTS_DATABASE_ID}
        while True:
            response = notion.databases.query(**query_args)
            results.extend(response.get("results", []))
            next_cursor = response.get("next_cursor")
            # Stop when Notion reports no further pages (or gives no cursor).
            if not response.get("has_more") or not next_cursor:
                break
            query_args["start_cursor"] = next_cursor

        print(f"Posts Database contains {len(results)} entries.")
        for entry in results:
            title_parts = entry['properties']['Name']['title']
            # An untitled page has an empty title list; indexing [0] on it used
            # to raise IndexError and abort the whole listing.
            title = title_parts[0]['text']['content'] if title_parts else "(untitled)"
            print(f"Post ID: {entry['id']}, Title: {title}")
    except Exception as e:
        print(f"Error reading Posts Database: {e}")

# Function to read all entries from the URLs database
def read_urls_database():
    """Print the page id and URL of every page in the Notion URLs database.

    Follows Notion's cursor pagination (``has_more`` / ``next_cursor``) so that
    databases larger than one result page are listed completely. Any error is
    reported with a printed message rather than raised, because this is a
    connectivity smoke test.
    """
    try:
        results = []
        query_args = {"database_id": URLS_DATABASE_ID}
        while True:
            response = notion.databases.query(**query_args)
            results.extend(response.get("results", []))
            next_cursor = response.get("next_cursor")
            # Stop when Notion reports no further pages (or gives no cursor).
            if not response.get("has_more") or not next_cursor:
                break
            query_args["start_cursor"] = next_cursor

        print(f"URLs Database contains {len(results)} entries.")
        for entry in results:
            print(f"URL Page ID: {entry['id']}, URL: {entry['properties']['URL']['url']}")
    except Exception as e:
        print(f"Error reading URLs Database: {e}")

# Test reading the databases
print("Reading Posts Database:")
read_posts_database()

print("\nReading URLs Database:")
read_urls_database()


Reading Posts Database:
Posts Database contains 3 entries.
Post ID: 26155953-b777-45d1-9613-320d671a539c, Title: Hihi
Post ID: 5fa16e7e-93e8-4955-af0e-6e42c68b6881, Title: Lala
Post ID: bcfc1994-ad55-41f2-8c30-ac8baabef624, Title: Shalom

Reading URLs Database:
URLs Database contains 1 entries.
URL Page ID: 4d0830cf-0b5a-439f-bf72-2e6a0a888dee, URL: www.example.org


# Parsing RDF

In [59]:
from rdflib import Graph, Namespace, URIRef

# RDF data (replace with your actual RDF data)
rdf_data = """
@prefix ns1: <https://sense-nets.xyz/> .
@prefix ns2: <http://purl.org/spar/cito/> .
@prefix schema: <https://schema.org/> .

ns1:mySemanticPost ns2:includesQuotationFrom <https://x.com/maxkreminski/status/1030838313429528576> ;
    schema:keywords "AI",
        "Ethereum",
        "blockchain",
        "crowdsourcing",
        "open-source",
        "research" ;
    ns1:indicatesInterest <https://x.com/maxkreminski/status/1030838313429528576> ;
    ns1:quotesPost <https://x.com/maxkreminski/status/1030838313429528576> .

<https://x.com/maxkreminski/status/1030838313429528576> ns1:hasZoteroItemType "forumPost" .

"""

# Load the Turtle text into an in-memory rdflib graph. The SPARQL cells below
# use prefixes such as `schema:` without declaring them, relying on the
# @prefix lines of the parsed data being available on the graph.
g = Graph()
g.parse(data=rdf_data, format="turtle")



<Graph identifier=N3ecdf6f295eb4c00bad5f234b6725c1c (<class 'rdflib.graph.Graph'>)>

In [62]:
# Define namespaces (based on your RDF data)
# NOTE(review): NS1 is bound to the CiTO vocabulary here, whereas the sample
# Turtle above declares `ns1:` as https://sense-nets.xyz/ and `ns2:` as CiTO -
# the Python name and the Turtle prefix therefore point at different things.
# None of the query cells visible in this notebook pass these objects to
# g.query(), so they currently have no effect on the results.
NS1 = Namespace("http://purl.org/spar/cito/")
SCHEMA = Namespace("https://schema.org/")
COSMO = Namespace("https://sense-nets.xyz/")



In [60]:
# Query for keywords (using SPARQL): every object of schema:keywords, for any
# subject in the graph. The `schema:` prefix is not declared in the query text;
# it resolves through the prefixes of the parsed graph `g`.
query = """
    SELECT ?keyword
    WHERE {
        ?post schema:keywords ?keyword .
    }
"""
keywords = g.query(query)
print("Keywords:")
# Each result row exposes the selected variable as an attribute.
for row in keywords:
    print(row.keyword)



Keywords:
AI
Ethereum
blockchain
crowdsourcing
open-source
research


In [71]:
# Query for relations: every (predicate, object) pair in the graph except the
# two bookkeeping predicates schema:keywords and hasZoteroItemType, which are
# filtered out inside the SPARQL text.
relations_query = """
    SELECT ?relation ?target
    WHERE {
        ?post ?relation ?target .
        FILTER (?relation NOT IN (schema:keywords, <https://sense-nets.xyz/hasZoteroItemType>)) # Exclude keywords
    }
"""
relations = g.query(relations_query)
print("\nRelations:")
for row in relations:
    print(f"{row.relation} -> {row.target}")




Relations:
https://sense-nets.xyz/indicatesInterest -> https://x.com/maxkreminski/status/1030838313429528576
https://sense-nets.xyz/quotesPost -> https://x.com/maxkreminski/status/1030838313429528576
http://purl.org/spar/cito/includesQuotationFrom -> https://x.com/maxkreminski/status/1030838313429528576


In [58]:
# If you want to extract specific relations (like disagreesWith, discusses, etc.)
# NOTE(review): `ns1:` is resolved through the prefixes of the parsed graph. In
# the sample Turtle shown above `ns1:` is https://sense-nets.xyz/, but the
# output recorded under this cell lists http://purl.org/spar/cito/ predicates
# and different target URLs, so it appears to come from an earlier run against
# other data (this cell's execution count precedes the parse cell's). Re-run
# top to bottom and confirm which prefix is intended.
specific_relations_query = """
    SELECT ?relation ?target
    WHERE {
        ?post ?relation ?target .
        FILTER (?relation IN (ns1:disagreesWith, ns1:discusses, ns1:reviews))
    }
"""
specific_relations = g.query(specific_relations_query)
print("\nSpecific Relations:")
for row in specific_relations:
    print(f"{row.relation} -> {row.target}")


Specific Relations:
http://purl.org/spar/cito/discusses -> https://x.com/aeberman12/status/1829356227861581938
http://purl.org/spar/cito/disagreesWith -> https://x.com/aeberman12/status/1829356227861581938
http://purl.org/spar/cito/reviews -> https://vaclavsmil.com/2022/07/11/how-the-world-really-works-2/


Testing Firebase client for querying

In [99]:
import firebase_admin
from firebase_admin import credentials, firestore

# Path to your service account key.
# NOTE(review): the key file name is hard-coded; keep the creds/ directory out
# of version control.
cred = credentials.Certificate('creds/sensenets-dataset-firebase-adminsdk-rpero-9c552cac56.json')

# initialize_app() raises ValueError when the default app already exists, which
# happens every time this cell is re-run in a live kernel. Reuse the existing
# app in that case and only initialize on the first run.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)

# Initialize Firestore DB
db = firestore.client()


In [100]:
# Get all collections
collections = db.collections()

# Print the collection IDs
for collection in collections:
    print(f'Collection ID: {collection.id}')

Collection ID: activity
Collection ID: platformPosts
Collection ID: posts
Collection ID: profiles
Collection ID: triples
Collection ID: updates
Collection ID: users


In [112]:
# Fetch a single document from the 'triples' collection to inspect its fields.
# No explicit ordering is requested. Despite its name, `users_ref` refers to
# the 'triples' collection, not to users.
users_ref = db.collection('triples').limit(1)

# Execute the query
docs = users_ref.stream()

# Print the results
for doc in docs:
    print(f'{doc.id} => {doc.to_dict()}')

00HGDA8ZWEDRQ61D5I9o => {'createdAtMs': 1722715949000, 'subject': 'https://jimruttshow.blubrry.net/', 'authorId': 'twitter:550845228', 'object': 'webpage', 'predicate': 'https://sense-nets.xyz/hasZoteroItemType', 'postId': 'VbSNNWfSRxsIj2uGghbb'}


In [115]:
# Define the postId you're searching for
search_post_id = 'VbSNNWfSRxsIj2uGghbb'

# Query the 'triples' collection for every document whose postId field equals
# search_post_id.
# NOTE(review): recent google-cloud-firestore releases reportedly warn about
# positional where() arguments in favour of the `filter=` keyword - confirm
# against the installed version.
triplets_ref = db.collection('triples').where('postId', '==', search_post_id)

# Execute the query
docs = triplets_ref.stream()
# Print the results
for doc in docs:
    print(f'{doc.id} => {doc.to_dict()}')

00HGDA8ZWEDRQ61D5I9o => {'createdAtMs': 1722715949000, 'subject': 'https://jimruttshow.blubrry.net/', 'authorId': 'twitter:550845228', 'object': 'webpage', 'predicate': 'https://sense-nets.xyz/hasZoteroItemType', 'postId': 'VbSNNWfSRxsIj2uGghbb'}
1nE4TycQ0buDK0OwZ7gf => {'createdAtMs': 1722715949000, 'subject': 'https://sense-nets.xyz/mySemanticPost', 'authorId': 'twitter:550845228', 'object': 'https://open.spotify.com/show/2YRNavdimPddYFaNvQDU7I', 'predicate': 'https://sense-nets.xyz/indicatesInterest', 'postId': 'VbSNNWfSRxsIj2uGghbb'}
4i5lsFBPFDauC3I6VBCP => {'createdAtMs': 1722715949000, 'subject': 'https://sense-nets.xyz/mySemanticPost', 'authorId': 'twitter:550845228', 'object': 'https://www.santafe.edu/culture/podcast-archive#Complexity', 'predicate': 'https://sense-nets.xyz/indicatesInterest', 'postId': 'VbSNNWfSRxsIj2uGghbb'}
67X6N3qTk5eILMalzEaC => {'createdAtMs': 1722715949000, 'subject': 'https://sense-nets.xyz/mySemanticPost', 'authorId': 'twitter:550845228', 'object': 'ht