# TopJodel - Setup and Queries

This notebook connects to PostgreSQL, MongoDB, and Neo4j, then walks you through creating a tiny social media data model and running a few queries.

In [7]:
import os
from dotenv import load_dotenv

# Load environment file
load_dotenv(dotenv_path=".env", override=True)

# Gather variables (no defaults)
PGHOST = os.getenv("PGHOST")
PGPORT = os.getenv("PGPORT")
PGUSER = os.getenv("POSTGRES_USER")
PGPASSWORD = os.getenv("POSTGRES_PASSWORD")
PGDATABASE = os.getenv("POSTGRES_DB")

MONGO_HOST = os.getenv("MONGO_HOST")
MONGO_PORT = os.getenv("MONGO_PORT")
MONGO_USER = os.getenv("MONGO_INITDB_ROOT_USERNAME")
MONGO_PASSWORD = os.getenv("MONGO_INITDB_ROOT_PASSWORD")

APP_NEO4J_URI = os.getenv("APP_NEO4J_URI")
APP_NEO4J_URI_HTTP = os.getenv("APP_NEO4J_URI_HTTP")
APP_NEO4J_USER = os.getenv("APP_NEO4J_USER")
APP_NEO4J_PASSWORD = os.getenv("APP_NEO4J_PASSWORD")

# Collect into a dict for nice reporting
env_vars = {
    "PGHOST": PGHOST,
    "PGPORT": PGPORT,
    "POSTGRES_USER": PGUSER,
    "POSTGRES_PASSWORD": PGPASSWORD,
    "POSTGRES_DB": PGDATABASE,
    "MONGO_HOST": MONGO_HOST,
    "MONGO_PORT": MONGO_PORT,
    "MONGO_INITDB_ROOT_USERNAME": MONGO_USER,
    "MONGO_INITDB_ROOT_PASSWORD": MONGO_PASSWORD,
    "APP_NEO4J_URI": APP_NEO4J_URI,
    "APP_NEO4J_USER": APP_NEO4J_USER,
    "APP_NEO4J_URI_HTTP": APP_NEO4J_URI_HTTP,
    "APP_NEO4J_PASSWORD": APP_NEO4J_PASSWORD,
}

# Print results and highlight errors
print("\n--- Environment Variable Check ---")
for key, value in env_vars.items():
    if value in (None, ""):
        print(f"❌ ERROR: {key} is not set or empty")
    else:
        print(f"✅ {key} = {value}")



--- Environment Variable Check ---
✅ PGHOST = postgres
✅ PGPORT = 55433
✅ POSTGRES_USER = app
✅ POSTGRES_PASSWORD = app_pw1234
✅ POSTGRES_DB = socialdb
✅ MONGO_HOST = mongo
✅ MONGO_PORT = 27017
✅ MONGO_INITDB_ROOT_USERNAME = app
✅ MONGO_INITDB_ROOT_PASSWORD = app_pw1234
✅ APP_NEO4J_URI = bolt://neo4j:7687
✅ APP_NEO4J_USER = neo4j
✅ APP_NEO4J_URI_HTTP = neo4j://localhost:7687
✅ APP_NEO4J_PASSWORD = app_pw1234


## MongoDB - comments collection

In [8]:
from pymongo import MongoClient
import pandas as pd

MONGO_HOST = "localhost"

mongo_uri = f"mongodb://{MONGO_USER}:{MONGO_PASSWORD}@{MONGO_HOST}:{MONGO_PORT}/admin"
mc = MongoClient(mongo_uri)
mdb = mc["socialdb"]
comments = mdb["comments"]

# Create sample comments if empty
if comments.count_documents({}) == 0:
    comments.insert_many([
        {"post_id": 1, "author": "bob", "text": "Nice first post!", "created_at": pd.Timestamp.utcnow()},
        {"post_id": 1, "author": "carol", "text": "Welcome Alice!", "created_at": pd.Timestamp.utcnow()},
        {"post_id": 2, "author": "alice", "text": "Hi Bob!", "created_at": pd.Timestamp.utcnow()},
    ])
list(comments.find().limit(5))

OperationFailure: Authentication failed., full error: {'ok': 0.0, 'errmsg': 'Authentication failed.', 'code': 18, 'codeName': 'AuthenticationFailed'}

### Query comments for a given post_id

In [9]:
pd.DataFrame(list(comments.find({"post_id": 1}, {"_id": 0})))


OperationFailure: Authentication failed., full error: {'ok': 0.0, 'errmsg': 'Authentication failed.', 'code': 18, 'codeName': 'AuthenticationFailed'}

## Neo4j - relationships

In [10]:
from neo4j import GraphDatabase

driver = GraphDatabase.driver(APP_NEO4J_URI_HTTP, auth=(APP_NEO4J_USER, APP_NEO4J_PASSWORD))

def run_cypher(query, params=None):
    with driver.session() as session:
        return list(session.run(query, params or {}))

# Constraints and sample data
run_cypher("CREATE CONSTRAINT IF NOT EXISTS FOR (u:User) REQUIRE u.username IS UNIQUE")
run_cypher("UNWIND $users AS u MERGE (:User {username: u})", {"users": ["alice", "bob", "carol"]})
run_cypher("MATCH (a:User {username:'alice'}), (b:User {username:'bob'}) MERGE (a)-[:FOLLOWS]->(b)")
run_cypher("MATCH (b:User {username:'bob'}), (c:User {username:'carol'}) MERGE (b)-[:FOLLOWS]->(c)")
run_cypher("MATCH (a:User {username:'alice'}), (c:User {username:'carol'}) MERGE (a)-[:FOLLOWS]->(c)")

# Likes - pretend likes map to post ids from Postgres
run_cypher("""MERGE (p1:Post {id: 1})
MERGE (p2:Post {id: 2})
MERGE (p3:Post {id: 3})
MERGE (p4:Post {id: 4})
WITH p1,p2,p3,p4
MATCH (a:User {username:'alice'}), (b:User {username:'bob'}), (c:User {username:'carol'})
MERGE (b)-[:LIKES]->(p1)
MERGE (c)-[:LIKES]->(p1)
MERGE (a)-[:LIKES]->(p2)
MERGE (a)-[:LIKES]->(p4);
""")
print("Graph prepared.")


Graph prepared.


### Query the follow graph

In [11]:
rows = run_cypher("MATCH (u:User)-[:FOLLOWS]->(v:User) RETURN u.username AS follower, v.username AS followed ORDER BY follower, followed")
pd.DataFrame([dict(r) for r in rows])


Unnamed: 0,follower,followed
0,alice,bob
1,alice,carol
2,bob,carol


## Cross-database example idea

In [12]:
# Example: For each Postgres post, attach MongoDB comment count
posts = pd.read_sql("SELECT id, body FROM posts ORDER BY id", conn)
comment_counts = {doc["post_id"]: doc["count"] for doc in comments.aggregate([{"$group": {"_id": "$post_id", "count": {"$sum": 1}}}, {"$project": {"_id": 0, "post_id": "$_id", "count": 1}}])}
posts["comment_count"] = posts["id"].map(comment_counts).fillna(0).astype(int)
posts


  posts = pd.read_sql("SELECT id, body FROM posts ORDER BY id", conn)


OperationFailure: Authentication failed., full error: {'ok': 0.0, 'errmsg': 'Authentication failed.', 'code': 18, 'codeName': 'AuthenticationFailed'}