# Mini Social - Setup and Queries

This notebook connects to PostgreSQL, MongoDB, and Neo4j, then walks you through creating a tiny social media data model and running a few queries.

In [29]:
import os
from dotenv import load_dotenv

# Load environment file
load_dotenv(dotenv_path="../.env", override=True)

# Gather variables (no defaults)
PGHOST = os.getenv("PGHOST")
PGPORT = os.getenv("PGPORT")
PGUSER = os.getenv("POSTGRES_USER")
PGPASSWORD = os.getenv("POSTGRES_PASSWORD")
PGDATABASE = os.getenv("POSTGRES_DB")

MONGO_HOST = os.getenv("MONGO_HOST")
MONGO_PORT = os.getenv("MONGO_PORT")
MONGO_USER = os.getenv("MONGO_INITDB_ROOT_USERNAME")
MONGO_PASSWORD = os.getenv("MONGO_INITDB_ROOT_PASSWORD")

NEO4J_URI = os.getenv("APP_NEO4J_URI")
NEO4J_USER = os.getenv("APP_NEO4J_USER")
NEO4J_PASSWORD = os.getenv("APP_NEO4J_PASSWORD")

# Collect into a dict for nice reporting
env_vars = {
    "PGHOST": PGHOST,
    "PGPORT": PGPORT,
    "POSTGRES_USER": PGUSER,
    "POSTGRES_PASSWORD": PGPASSWORD,
    "POSTGRES_DB": PGDATABASE,
    "MONGO_HOST": MONGO_HOST,
    "MONGO_PORT": MONGO_PORT,
    "MONGO_INITDB_ROOT_USERNAME": MONGO_USER,
    "MONGO_INITDB_ROOT_PASSWORD": MONGO_PASSWORD,
    "APP_NEO4J_URI": NEO4J_URI,
    "APP_NEO4J_USER": NEO4J_USER,
    "APP_NEO4J_PASSWORD": NEO4J_PASSWORD,
}

# Print results and highlight errors
print("\n--- Environment Variable Check ---")
for key, value in env_vars.items():
    if value in (None, ""):
        print(f"❌ ERROR: {key} is not set or empty")
    else:
        print(f"✅ {key} = {value}")



--- Environment Variable Check ---
✅ PGHOST = postgres
✅ PGPORT = 55433
✅ POSTGRES_USER = app
✅ POSTGRES_PASSWORD = app_pw1234
✅ POSTGRES_DB = socialdb
✅ MONGO_HOST = mongo
✅ MONGO_PORT = 27017
✅ MONGO_INITDB_ROOT_USERNAME = app
✅ MONGO_INITDB_ROOT_PASSWORD = app_pw1234
✅ APP_NEO4J_URI = bolt://neo4j:7687
✅ APP_NEO4J_USER = neo4j
✅ APP_NEO4J_PASSWORD = app_pw1234


## PostgreSQL - create tables

In [31]:
import psycopg2
from psycopg2.extras import execute_values

conn = psycopg2.connect(
    host="localhost", port=PGPORT, user=PGUSER, password=PGPASSWORD, dbname=PGDATABASE
)
print(conn)
conn.autocommit = True
cur = conn.cursor()

# Create tables
cur.execute("""CREATE TABLE IF NOT EXISTS users (
  id SERIAL PRIMARY KEY,
  username TEXT UNIQUE NOT NULL,
  full_name TEXT,
  created_at TIMESTAMPTZ DEFAULT NOW()
);
""")

cur.execute("""CREATE TABLE IF NOT EXISTS posts (
  id SERIAL PRIMARY KEY,
  user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  body TEXT NOT NULL,
  created_at TIMESTAMPTZ DEFAULT NOW()
);
""")

print("Tables created.")

<connection object at 0x10b193b50; dsn: 'user=app password=xxx dbname=socialdb host=localhost port=55433', closed: 0>
Tables created.


### Insert sample users and posts

In [32]:
# Insert sample data only if empty
cur.execute("SELECT COUNT(*) FROM users;")
count_users = cur.fetchone()[0]
if count_users == 0:
    users = [("alice", "Alice Example"), ("bob", "Bob Sample"), ("carol", "Carol Demo")]
    execute_values(cur, "INSERT INTO users (username, full_name) VALUES %s ON CONFLICT DO NOTHING;", users)

cur.execute("SELECT COUNT(*) FROM posts;")
count_posts = cur.fetchone()[0]
if count_posts == 0:
    posts = [
        (1, "Hello world from Alice"),
        (2, "Bob here, nice to meet you"),
        (1, "Another post by Alice"),
        (3, "Carol checking in"),
    ]
    execute_values(cur, "INSERT INTO posts (user_id, body) VALUES %s;", posts)

print("Sample data present.")


Sample data present.


### Query posts with users

In [33]:
import pandas as pd
df = pd.read_sql("SELECT p.id, u.username, p.body, p.created_at FROM posts p JOIN users u ON p.user_id=u.id ORDER BY p.id;", conn)
df


  df = pd.read_sql("SELECT p.id, u.username, p.body, p.created_at FROM posts p JOIN users u ON p.user_id=u.id ORDER BY p.id;", conn)


Unnamed: 0,id,username,body,created_at
0,1,alice,Hello world from Alice,2025-10-15 10:03:53.313112+00:00
1,2,bob,"Bob here, nice to meet you",2025-10-15 10:03:53.313112+00:00
2,3,alice,Another post by Alice,2025-10-15 10:03:53.313112+00:00
3,4,carol,Carol checking in,2025-10-15 10:03:53.313112+00:00


## MongoDB - comments collection

In [35]:
from pymongo import MongoClient

mongo_uri = f"mongodb://{MONGO_USER}:{MONGO_PASSWORD}@{MONGO_HOST}:{MONGO_PORT}/?authSource=admin"
mc = MongoClient(mongo_uri)
mdb = mc["socialdb"]
comments = mdb["comments"]

# Create sample comments if empty
if comments.count_documents({}) == 0:
    comments.insert_many([
        {"post_id": 1, "author": "bob", "text": "Nice first post!", "created_at": pd.Timestamp.utcnow()},
        {"post_id": 1, "author": "carol", "text": "Welcome Alice!", "created_at": pd.Timestamp.utcnow()},
        {"post_id": 2, "author": "alice", "text": "Hi Bob!", "created_at": pd.Timestamp.utcnow()},
    ])
list(comments.find().limit(5))


ServerSelectionTimeoutError: mongo:27017: [Errno 8] nodename nor servname provided, or not known (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 68ef71c6fe665bad9f40aafe, topology_type: Unknown, servers: [<ServerDescription ('mongo', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('mongo:27017: [Errno 8] nodename nor servname provided, or not known (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>

### Query comments for a given post_id

In [6]:
pd.DataFrame(list(comments.find({"post_id": 1}, {"_id": 0})))


Unnamed: 0,post_id,author,text,created_at
0,1,bob,Nice first post!,2025-10-13 09:56:46.374
1,1,carol,Welcome Alice!,2025-10-13 09:56:46.374


## Neo4j - relationships

In [7]:
from neo4j import GraphDatabase

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

def run_cypher(query, params=None):
    with driver.session() as session:
        return list(session.run(query, params or {}))

# Constraints and sample data
run_cypher("CREATE CONSTRAINT IF NOT EXISTS FOR (u:User) REQUIRE u.username IS UNIQUE")
run_cypher("UNWIND $users AS u MERGE (:User {username: u})", {"users": ["alice", "bob", "carol"]})
run_cypher("MATCH (a:User {username:'alice'}), (b:User {username:'bob'}) MERGE (a)-[:FOLLOWS]->(b)")
run_cypher("MATCH (b:User {username:'bob'}), (c:User {username:'carol'}) MERGE (b)-[:FOLLOWS]->(c)")
run_cypher("MATCH (a:User {username:'alice'}), (c:User {username:'carol'}) MERGE (a)-[:FOLLOWS]->(c)")

# Likes - pretend likes map to post ids from Postgres
run_cypher("""MERGE (p1:Post {id: 1})
MERGE (p2:Post {id: 2})
MERGE (p3:Post {id: 3})
MERGE (p4:Post {id: 4})
WITH p1,p2,p3,p4
MATCH (a:User {username:'alice'}), (b:User {username:'bob'}), (c:User {username:'carol'})
MERGE (b)-[:LIKES]->(p1)
MERGE (c)-[:LIKES]->(p1)
MERGE (a)-[:LIKES]->(p2)
MERGE (a)-[:LIKES]->(p4);
""")
print("Graph prepared.")


Graph prepared.


### Query the follow graph

In [8]:
rows = run_cypher("MATCH (u:User)-[:FOLLOWS]->(v:User) RETURN u.username AS follower, v.username AS followed ORDER BY follower, followed")
pd.DataFrame([dict(r) for r in rows])


Unnamed: 0,follower,followed
0,alice,bob
1,alice,carol
2,bob,carol


## Cross-database example idea

In [9]:
# Example: For each Postgres post, attach MongoDB comment count
posts = pd.read_sql("SELECT id, body FROM posts ORDER BY id", conn)
comment_counts = {doc["post_id"]: doc["count"] for doc in comments.aggregate([{"$group": {"_id": "$post_id", "count": {"$sum": 1}}}, {"$project": {"_id": 0, "post_id": "$_id", "count": 1}}])}
posts["comment_count"] = posts["id"].map(comment_counts).fillna(0).astype(int)
posts


  posts = pd.read_sql("SELECT id, body FROM posts ORDER BY id", conn)


Unnamed: 0,id,body,comment_count
0,1,Hello world from Alice,2
1,2,"Bob here, nice to meet you",1
2,3,Another post by Alice,0
3,4,Carol checking in,0


### Next steps
- Add pagination queries in Postgres
- Model user profiles in MongoDB
- Model richer relationships in Neo4j
- Consider eventual consistency notes across the three stores