In [1]:
import pymongo
# Connect with the driver's default connection settings and select the
# scratch database named "test" that every example below writes to.
client = pymongo.MongoClient()
db = client['test']

# Approach 1: Pure linking

In [2]:
# Start from empty collections so the cell can be re-run safely.
posts = db['posts']
comments = db['comments']
comments.drop()
posts.drop()
# Comments are looked up by the post they link to, so index that field.
comments.create_index([('post_id', pymongo.ASCENDING)])

'post_id_1'

In [3]:
def make_post(title, author, text):
    """Insert a new post document and return its ``_id``.

    The post holds only its own fields; comments are stored in a separate
    collection and link back to the post through ``post_id``.
    """
    document = dict(title=title, author=author, text=text)
    return posts.insert_one(document).inserted_id


def make_comment(post_id, author, text):
    """Insert a comment linked to ``post_id`` and return the comment's ``_id``."""
    comment = {'post_id': post_id, 'author': author, 'text': text}
    outcome = comments.insert_one(comment)
    return outcome.inserted_id


def get_post_with_comments(post_id):
    """Fetch a post together with every comment that links to it.

    Two queries are issued: one against ``posts`` and one against
    ``comments``.

    Returns:
        dict with keys ``post`` (the post document, as returned by
        ``find_one``) and ``comments`` (a list of comment documents).
    """
    return {
        'post': posts.find_one({'_id': post_id}),
        'comments': list(comments.find({'post_id': post_id})),
    }

In [4]:
# Create a sample post; the trailing expression displays its new _id.
post_id = make_post('First post', 'Joey', 'My very first post')
post_id

ObjectId('5b34fc056cb0046f92f152c8')

In [5]:
# Attach one comment to the sample post.
comment_id = make_comment(post_id, 'Mancy', 'This is an uninteresting post')

In [6]:
# Show the post and its linked comments as one combined result.
get_post_with_comments(post_id=post_id)

{'post': {'_id': ObjectId('5b34fc056cb0046f92f152c8'),
  'title': 'First post',
  'author': 'Joey',
  'text': 'My very first post'},
 'comments': [{'_id': ObjectId('5b34fc056cb0046f92f152c9'),
   'post_id': ObjectId('5b34fc056cb0046f92f152c8'),
   'author': 'Mancy',
   'text': 'This is an uninteresting post'}]}

## Pure linking

*Pros*

- Documents don't grow (more important in older MongoDB)
- Familiar to relational users

*Cons*

- Must perform 2 queries, fetching many documents, for each page view
- An extra index on `comments.post_id` must be created and maintained

# Approach 2: Pure embedding

In [7]:
# Reset both collections; the embedding approach stores everything in
# ``posts``, so no index on ``comments`` is created here.
posts = db['posts']
comments = db['comments']
comments.drop()
posts.drop()

In [8]:
def make_post(title, author, text):
    """Insert a post with an empty embedded ``comments`` array; return its ``_id``."""
    document = dict(title=title, author=author, text=text, comments=[])
    return posts.insert_one(document).inserted_id


def make_comment(post_id, author, text):
    """Append a comment to the ``comments`` array embedded in a post.

    Args:
        post_id: ``_id`` of the post to comment on.
        author: Name of the comment's author.
        text: Body of the comment.

    Returns:
        ``post_id`` unchanged. The embedded comment is stored with only
        ``author`` and ``text``, so there is no separate comment id to
        return.

    Note:
        The update is not an upsert: if no post matches ``post_id``
        nothing is written, and ``post_id`` is still returned.
    """
    comment = {'author': author, 'text': text}
    # The UpdateResult is intentionally not kept; callers only get post_id.
    posts.update_one({'_id': post_id}, {'$push': {'comments': comment}})
    return post_id


def get_post_with_comments(post_id):
    """Fetch a post and split its embedded comments out of the document.

    Args:
        post_id: ``_id`` of the post to load.

    Returns:
        dict with keys ``post`` (the post document without its
        ``comments`` field, or ``None`` when no post matches) and
        ``comments`` (the list of embedded comments, empty when the post
        is missing or has no ``comments`` field).
    """
    post = posts.find_one({'_id': post_id})
    if post is None:
        # find_one yields None for an unknown id; return the same shape the
        # other approaches produce instead of failing on ``None.pop``.
        return dict(post=None, comments=[])
    # Use a distinct local name so the module-level ``comments`` collection
    # is not shadowed inside this function.
    embedded_comments = post.pop('comments', [])
    return dict(post=post, comments=embedded_comments)

In [9]:
# Create a post and push one embedded comment onto it; the trailing call
# displays the value make_comment returns.
post_id = make_post('First post', 'Joey', 'My very first post')
make_comment(post_id, 'Mancy', 'This is an uninteresting post')

ObjectId('5b34fc056cb0046f92f152ca')

## Pure embedding

*Pros*

- Single query per page view
- No extra indexes

*Cons*

- Documents grow (more important in older MongoDB)
- Unfamiliar to relational users

# Approach 3: Hybrid approach with pagination

In [10]:
# Reset the collections used by the hybrid approach.
posts = db['posts']
comment_pages = db['comment_pages']
comment_pages.drop()
posts.drop()
# Pages are always looked up by the post they belong to.
comment_pages.create_index([('post_id', pymongo.ASCENDING)])

'post_id_1'

In [11]:
def make_post(title, author, text):
    """Insert a post document (no embedded comments) and return its ``_id``."""
    document = dict(title=title, author=author, text=text)
    return posts.insert_one(document).inserted_id


def make_comment(post_id, author, text, page_size=10):
    """Append a comment to a non-full comment page of a post.

    A page is a document in ``comment_pages`` holding a ``comments`` array
    and a ``num_comments`` counter. The update targets a page of this post
    whose counter is still below ``page_size``; because the update is an
    upsert, a new page is created when no such page exists.

    Args:
        post_id: ``_id`` of the post being commented on.
        author: Name of the comment's author.
        text: Body of the comment.
        page_size: Maximum number of comments stored per page document.
            Defaults to 10, the previously hard-coded limit.

    Returns:
        The ``UpdateResult`` of the upsert.

    Raises:
        ValueError: If ``page_size`` is less than 1, since no page could
            ever accept a comment and every call would create a new page.
    """
    if page_size < 1:
        raise ValueError('page_size must be at least 1')
    comment = {'author': author, 'text': text}
    result = comment_pages.update_one(
        # Only match pages that still have room for another comment.
        {'post_id': post_id, 'num_comments': {'$lt': page_size}},
        {
            '$push': {'comments': comment},
            # Keep the counter in step with the array in the same update.
            '$inc': {'num_comments': 1},
        },
        upsert=True)
    return result


def get_post_with_comments(post_id):
    """Fetch a post and the comments gathered from all of its pages.

    Pages are read in ascending ``_id`` order and their ``comments`` arrays
    are concatenated into one flat list.

    Returns:
        dict with keys ``post`` (the post document, as returned by
        ``find_one``) and ``comments`` (the flattened list of comments).
    """
    post = posts.find_one({'_id': post_id})
    pages = comment_pages.find({'post_id': post_id}).sort('_id')
    comments = [comment for page in pages for comment in page['comments']]
    return {'post': post, 'comments': comments}

In [12]:
# Create a post and its first comment; the trailing call displays the
# UpdateResult returned by make_comment.
post_id = make_post('First post', 'Joey', 'My very first post')
make_comment(post_id, 'Mancy', 'This is an uninteresting post')

<pymongo.results.UpdateResult at 0x1103f2888>

In [13]:
# With a single comment there is exactly one page to read.
get_post_with_comments(post_id=post_id)

{'post': {'_id': ObjectId('5b34fc066cb0046f92f152cb'),
  'title': 'First post',
  'author': 'Joey',
  'text': 'My very first post'},
 'comments': [{'author': 'Mancy', 'text': 'This is an uninteresting post'}]}

In [14]:
# Add 20 more comments so they spill over onto additional pages.
for x in range(20):
    make_comment(post_id, 'spam', 'more spam')

In [15]:
# Show only the per-page counters to see how the comments were distributed.
page_counts = db['comment_pages'].find({}, projection={'num_comments': 1})
list(page_counts)

[{'_id': ObjectId('5b34fc06cd139b5227d8b92a'), 'num_comments': 10},
 {'_id': ObjectId('5b34fc06cd139b5227d8b937'), 'num_comments': 10},
 {'_id': ObjectId('5b34fc06cd139b5227d8b942'), 'num_comments': 1}]

In [16]:
# Reading the post now gathers comments from every page.
get_post_with_comments(post_id=post_id)

{'post': {'_id': ObjectId('5b34fc066cb0046f92f152cb'),
  'title': 'First post',
  'author': 'Joey',
  'text': 'My very first post'},
 'comments': [{'author': 'Mancy', 'text': 'This is an uninteresting post'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'},
  {'author': 'spam', 'text': 'more spam'}]}

## Hybrid approach with paging

- Fewer documents retrieved per page view than with pure linking
- Same number of indexes as linking
- Tricky to get right
- Posts don't grow, comment pages don't grow *much*