# How to Design DynamoDB Data Model for Production

- http://docs.getmoto.org/en/latest/docs/services/dynamodb.html

## Overview

## Case Study - Design YouTube Data Model

Entities:

- User: a user can upload videos and can also view other users' videos
- Video: a video is created (uploaded) by a user
- Channel: a user can create a channel, then add videos they own to the channel

YouTube user's behavior:

- People can sign up as a User
- A User can upload videos
- A User can create channels
- A User can add videos to a channel

Query Pattern:

- Given a User id, Video id, or Channel id, we can get the detailed information of that user, video, or channel.
- Given a User id, we can get all the videos they uploaded, ordered by creation time, latest video first.
- Given a User id, we can get all the channels they created, ordered by creation time, latest channel first.
- Given a Channel id, we can get all the videos in the channel, ordered by creation time, latest video first.

In [24]:
import typing as T
import enum
from datetime import datetime
import dataclasses
import pynamodb_mate as pm
from moto import mock_dynamodb
from rich import print as rprint

In [26]:
class EntityTypeEnum(str, enum.Enum):
    """
    Kinds of entity stored in the table.

    Members are also ``str`` instances, so a member compares equal to its
    plain string value.
    """

    USER = "USER"
    VIDEO = "VIDEO"
    CHANNEL = "CHANNEL"


@dataclasses.dataclass
class User:
    """
    In-memory representation of a user.

    ``videos`` and ``channels`` are optional relationship holders; each
    instance gets its own empty list when they are not supplied.
    """

    # --- required fields
    user_id: int
    user_name: str
    created_at: datetime

    # --- relationships (default to a fresh empty list per instance)
    videos: T.List["Video"] = dataclasses.field(default_factory=list)
    channels: T.List["Channel"] = dataclasses.field(default_factory=list)


@dataclasses.dataclass
class Video:
    """
    In-memory representation of a video.

    ``creator_id`` is a string; the sample data in this notebook uses the
    prefixed form ``"user-<id>"``.
    """

    # --- required fields
    video_id: int
    video_title: str
    created_at: datetime
    creator_id: str

    # --- relationship (optional, not set unless the caller provides it)
    creator: T.Optional[User] = None


@dataclasses.dataclass
class Channel:
    """
    In-memory representation of a channel.

    ``creator_id`` is a string; the sample data in this notebook uses the
    prefixed form ``"user-<id>"``.
    """

    # --- required fields
    channel_id: int
    channel_name: str
    created_at: datetime
    creator_id: str

    # --- relationships (optional)
    creator: T.Optional[User] = None
    videos: T.List[Video] = dataclasses.field(default_factory=list)


In [67]:
# Placeholder range-key value for items that are logically identified by the
# hash key alone (the range key is not used for them).
ROOT = "_root"

class Model(pm.Model):
    """
    Single-table model holding users, videos, channels and lookup items.

    Key layout used by the methods below:

    - Entity items: ``pk`` is ``user-<id>`` / ``video-<id>`` /
      ``channel-<id>`` and ``sk`` is ``ROOT``.
    - "Videos of a user" lookup items: ``pk`` is ``<creator_id>-videos`` and
      ``sk`` is the string form of the video's ``created_at`` timestamp.
    - "Channels of a user" lookup items: ``pk`` is ``<creator_id>-channels``
      and ``sk`` is the string form of the channel's ``created_at`` timestamp.
    - "Videos of a channel" lookup items: ``pk`` is ``channel-<id>-videos``
      and ``sk`` is the string form of the video's ``created_at`` timestamp.

    The query helpers build their hash key as ``user-<user_id>-...``, so
    ``creator_id`` on videos and channels must already be in the
    ``user-<id>`` form for the lookups to find the items.

    NOTE(review): the sort key is ``str(created_at.timestamp())``. Two lookup
    items with the same ``pk`` and identical ``created_at`` share a primary
    key, so the later ``save()`` replaces the earlier one. The string form is
    also compared lexicographically, which matches numeric order only while
    the integer part has the same number of digits. Confirm this is
    acceptable before using the layout beyond a demo.
    """

    class Meta:
        table_name = "entities"
        region = "us-east-1"
        billing_mode = pm.PAY_PER_REQUEST_BILLING_MODE

    pk: T.Union[str, pm.UnicodeAttribute] = pm.UnicodeAttribute(
        hash_key=True
    )
    sk: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        range_key=True, default=None, null=True
    )
    entity_type: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        default=None, null=True
    )

    # --- user
    user_name: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        default=None, null=True
    )

    # --- video
    video_id: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        default=None, null=True
    )
    video_title: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        default=None, null=True
    )

    # --- channel
    channel_id: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        default=None, null=True
    )
    channel_name: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        default=None, null=True
    )

    # --- common
    created_at: T.Optional[T.Union[datetime, pm.UTCDateTimeAttribute]] = pm.UTCDateTimeAttribute(
        default=None, null=True
    )
    creator_id: T.Optional[T.Union[str, pm.UnicodeAttribute]] = pm.UnicodeAttribute(
        default=None, null=True
    )

    @classmethod
    def signup_user(cls, user: User) -> "Model":
        """
        Persist a new user entity item and return it.

        :param user: the user to store; ``user_id``, ``user_name`` and
            ``created_at`` are written to the item.
        :return: the saved user item.
        """
        user_model = cls(
            pk=f"user-{user.user_id}",
            sk=ROOT,
            entity_type=EntityTypeEnum.USER.value,
            # The user name was previously dropped, leaving the stored item
            # without the only user-specific attribute.
            user_name=user.user_name,
            created_at=user.created_at,
        )
        user_model.save()
        return user_model

    @classmethod
    def upload_video(cls, video: Video) -> "Model":
        """
        Persist a video entity item plus the creator's video lookup item.

        Two items are saved with two separate ``save()`` calls: the video
        entity itself, and a copy of its attributes under the
        ``<creator_id>-videos`` hash key so the creator's videos can be
        queried by creation time.

        :param video: the video to store.
        :return: the saved video entity item (not the lookup item).
        """
        video_key = f"video-{video.video_id}"

        video_model = cls(
            pk=video_key,
            sk=ROOT,
            entity_type=EntityTypeEnum.VIDEO.value,
            video_id=video_key,
            video_title=video.video_title,
            created_at=video.created_at,
            creator_id=video.creator_id,
        )
        video_model.save()

        user_video_lookup_model = cls(
            pk=f"{video.creator_id}-videos",
            sk=str(video.created_at.timestamp()),
            video_id=video_key,
            video_title=video.video_title,
            created_at=video.created_at,
            creator_id=video.creator_id,
        )
        user_video_lookup_model.save()
        return video_model

    @classmethod
    def create_channel(cls, channel: Channel) -> "Model":
        """
        Persist a channel entity item plus the creator's channel lookup item.

        Two items are saved with two separate ``save()`` calls: the channel
        entity itself, and a copy of its attributes under the
        ``<creator_id>-channels`` hash key so the creator's channels can be
        queried by creation time.

        :param channel: the channel to store.
        :return: the saved channel entity item (not the lookup item).
        """
        channel_key = f"channel-{channel.channel_id}"

        channel_model = cls(
            pk=channel_key,
            sk=ROOT,
            entity_type=EntityTypeEnum.CHANNEL.value,
            channel_id=channel_key,
            channel_name=channel.channel_name,
            created_at=channel.created_at,
            creator_id=channel.creator_id,
        )
        channel_model.save()

        user_channel_lookup_model = cls(
            pk=f"{channel.creator_id}-channels",
            sk=str(channel.created_at.timestamp()),
            channel_id=channel_key,
            channel_name=channel.channel_name,
            created_at=channel.created_at,
            creator_id=channel.creator_id,
        )
        user_channel_lookup_model.save()
        return channel_model

    @classmethod
    def get_user(cls) -> "Model":
        """
        Placeholder for fetching a user entity item.

        Not implemented yet: it takes no identifier and currently returns
        ``None``.
        """
        pass

    @classmethod
    def get_users_videos(cls, user_id: int, limit: int = 5):
        """
        Return a user's video lookup items, latest first.

        :param user_id: numeric user id; the hash key queried is
            ``user-<user_id>-videos``.
        :param limit: value passed through as the query ``limit``.
        :return: the result of ``iter_query`` for that hash key, read in
            descending sort-key order.
        """
        return cls.iter_query(
            hash_key=f"user-{user_id}-videos",
            scan_index_forward=False,
            limit=limit,
        )

    @classmethod
    def get_users_channels(cls, user_id: int, limit: int = 5):
        """
        Return a user's channel lookup items, latest first.

        :param user_id: numeric user id; the hash key queried is
            ``user-<user_id>-channels``.
        :param limit: value passed through as the query ``limit``.
        :return: the result of ``iter_query`` for that hash key, read in
            descending sort-key order.
        """
        return cls.iter_query(
            hash_key=f"user-{user_id}-channels",
            scan_index_forward=False,
            limit=limit,
        )

    @classmethod
    def add_video_to_channel(cls, video: Video, channel: Channel):
        """
        Save a lookup item that places ``video`` in ``channel``.

        The item copies the video's attributes under the
        ``channel-<channel_id>-videos`` hash key, sorted by the video's
        ``created_at`` timestamp.

        :param video: the video to add.
        :param channel: the channel receiving the video; only its
            ``channel_id`` is used.
        :return: the saved channel-video lookup item.
        """
        channel_video_model = cls(
            pk=f"channel-{channel.channel_id}-videos",
            sk=str(video.created_at.timestamp()),
            video_id=f"video-{video.video_id}",
            video_title=video.video_title,
            created_at=video.created_at,
            creator_id=video.creator_id,
        )
        channel_video_model.save()
        return channel_video_model

    @classmethod
    def get_channel_videos(cls, channel_id: int, limit: int = 5):
        """
        Return a channel's video lookup items, latest first.

        :param channel_id: numeric channel id; the hash key queried is
            ``channel-<channel_id>-videos``.
        :param limit: value passed through as the query ``limit``.
        :return: the result of ``iter_query`` for that hash key, read in
            descending sort-key order.
        """
        return cls.iter_query(
            hash_key=f"channel-{channel_id}-videos",
            scan_index_forward=False,
            limit=limit,
        )

# Start moto's DynamoDB mock first, so the connection and table creation
# below run against the mock.
# NOTE(review): newer moto releases expose `mock_aws` instead of
# `mock_dynamodb` — confirm against the installed moto version.
mock = mock_dynamodb()
mock.start()

# Create the connection object, then create the table; `wait=True` is passed
# to `create_table`.
connect = pm.Connection()
Model.create_table(wait=True)

In [68]:
# Clear the table before seeding the sample data.
Model.delete_all()

# --- users
user1 = User(user_id=1, user_name="alice", created_at=datetime(2020, 1, 1))
user2 = User(user_id=2, user_name="bob", created_at=datetime(2020, 1, 2))
user3 = User(user_id=3, user_name="cathy", created_at=datetime(2020, 1, 3))

for new_user in (user1, user2, user3):
    Model.signup_user(new_user)

# --- videos: user-1 owns videos 1-3, user-2 owns video 4, user-3 owns videos 5-9
video1 = Video(video_id=1, video_title="video1", created_at=datetime(2020, 2, 1), creator_id="user-1")
video2 = Video(video_id=2, video_title="video2", created_at=datetime(2020, 2, 2), creator_id="user-1")
video3 = Video(video_id=3, video_title="video3", created_at=datetime(2020, 2, 3), creator_id="user-1")
video4 = Video(video_id=4, video_title="video4", created_at=datetime(2020, 2, 4), creator_id="user-2")
video5 = Video(video_id=5, video_title="video5", created_at=datetime(2020, 2, 5), creator_id="user-3")
video6 = Video(video_id=6, video_title="video6", created_at=datetime(2020, 2, 6), creator_id="user-3")
video7 = Video(video_id=7, video_title="video7", created_at=datetime(2020, 2, 7), creator_id="user-3")
video8 = Video(video_id=8, video_title="video8", created_at=datetime(2020, 2, 8), creator_id="user-3")
video9 = Video(video_id=9, video_title="video9", created_at=datetime(2020, 2, 9), creator_id="user-3")

for new_video in (
    video1, video2, video3, video4, video5, video6, video7, video8, video9,
):
    Model.upload_video(new_video)

# --- channels: both created by user-3
channel1 = Channel(channel_id=1, channel_name="channel1", created_at=datetime(2020, 3, 1), creator_id="user-3")
channel2 = Channel(channel_id=2, channel_name="channel2", created_at=datetime(2020, 3, 2), creator_id="user-3")

for new_channel in (channel1, channel2):
    Model.create_channel(new_channel)

# --- channel membership: videos 5-6 go to channel1, videos 7-8 to channel2
for member_video, target_channel in (
    (video5, channel1),
    (video6, channel1),
    (video7, channel2),
    (video8, channel2),
):
    Model.add_video_to_channel(member_video, target_channel)

# Dump every item in the table to inspect the resulting layout.
for item in Model.iter_scan():
    rprint(item.to_dict())

In [63]:
# Query pattern: videos uploaded by user 3, latest first (default limit).
user_videos = Model.get_users_videos(user_id=3)
for video in user_videos:
    rprint(video.to_dict())

In [64]:
# Query pattern: channels created by user 3, latest first (default limit).
user_channels = Model.get_users_channels(user_id=3)
for channel in user_channels:
    rprint(channel.to_dict())

In [66]:
# Query pattern: videos in channel 1, latest first (default limit).
channel_videos = Model.get_channel_videos(channel_id=1)
for video in channel_videos:
    rprint(video.to_dict())