In [1]:
from rts.db.dao import DataAccessObject
from rts.db_settings import DATABASE_URL
from rts.db.queries import get_library_id_from_name
from rts.io.media import upload_media_files
from rts.api.models import Media
import os
import hashlib
import pandas as pd
from dotenv import load_dotenv

load_dotenv()
DataAccessObject().connect(DATABASE_URL)

ARCHIVE_BASE_PATH = os.getenv("BASE_PATH")

  from .autonotebook import tqdm as notebook_tqdm


## TODO

- Decouple the upload functions from discovering the files. We need to give them a list of file paths and metadata (minimum one). One of the dangers at the moment is that we might ingest something by accident.
- Make sure that all the data for the atlases are returned with the requests

In [3]:
# Setup before we can create media objects
archive_name =  "rts"
bucket_name =  archive_name
library_id =  get_library_id_from_name(archive_name)

In [22]:
# Upload clips

clips = []
data_dir = os.path.join(ARCHIVE_BASE_PATH, "data")
for dirname in os.listdir(data_dir):
    try:
        clip_dir = os.path.join(data_dir, dirname, 'clips', 'videos')

        for clip in os.listdir(clip_dir):
            clip_path = os.path.join(clip_dir, clip)

            clips.append(Media(**{
                    'original_path': clip_path,
                    'original_id': dirname,
                    'media_path': f"{bucket_name}/videos/{dirname}/{clip}", 
                    'media_type': "video",
                    'media_id': clip.split('.')[0],
                    'sub_type': "clip", 
                    'size': os.path.getsize(clip_path), 
                    'metadata': {},
                    'library_id': library_id, 
                    'hash': hashlib.md5(f"{bucket_name}/videos/{dirname}/{clip}".encode()).hexdigest(), 
                    'parent_id': -1,
                    'start_ts': 0, 'end_ts': 10, 
                    'start_frame': 0, 'end_frame': 10, 'frame_rate': 30, 
            }))

    except NotADirectoryError:
        pass
print(len(clips))

33


In [6]:
uploaded_clips = upload_media_files(clips)

Duplicate


In [24]:
clips[0]

Media(media_id=None, media_path='rts/videos/ZB006020/ZB006020-L002.mp4', original_path='/Users/arattinger/Projects/rts/rts/testdata/data/ZB006020/clips/videos/ZB006020-L002.mp4', original_id='ZB006020', created_at=None, media_type='video', sub_type='clip', size=437415, metadata={}, library_id=1, hash='6349d3b98c85348cb92f6dedef5ed60a', parent_id=-1, start_ts=0.0, end_ts=10.0, start_frame=0, end_frame=10, frame_rate=30.0)

In [26]:
# Upload thumbnails
thumbnails = []
square_res = 256

data_dir = os.path.join(ARCHIVE_BASE_PATH, "data")
for dirname in os.listdir(data_dir):
    try:
        clip_dir = os.path.join(data_dir, dirname, 'clips', 'images', '256px')

        for clip in os.listdir(clip_dir):
            clip_path = os.path.join(clip_dir, clip)

            # Find the parent. There can be multiple ways this can be achieved but
            # most of the time this will not be solely based with the database
            print(clip.split('.')[0])


            thumbnails.append(Media(**{
                    'original_path': clip_path,
                    'original_id': dirname,
                    'media_path': f"{bucket_name}/images/{dirname}/{square_res}px/{clip}", 
                    'media_type': "image", 
                    'sub_type': "thumbnail", 
                    'size': os.path.getsize(clip_path),
                    'metadata': {},
                    'library_id': library_id, 
                    'hash': hashlib.md5(f"{bucket_name}/videos/{dirname}/{clip}".encode()).hexdigest(), 
                    'parent_id': -1,  # Fill in the info here from the media objects
                    'start_ts': 0, 'end_ts': 10, 
                    'start_frame': 0, 'end_frame': 10, 'frame_rate': 30, 
            }))

    except NotADirectoryError:
        pass
print(len(clips))

ZB006020-L001-02
ZB006020-L001-00
ZB006020-L001-01
ZB006020-L002-02
ZB006020-L002-01
ZB006020-L002-00
ZB006020-L000-01
ZB006020-L000-00
ZB006020-L000-02
ZB002020-L001-01
ZB002020-L001-00
ZB002020-L001-02
ZB002020-L002-00
ZB002020-L002-01
ZB002020-L002-02
ZB002020-L000-02
ZB002020-L000-00
ZB002020-L000-01
ZB012020-L001-01
ZB012020-L001-00
ZB012020-L001-02
ZB012020-L003-02
ZB012020-L003-01
ZB012020-L003-00
ZB012020-L004-02
ZB012020-L004-01
ZB012020-L004-00
ZB012020-L000-02
ZB012020-L000-00
ZB012020-L000-01
ZB012020-L002-00
ZB012020-L002-01
ZB012020-L002-02
ZB003020-L001-02
ZB003020-L001-00
ZB003020-L001-01
ZB003020-L000-01
ZB003020-L000-00
ZB003020-L000-02
ZB008020-L001-02
ZB008020-L001-00
ZB008020-L001-01
ZB008020-L003-00
ZB008020-L003-01
ZB008020-L003-02
ZB008020-L000-01
ZB008020-L000-00
ZB008020-L000-02
ZB008020-L002-02
ZB008020-L002-01
ZB008020-L002-00
ZB004020-L001-00
ZB004020-L001-01
ZB004020-L001-02
ZB004020-L003-02
ZB004020-L003-00
ZB004020-L003-01
ZB004020-L000-02
ZB004020-L000-

In [None]:
uploaded_clips = upload_media_files(clips)