In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before code is executed so that edits to the imported project code are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# 1. Media Creation Example

This example shows how to create a media object from a file and upload it to the server. It saves the metadata in the Postgres database and uploads the files to the S3 bucket.

In [5]:
from rts.db.dao import DataAccessObject
from rts.settings import DATABASE_URL
from rts.db.queries import get_library_id_from_name, get_all_media, delete_media
from rts.io.media import upload_media_files
from rts.storage.storage import get_storage_client
from rts.api.models import Media
import os
import hashlib
import pandas as pd

# Root directory of the local archive, read from the BASE_PATH environment variable.
# NOTE(review): this is None when BASE_PATH is unset; the os.path.join calls that
# use it further down would then fail.
ARCHIVE_BASE_PATH = os.environ.get("BASE_PATH")

In [6]:
# Resolve the library that the media objects created below are attached to.
# The storage bucket carries the same name as the archive.
archive_name = bucket_name = "rts"
library_id = get_library_id_from_name(archive_name)
library_id

In [5]:
# Collect the video clips stored under the archive as Media records.
#
# Every directory that contains a `clips` sub-directory is treated as one
# original recording; the files in its `clips/videos` folder become the records.
# Only the first MAX_CLIP_DIRS such directories are scanned, which keeps this
# example small.
MAX_CLIP_DIRS = 6

clips = []
count = 0
archive_path = os.path.join(ARCHIVE_BASE_PATH, "archive")
print(archive_path)

for dirpath, dirnames, filenames in os.walk(archive_path):
    # os.walk yields entries in arbitrary order; sorting in place makes the
    # traversal (and therefore the sampled directories) reproducible.
    dirnames.sort()
    if 'clips' not in dirnames:
        continue

    clip_dir = os.path.join(dirpath, 'clips', 'videos')
    if not os.path.isdir(clip_dir):
        # `clips` exists but has no `videos` folder: report it and move on
        # instead of failing on os.listdir.
        print(f"Directory not found: {clip_dir}")
        continue

    # The name of the directory that holds the clips doubles as the original id.
    dirname = os.path.basename(dirpath)

    for clip in sorted(os.listdir(clip_dir)):
        clip_path = os.path.join(clip_dir, clip)
        if not os.path.isfile(clip_path):
            # Skip sub-directories and broken links rather than recording them.
            continue

        media_path = f"videos/{dirname}/{clip}"
        clips.append(Media(
            media_id=clip.split('.')[0],
            original_path=clip_path,
            original_id=dirname,
            media_path=media_path,
            media_type="video",
            sub_type="clip",
            size=os.path.getsize(clip_path),
            metadata={},
            library_id=library_id,
            # The hash is derived from the bucket-qualified media path.
            hash=hashlib.md5(f"{bucket_name}/{media_path}".encode()).hexdigest(),
            parent_id=-1,
            # Timing and frame values are hard-coded for every clip.
            start_ts=0, end_ts=10,
            start_frame=0, end_frame=10, frame_rate=30,
        ))

    count += 1
    if count >= MAX_CLIP_DIRS:
        break
len(clips)

/Users/arattinger/Projects/rts/rts/testdata/archive


21

In [6]:
# Upload the collected clips. The returned entries are read like mappings
# further down (clip['original_id'], clip['media_id']).
uploaded_clips = upload_media_files(clips)

In [7]:
# Build thumbnail Media records for the clips that were uploaded.
#
# Thumbnails are read from `<archive>/<original_id>/clips/images/<square_res>px`.
# Several clips can share one original_id, so `media_keys` remembers the media
# paths already collected and each image is recorded only once, with the first
# clip that reaches it as its parent.
thumbnails = []
square_res = 256
media_keys = set()

for clip in uploaded_clips:
    original_id = clip['original_id']
    clip_rel_dir = os.path.join('clips', 'images', f'{square_res}px')
    clip_dir = os.path.join(archive_path, original_id, clip_rel_dir)
    try:
        for img in sorted(os.listdir(clip_dir)):

            img_path = os.path.join(clip_dir, img)
            # Use this clip's own original_id; relying on a variable left over
            # from an earlier loop would give every thumbnail the same folder.
            media_path = f"images/{original_id}/{square_res}px/{img}"

            if media_path in media_keys:
                continue

            thumbnails.append(Media(
                media_id=img.split('.')[0],
                original_path=img_path,
                original_id=original_id,
                media_path=media_path,
                media_type="image",
                sub_type="thumbnail",
                # Size of the thumbnail file itself.
                size=os.path.getsize(img_path),
                metadata={},
                library_id=library_id,
                # Bucket-qualified media path, the same scheme the clip records use.
                hash=hashlib.md5(f"{bucket_name}/{media_path}".encode()).hexdigest(),
                parent_id=clip['media_id'],
                # Timing and frame values are hard-coded for every thumbnail.
                start_ts=0, end_ts=10,
                start_frame=0, end_frame=10, frame_rate=30,
            ))
            media_keys.add(media_path)

    except FileNotFoundError:
        print(f"Directory not found: {clip_dir}")
        continue
len(thumbnails)

63

In [8]:
# Upload the thumbnail records collected above with the same helper used for the clips.
uploaded_thumbnails = upload_media_files(thumbnails)

# Find and delete media objects

In [22]:
from tqdm.notebook import trange, tqdm

In [None]:
# Load the result of get_all_media() for the clean-up cells below and show how
# many entries it has.
# Not recommended to run this casually: it will take a long time.
media = get_all_media()
len(media)

In [11]:
# Tabulate the fetched media, keeping only the id and media path columns.
media_columns = ['media_id', 'media_path']
df = pd.DataFrame(media)[media_columns]

In [15]:
# Keep only the rows whose media_id contains the substring 'GA'.
is_ga_media = df['media_id'].apply(lambda media_id: 'GA' in media_id)
df = df[is_ga_media]

In [19]:
# Call delete_media for the media_id of every remaining row, one at a time.
for row in df.itertuples(index=False):
    delete_media(row.media_id)

In [30]:
# Bucket the objects are deleted from; same value the setup cell derives from archive_name.
bucket_name = "rts"

In [31]:
# Delete the stored object behind every selected row from the bucket.
# The client is fetched once instead of once per row, and tqdm is given a sized
# column rather than the iterrows() generator so the progress bar knows its total.
# NOTE(review): delete_object(Bucket=..., Key=...) looks like the S3 client API — confirm.
storage_client = get_storage_client().client
for media_path in tqdm(df['media_path'], total=len(df)):
    storage_client.delete_object(Bucket=bucket_name, Key=media_path)

0it [00:00, ?it/s]