In [41]:
import time
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
from supabase import create_client
from datetime import date
from google.cloud import storage
from pytube import YouTube
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

In [42]:
# Connect to database
url = os.environ.get("SUPABASE_URL")
key = os.environ.get("SUPABASE_KEY")
email = os.environ.get("EMAIL")
password = os.environ.get("PASSWORD")
client = create_client(url, key)
user = client.auth.sign_in(email=email, password=password)

# connect to gcloud bucket
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'survai-data-connect.json'
storage_client = storage.Client()
bucket_name = 'js_test_bucket'
bucket = storage_client.bucket(bucket_name)

In [43]:
# All possible entries
all_possible_annotations = ['brawling', 'br', 'restraining', 're', 'pepper spray', 'ps', 'striking', 
                            'st', 'advancing', 'ad', 'crowd', 'cr', 'running', 'ru', 'person on ground', 'pg', 
                            'blood', 'bl', 'throwing', 'th', 'aiming', 'am', 'pointing', 'po', 'nothing', 'no']

value_pairs = {'br': 'brawling', 're': 'restraining', 'ps': 'spray', 'st': 'striking', 'ad': 'advancing', 
                'cr': 'crowd', 'ru': 'running', 'pg': 'person on ground', 'bl': 'blood', 
                'th': 'throwing', 'am': 'aiming', 'po': 'person pointing', 'no': 'nothing'} 

clarity_options = ['easy', 'medium', 'hard']

In [44]:
def upload_to_bucket(blob_name, file_path, bucket_name):
    '''uploads video clip to appropriate folder in gcloud'''
    try:
        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(file_path)
    except Exception as e:
        print(e)

In [49]:
var_data = 'var_data'
urls = 'urls'

In [46]:
user_id = list(client.table('users').select('email', 'id').eq('email', email).execute())[0][1][0]['id']
date_added = str(date.today())


has_dash = False

# begin application that updates database
while True:
   
    vid_url = input("enter full url of youtube video, or enter d to save & exit")
    list_url = vid_url.split('&list')
    newvid_url = str(list_url[0])

    if newvid_url == 'd': break
    if newvid_url == '': continue

    # generate key for dict
    vid_strs = newvid_url.split("=")
    vid_key = vid_strs[-1]
    if len(vid_key) != 11:
        print("url not entered correctly, try again")
        continue

    # check if url is already in table
    existing_urls = list(client.table(urls).select('url').execute())[0][1]
    url_search = next((item for item in existing_urls if item["url"] == newvid_url), None)
    if url_search is not None:
        print('URL ALREADY USED')
        continue

    # update urls table with new url and create url_id
    client.table(urls).insert({'url': newvid_url, 'youtube_id': vid_key}).execute()
    url_id = list(client.table(urls).select('url', 'id').eq('url', newvid_url).execute())[0][1][0]['id']

    time.sleep(1)
    print(f"Current video: {newvid_url}")

    i = 1
    
    # Begin collecting annotations for the video
    while True:
        user_annotation = input("Add annotation (label), or enter 'x' to delete an annot, enter 'd' if done with video")
        
        if newvid_url == '': continue
        if user_annotation == "d": break
        
        #create unique key id
        new_vid_key = vid_key + str(i)
        
        #if label is valid
        if user_annotation in all_possible_annotations:
            print(f'annot_{i}: {user_annotation}')

            clarity_level = input('Input clarity level (easy, medium, hard)')
            print(f'annot_{i} clarity_level: {clarity_level}')

            if clarity_level not in clarity_options:
                print('INCORRECT ENTRY - RETRY')
                continue
            
            user_segment = input("enter segment (ex: 1:12, 1:20)")
            print(f"annot_{i} segment: {user_segment}")

            if user_segment == '': continue

            try:
                #convert time to total seconds
                user_segment = user_segment.replace(",", "")
                time_list = user_segment.split(' ')
                time_list = [item.split(":") for item in time_list]
                time_list[0][0] = float(time_list[0][0]) * 60
                time_list[1][0] = float(time_list[1][0]) * 60
                time_list[0][1] = float(time_list[0][1])
                time_list[1][1] = float(time_list[1][1])
                time_start = time_list[0][0] + float(time_list[0][1])
                time_end = time_list[1][0] + float(time_list[1][1])
                fill_start = str(int(time_start)).zfill(6)
                fill_end = str(int(time_end)).zfill(6)

            except Exception as e:
                print(e)
                print("INCORRECT ENTRY - RETRYING")
                continue

            #convert label abreviations to full word
            if user_annotation in value_pairs.keys():
                user_annotation = value_pairs[user_annotation]
            
            # fill var_data table with new data
            client.table(var_data).insert({'id': new_vid_key, 'label': user_annotation, 
                                            'time_start': time_start, 'time_end': time_end,
                                            'youtube_id': vid_key, 'clarity_level': clarity_level,
                                            'user_id': user_id, 'date_added': date_added, 'url_id': url_id}).execute()

            # download video
            video = YouTube(newvid_url, use_oauth=True, allow_oauth_cache=True) 
            yt_video = video.streams.get_highest_resolution()
            yt_video.download(output_path='', filename=f"{new_vid_key}.mp4")

            if new_vid_key[0] == '-':
                has_dash = True
                os.rename(f'{new_vid_key}.mp4', f'{new_vid_key[1:]}.mp4')
                new_vid_key = new_vid_key[1:]

            # extract subclip
            ffmpeg_extract_subclip(filename=f'{new_vid_key}.mp4', t1=time_start, t2=time_end, 
                                    targetname=f'{new_vid_key}_{fill_start}_{fill_end}.mp4') 
            os.remove(f'{new_vid_key}.mp4')

            if has_dash == True:
                os.rename(f'{new_vid_key}_{fill_start}_{fill_end}.mp4', 
                            f'-{new_vid_key}_{fill_start}_{fill_end}.mp4')
                new_vid_key = '-' + new_vid_key
                has_dash = False
                
            # upload to gcloud
            upload_to_bucket(f'datasets/var/master_videos/{user_annotation}/{new_vid_key}_{fill_start}_{fill_end}.mp4', 
                            f"{new_vid_key}_{fill_start}_{fill_end}.mp4", bucket_name)
            os.remove(f"{new_vid_key}_{fill_start}_{fill_end}.mp4")
            print('successfully uploaded to gcloud')
            i += 1
        
        elif user_annotation == "x":
            annot_to_delete = input("what annot do you want to delete? (ex: annot_1), Or enter x to cancel")

            if annot_to_delete == '': continue
            if annot_to_delete == 'x': continue

            else:
                print(f'{annot_to_delete}: DELETED')
                ind = annot_to_delete[-1:]
                label = list(client.table(var_data).select('label').eq('id', vid_key+ind).execute())[0][1][0]['label']

                # delete clip from gcloud
                blob = bucket.blob(f"datasets/var/master_videos/{label}/{vid_key+ind}_{fill_start}_{fill_end}.mp4")
                blob.delete()
                print('deleted from gcloud successfully')

                # delete row from var_data table
                client.table(var_data).delete().eq('id', vid_key+ind).execute()             
                continue

        else:
            continue   

Current video: https://www.youtube.com/watch?v=UkSbwN8SPRI
annot_1: br
annot_1 clarity_level: easy
annot_1 segment: 0:02, 0:03
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
successfully uploaded to gcloud
annot_2: st
annot_2 clarity_level: medium
annot_2 segment: 0:03, 0:04
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
successfully uploaded to gcloud
annot_3: cr
annot_3 clarity_level: easy
annot_3 segment: 0:07, 0:08
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
successfully uploaded to gcloud
annot_4: pg
annot_4 clarity_level: easy
annot_4 segment: 0:09, 0:10
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
successfully uploaded to gcloud
annot_5: pg
annot_5 clarity_level: easy
annot_5 segment: 0:10, 0;11
could not convert string to float: '0;11'
INCORRECT ENTRY - RETRYING
annot_5: pg
annot_5 clarity_level: easy
annot_5 segment: 0:10, 0:11
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command 

In [53]:
data = list(client.table(var_data).select('label', 'index').execute())[0][1]
data

[{'label': 'nothing', 'index': 10748},
 {'label': 'striking', 'index': 10583},
 {'label': 'striking', 'index': 10568},
 {'label': 'brawling', 'index': 10387},
 {'label': 'person_on_ground', 'index': 10749},
 {'label': 'brawling', 'index': 10584},
 {'label': 'striking', 'index': 10567},
 {'label': 'brawling', 'index': 10388},
 {'label': 'spray', 'index': 10300},
 {'label': 'person_on_ground', 'index': 3830},
 {'label': 'running', 'index': 3831},
 {'label': 'person_on_ground', 'index': 3832},
 {'label': 'aiming', 'index': 3833},
 {'label': 'advancing', 'index': 3834},
 {'label': 'person_pointing', 'index': 3835},
 {'label': 'advancing', 'index': 3836},
 {'label': 'aiming', 'index': 3837},
 {'label': 'aiming', 'index': 3838},
 {'label': 'advancing', 'index': 3839},
 {'label': 'aiming', 'index': 3840},
 {'label': 'aiming', 'index': 3841},
 {'label': 'aiming', 'index': 3842},
 {'label': 'aiming', 'index': 3843},
 {'label': 'running', 'index': 3844},
 {'label': 'running', 'index': 3845},
 {'

In [54]:
for i in data:
    label = i['label']
    index = i['index']
    if ' ' in label:
        new_label = label.replace(' ', '_')
        client.table(var_data).update({'label': new_label}).eq('index', index).execute()       