In [None]:
import os
import json
from os.path import exists, join, basename, splitext

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

class GoogleDriveDatabase:
  """A Google Drive folder treated as a database of named sub-folders.

  The folder identified by ``DATABASE_GID`` is listed once at construction and
  every direct child folder is cached in ``self.folders`` as ``title -> id``.
  Files are uploaded to / listed from / downloaded from those sub-folders,
  which the rest of the notebook calls "characters".

  ``drive`` is expected to offer the PyDrive ``GoogleDrive`` calls used here
  (``ListFile(...).GetList()``, ``CreateFile(...)``, ``auth.service``).
  """

  # MIME type Drive reports for folders.
  _FOLDER_MIME = "application/vnd.google-apps.folder"

  # fileType values accepted by upload() and the MIME type sent for each.
  _FILETYPE_MIME_MAP = {
      "jpeg" : "image/jpeg",
      "json" : "application/json",
      "zip" : "application/zip",
      "avi" : "video/avi"
  }

  def __init__(self, drive, DATABASE_GID:str):
    """Store the drive handle and load the sub-folder cache (one Drive query)."""
    assert isinstance(DATABASE_GID, str)
    self.folders = {}
    self.drive = drive
    self.database_gid = DATABASE_GID
    # update_folders() performs the listing; no separate query is needed here.
    self.update_folders(drive)
    print("{} folders loaded".format(len(self.folders)))

  def update_folders(self,drive):
    """Rebuild ``self.folders`` from the database folder's current children.

    Uses the ``drive`` argument (not ``self.drive``) for the query and returns
    the refreshed ``title -> id`` dict.
    """
    folder_list = drive.ListFile({'q': "'{}' in parents and trashed=false".format(self.database_gid)}).GetList()
    self.folders = {
        entry['title']: entry['id']
        for entry in folder_list
        if entry['mimeType'] == self._FOLDER_MIME
    }
    return self.folders

  def upload(self, filename, character, fileType):
    """Upload local file ``filename`` into the sub-folder named ``character``.

    ``fileType`` must be a key of ``_FILETYPE_MIME_MAP``. Raises AssertionError
    on a wrong argument type, unknown ``fileType``, missing local file or
    unknown ``character``.
    """
    assert isinstance(filename, str)
    assert isinstance(character, str)
    assert isinstance(fileType, str)
    # The message lists the accepted values rather than echoing the bad one.
    assert fileType in self._FILETYPE_MIME_MAP, "fileType must be one of the following: {}".format(tuple(self._FILETYPE_MIME_MAP))
    assert os.path.isfile(filename), "{} does not exist as a file".format(filename)
    assert self.checkFolder(character), "{} is not a valid character. Pick from list: \n{}".format(character, tuple(self.folders.keys()))
    file = self.drive.CreateFile({
        "title" :  os.path.basename(filename),
        "mimeType" : self._FILETYPE_MIME_MAP[fileType],
        "parents" : [{"id" : self.folders[character]}]
    })
    file.SetContentFile(filename)
    file.Upload()
    print("uploaded {}".format(filename))

  @staticmethod
  def allImagesToJPG(fileName) -> str:
    """Return ``fileName`` with its video extension normalised to ``.avi``.

    Despite the name, this operates on video names: only the final extension
    is replaced, so dots elsewhere in the path are left alone. Raises
    AssertionError if ``fileName`` is not a str or does not end in one of
    ``VIDEO_EXTENSIONS()`` (compared case-insensitively).
    """
    assert isinstance(fileName, str)
    root, extension = os.path.splitext(fileName)
    assert extension.lower() in GoogleDriveDatabase.VIDEO_EXTENSIONS(), "{} is not a valid image".format(fileName)
    if extension.lower() == ".avi":
      return fileName
    return root + ".avi"

  @staticmethod
  def VIDEO_EXTENSIONS() -> list:
    """Extensions treated as video files."""
    return [".avi", ".mp4"]

  @staticmethod
  def FILE_EXTENSIONS() -> list:
    """Known non-video extensions; such names are never renamed on download."""
    return [".jpg", ".jpeg", ".png", ".zip", ".json"]

  def getFiles(self, character) -> list:
    """List the non-folder Drive entries inside sub-folder ``character``.

    Raises KeyError if ``character`` is not a cached folder title.
    """
    query = {'q': "'{}' in parents and trashed=false".format(self.folders[character])}
    return [
      entry for entry in self.drive.ListFile(query).GetList()
      if entry['mimeType'] != self._FOLDER_MIME
    ]

  def download_file_name(self, file_name):
    """Return the local name a Drive entry called ``file_name`` is saved under.

    Names with a known non-video extension are returned unchanged, video names
    are normalised to ``.avi``, and anything else (no or unknown extension)
    gets ``.avi`` appended.
    """
    extension = os.path.splitext(file_name)[1].lower()
    if extension in GoogleDriveDatabase.FILE_EXTENSIONS():
      return file_name
    if extension not in GoogleDriveDatabase.VIDEO_EXTENSIONS():
      file_name += ".avi"
    return GoogleDriveDatabase.allImagesToJPG(file_name)

  def download_file(self, file, folder:str, **kwargs) -> str:
    """Download Drive entry ``file`` into ``folder`` and return the local path.

    With ``check_local=True`` the download is skipped when the target path
    already exists locally.
    """
    check_already_exist = kwargs.get("check_local", False)
    file_name = self.download_file_name(os.path.join(folder, file['title']))
    if check_already_exist and os.path.exists(file_name):
      return file_name
    file.GetContentFile(file_name)
    print("downloaded", file_name)
    return file_name

  def download(self, character:str,folder:str):
    """Download every file of ``character`` into ``folder`` (created if needed).

    Files already present locally are not fetched again. Returns a tuple of
    the local paths.
    """
    file_list = self.getFiles(character)
    os.makedirs(folder, exist_ok=True)
    return tuple(
      self.download_file(file, folder, check_local=True) for file in file_list
    )

  def checkFolder(self, name):
    """Return True if ``name`` is a cached sub-folder title."""
    assert isinstance(name, str)
    return name in self.folders

  def createFolder(self, name, exist_ok=True):
    """Create sub-folder ``name`` under the database folder; return True.

    If the folder is already cached, returns True when ``exist_ok`` is set and
    raises Exception otherwise. A newly created folder is added to
    ``self.folders`` so it can be used immediately without update_folders().
    """
    if self.checkFolder(name):
      if exist_ok:
        return True
      raise Exception("Folder {} already exists".format(name))
    folder = self.drive.CreateFile({
        "title" : name,
        "mimeType" : self._FOLDER_MIME,
        "parents" : [{"id" : self.database_gid}]
    })
    folder.Upload()
    # Upload() fills in the Drive-assigned metadata, including the new id.
    self.folders[name] = folder['id']
    return True

  def move_file(self, file_obj, newFolder):
    """Re-parent Drive entry ``file_obj`` into the sub-folder ``newFolder``.

    Goes through the underlying API client (``self.drive.auth.service``): all
    current parents are removed and ``newFolder`` becomes the only parent.
    Returns the API response (requested fields: id, parents).
    """
    assert isinstance(newFolder, str)
    assert self.checkFolder(newFolder)
    files = self.drive.auth.service.files()
    file = files.get(fileId=file_obj['id'], fields ='parents').execute()
    # removeParents takes a comma-separated list of parent ids.
    prev_parents = ','.join(p['id'] for p in file.get('parents'))
    file = files.update(
        fileId = file_obj['id'],
        addParents = self.folders[newFolder],
        removeParents = prev_parents,
        fields = 'id, parents',
    ).execute()
    return file

  def trash(self, file_obj):
    """Move ``file_obj`` into the sub-folder titled "TRASH".

    This is a move, not a Drive deletion; move_file() asserts that a "TRASH"
    sub-folder exists in this database.
    """
    self.move_file(file_obj, "TRASH")
    print("Sent {} to trash".format(file_obj['title']))

# Google auth: sign in through Colab (make sure to sign in with your ucsb
# account), then hand the application-default credentials to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Drive folder ids of the two databases: the first is read from, the second is
# written to by the cells below.
DOWNLOAD_DATABASE = "14HLz4WqhRCRfFfNHSOrnOfUWewpZe2oP"
UPLOAD_DATABASE = "1Z9KA09PAnl0WyEgDLML83EghyqYK1vOe"
download_database, upload_database = (
    GoogleDriveDatabase(drive, database_gid)
    for database_gid in (DOWNLOAD_DATABASE, UPLOAD_DATABASE)
)

# Sub-folder ("character") names processed by the driver loop.
CHARACTERS = ("J", "Z")

4 folders loaded
3 folders loaded


In [None]:
import os
import json
from os.path import exists, join, basename, splitext

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Compile mediapipe
git_repo_url = 'https://github.com/AriAlavi/SigNN.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  print("Does not yet exist")
  !wget -q https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.tar.gz
  !tar xfz cmake-3.13.0-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local
  !git clone -q --depth 1 $git_repo_url
  !sudo apt install curl
  !curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add -
  !echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list
  !sudo apt update && sudo apt install bazel-3.3.0
  !sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
                        libopencv-calib3d-dev libopencv-features2d-dev \
                        libopencv-imgproc-dev libopencv-video-dev
  !cd {project_name} && git fetch --all
  !cd {project_name} && git checkout 9941e9cf9a48eae52ed4e43420eb8cafcd3998f5
  !cd {project_name} && git pull
  !cd {project_name} && bazel-3.3.0 build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu 
else:
  print("Already exists")
  !git config --global user.email "none@gmail.com"
  !git config --global user.name "Google colab"
  !cd {project_name} && git stash
  !cd {project_name} && git fetch --all
  !cd {project_name} && git checkout 9941e9cf9a48eae52ed4e43420eb8cafcd3998f5
  !cd {project_name} && git pull
  !cd {project_name} && bazel-3.3.0 build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu 

Already exists
Saved working directory and index state WIP on master: 5012415 Added more mediapipe calculators for signn
Fetching origin
fatal: reference is not a tree: 9941e9cf9a48eae52ed4e43420eb8cafcd3998f5
Already up to date.
[32mAnalyzing:[0m target //mediapipe/examples/desktop/multi_hand_tracking:multi_hand_\
[32mAnalyzing:[0m target //mediapipe/examples/desktop/multi_hand_tracking:multi_hand_\
[32mAnalyzing:[0m target //mediapipe/examples/desktop/multi_hand_tracking:multi_hand_\
[33mDEBUG: [0mRule 'rules_cc' indicated that a canonical reproducible form can be obtained by modifying arguments sha256 = "2a34fa56d923f774409d23720e60ddf6536e88622d000e6925f7cebbad65e281"
[32mAnalyzing:[0m target //mediapipe/examples/desktop/multi_hand_tracking:multi_hand_\
[33mDEBUG: [0mRepository rules_cc instantiated at:
  no stack (--record_rule_instantiation_callstack not enabled)
Repository rule http_archive defined at:
  /root/.cache/bazel/_bazel_root/6b05d56d57241dcd7692847de0d8c695

In [None]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
from datetime import datetime
from random import randint



import uuid
import os
import time


  
def runMediapipe(input_video):
  """Run the compiled multi-hand-tracking binary on ``input_video``.

  The command first does ``cd SigNN``, so ``input_video`` must still resolve
  from inside that directory (an absolute path is the safe choice).

  Args:
    input_video: path of an existing video file.

  Returns:
    The exit status reported by ``os.system`` for the shell command.

  Raises:
    AssertionError: if ``input_video`` is not an existing file.
  """
  import shlex  # local import: only needed to quote the path for the shell

  assert os.path.isfile(input_video), "{} does not exist".format(input_video)
  # Quote the path so spaces or shell metacharacters in a file name cannot
  # split or alter the command; ordinary paths pass through unchanged.
  call = "cd SigNN && sudo GLOG_logtostderr=0 bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/multi_hand_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_logger.pbtxt --input_video_path={} --render_video=false".format(shlex.quote(input_video))
  return os.system(call)

def modifyMediapipeLoggerOutput(path, filename, mediapipe_directory):
  """Rewrite the logger graph config so output goes to ``path``/``filename``.

  Edits ``mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_logger.pbtxt``
  under ``mediapipe_directory`` in place: the first line containing
  ``CoordinateLoggerCalculatorOptions`` is located, and the lines two and three
  below it are replaced with new ``logger_path`` and ``filename`` entries.
  If no such line exists the file content is left as it was.

  Args:
    path: output directory for the logger; created if missing.
    filename: output file name; must end in ``.json``.
    mediapipe_directory: root of the mediapipe checkout.

  Raises:
    AssertionError: on a wrong argument type or a non-json ``filename``.
    IndexError: if the marker line is too close to the end of the file.
  """
  assert isinstance(path, str)
  assert isinstance(filename, str)
  assert filename.split(".")[-1] == "json"
  assert isinstance(mediapipe_directory, str)
  os.makedirs(path, exist_ok=True)
  pbtxt_name = os.path.join(mediapipe_directory, "mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_logger.pbtxt")
  # Binary mode keeps every untouched line byte-identical; the context manager
  # closes the handle even if the edit below raises.
  with open(pbtxt_name, "r+b") as pbtxt_file:
    pbtxt_content = pbtxt_file.readlines()
    for line_index, line in enumerate(pbtxt_content):
      if b"CoordinateLoggerCalculatorOptions" in line:
        pbtxt_content[line_index + 2] = '      logger_path: "{}"\n'.format(path).encode()
        pbtxt_content[line_index + 3] = '      filename: "{}"\n'.format(filename).encode()
        break
    # Build the full payload before truncating so a failure cannot leave the
    # config file empty.
    new_content = b"".join(pbtxt_content)
    pbtxt_file.seek(0)
    pbtxt_file.truncate()
    pbtxt_file.write(new_content)

class InvalidMediapipeOutput(Exception):
  """Exception type for invalid mediapipe output (not raised in the code visible here)."""

def strip_name(x):
  """Function to remove directories and extensions (i.e. .mp4 .avi) from filenames.

  Keeps the text after the last "/" and cuts it at its FIRST "." - so
  "videos/J/a.b.avi" gives "a", not "a.b".
  """
  last_component = x.split("/")[-1]
  return last_component.split(".")[0]

def setupMediapipe(json_name, output_directory):
  """Configure the mediapipe logger to write ``json_name`` into ``output_directory``.

  The graph config under the fixed checkout location ``/content/SigNN/`` is
  the one that gets modified. Raises AssertionError if ``json_name`` does not
  contain ".json".
  """
  assert ".json" in json_name
  print("Setting mediapipe logger output")
  modifyMediapipeLoggerOutput(
    path=output_directory,
    filename=json_name,
    mediapipe_directory="/content/SigNN/",
  )

def Mediapipe(input_directory, video_fullname, output_directory):
  """Run mediapipe on one video and return the path of the expected json.

  The json is named after the video (``strip_name(video_fullname) + ".json"``)
  and the logger is pointed at ``output_directory`` before the run. The
  returned path is built from those names; this function does not check that
  the file was actually produced.
  """
  base_name = strip_name(video_fullname)
  os.makedirs(output_directory, exist_ok=True)
  source_video = os.path.join(input_directory, video_fullname)
  json_name = base_name + ".json"
  expected_json = os.path.join(output_directory, json_name)

  setupMediapipe(json_name, output_directory)
  print("Running mediapipe on {}".format(source_video))
  runMediapipe(source_video)
  return expected_json



# def download_characters(): # Only downloads characters
#   BASE_DIR = "images/"
#   results = {}
#   for c in CHARACTERS:
#     results[c] = download_database.download(c, os.path.join(BASE_DIR, c + "/"))
#     print("{} pictures downloaded for {}".format(len(results[c]), c))
#   return results
    
def DownloadVideosAndUploadJson(character, max_char, base_videos_dir="/content/videos/", base_json_dir="/content/json/"):
  """Process up to ``max_char`` not-yet-analysed videos of ``character``.

  Relies on the notebook globals ``download_database`` (videos),
  ``upload_database`` (json results) and ``gauth``.

  Steps:
    1. Json files on the upload drive with no matching video are trashed.
    2. Videos are picked in random order; those that already have a json are
       skipped, the others are downloaded, run through mediapipe, their json
       uploaded, and both local files removed.

  Args:
    character: sub-folder name present in both databases.
    max_char: maximum number of videos to process in this call.
    base_videos_dir: local root for downloaded videos; a ``character``
      sub-directory is used beneath it.
    base_json_dir: local root for mediapipe json output, same layout.

  Returns:
    True when every video was examined in this call, False when the
    ``max_char`` budget ran out with videos left (call again to continue).

  Raises:
    AssertionError: on wrong argument types, or from ``upload`` when
      mediapipe produced no json file.
  """
  assert isinstance(character, str)
  assert isinstance(max_char, int)

  # One directory is used for download, processing and cleanup alike, so the
  # function does not depend on the current working directory.
  input_directory = os.path.join(base_videos_dir, character)
  output_directory = os.path.join(base_json_dir, character)
  os.makedirs(input_directory, exist_ok=True)

  character_jsons_original = upload_database.getFiles(character) # All .json files created for this character
  character_jsons_names = [strip_name(x['title']) for x in character_jsons_original]
  video_references = download_database.getFiles(character) # All current videos for this character
  video_reference_names = set(strip_name(x['title']) for x in video_references)
  print("There are {} videos for character {}".format(len(video_references), character))
  print("There are {} json files already made for character {}".format(len(character_jsons_names), character))

  for json_original in character_jsons_original:
    if strip_name(json_original['title']) not in video_reference_names:
      print("{} deleted because no video was found to relate to it".format(json_original['title']))
      upload_database.trash(json_original)

  analyzed_names = set(character_jsons_names)
  while len(video_references) > 0 and max_char > 0:
    gauth.Refresh() # Refresh the Drive credentials before each video
    video = video_references.pop(randint(0, len(video_references)-1))
    if strip_name(video['title']) in analyzed_names:
      continue # Already has a json; does not count against max_char
    # Use the path download_file actually wrote to: the local name can differ
    # from the Drive title when the extension is normalised.
    video_path = download_database.download_file(video, input_directory, check_local=True)
    json_path = Mediapipe(input_directory, os.path.basename(video_path), output_directory)
    time.sleep(1)
    upload_database.upload(json_path, character, "json")
    os.remove(video_path)
    os.remove(json_path)
    max_char -= 1

  if len(video_references) > 0:
    return False # Not Complete
  print("{} complete".format(character))
  return True # Complete

# Work through every character in batches of 5 videos, refreshing the Drive
# credentials between batches, until the character reports completion.
for character in CHARACTERS:
  finished = DownloadVideosAndUploadJson(character, 5)
  while not finished:
    gauth.Refresh()
    finished = DownloadVideosAndUploadJson(character, 5)



There are 462 videos for character J
There are 451 json files already made for character J
downloaded videos/J/J_09-01-2020_20_48_04.avi
Setting mediapipe logger output
Running mediapipe on /content/videos/J/J_09-01-2020_20_48_04.avi
uploaded /content/json/J/J_09-01-2020_20_48_04.json
downloaded videos/J/J_09-02-2020_08_54_25.avi
Setting mediapipe logger output
Running mediapipe on /content/videos/J/J_09-02-2020_08_54_25.avi
uploaded /content/json/J/J_09-02-2020_08_54_25.json
downloaded videos/J/J_09-06-2020_08_38_57.avi
Setting mediapipe logger output
Running mediapipe on /content/videos/J/J_09-06-2020_08_38_57.avi
uploaded /content/json/J/J_09-06-2020_08_38_57.json
downloaded videos/J/J_07-30-2020_22_08_58.avi
Setting mediapipe logger output
Running mediapipe on /content/videos/J/J_07-30-2020_22_08_58.avi
uploaded /content/json/J/J_07-30-2020_22_08_58.json
downloaded videos/J/J_09-06-2020_08_21_58.avi
Setting mediapipe logger output
Running mediapipe on /content/videos/J/J_09-06-2020