In [1]:
from youtube_utils.google import Create_Service
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload
import io
import os
import socket
# Default timeout applied to newly created sockets, in seconds
# (30000 s is roughly 8.3 hours).
socket.setdefaulttimeout(30000)
import time
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
import rawpy
import imageio
import shutil
import cv2
from tqdm import tqdm
import zipfile

In [2]:
def convert_ARW2JPG(path, dst_folder):
    """Develop a camera RAW file and save the result as a JPEG.

    The output file keeps the source file's base name, with its extension
    replaced by ``.jpg``, and is written into ``dst_folder``.

    Args:
        path: Path of the RAW file; it is opened with ``rawpy.imread``.
        dst_folder: Directory that receives the JPEG. It is not created here.

    Returns:
        None. The image is written to disk as a side effect.
    """
    # basename/splitext instead of ``split('/')[-1][:-4]``: the old slicing
    # assumed a three-character extension and a '/' path separator.
    stem = os.path.splitext(os.path.basename(path))[0]
    dst_path = os.path.join(dst_folder, stem + '.jpg')

    with rawpy.imread(path) as raw:
        # Use the white balance recorded by the camera.
        rgb = raw.postprocess(use_camera_wb=True)

    # The previous per-channel copy + concatenate rebuilt the very same
    # array, so the developed image is saved directly.
    plt.imsave(dst_path, rgb)

In [3]:
# Settings passed to Create_Service: OAuth client-secret file, API name and
# version, and the OAuth scopes to request.
CLIENT_SECRET_FILE = 'client_secret.json'
API_NAME, API_VERSION = 'drive', 'v3'
SCOPES = ['https://www.googleapis.com/auth/drive']

In [4]:
# Build the Drive client through the project helper, using the client-secret
# file, API name/version and scopes configured in the previous cell.
service = Create_Service(CLIENT_SECRET_FILE, API_NAME, API_VERSION, SCOPES)

client_secret.json-drive-v3-(['https://www.googleapis.com/auth/drive'],)
['https://www.googleapis.com/auth/drive']
drive service created successfully


In [5]:
# Handle on the `files` resource; the upload, listing and download cells
# below all go through it.
files_service = service.files()

# Upload

In [5]:
# Metadata sent with the upload: the name the file gets on Drive.
# NOTE(review): the Drive name drops the `_B` suffix of the local file below -
# confirm this is intended.
file_metadata = dict(name='output_teams_17.mp4')

# Local video to upload, declared as MP4 and flagged as a resumable upload.
media = MediaFileUpload('../output_teams_17_B.mp4', mimetype='video/mp4', resumable=True)

In [6]:
# Create the file on Drive from the prepared metadata and media; only the
# `id` field is requested back in the response.
create_request = files_service.create(body=file_metadata, media_body=media, fields='id')
file = create_request.execute()
print('File ID: %s' % file.get('id'))

File ID: 1_hkp7XhNYu-XtZh56ROaQAYabPg3Nqm8


# Download from Drive

In [6]:
# Collect the id/name of every entry whose parent is `folder_id`, following
# `nextPageToken` until the listing is exhausted.
folder_id = '1RQGwFKxRrWRm89fEQ65sbZrseOv8talR'

# One shared set of request arguments: the first page and every follow-up
# page differ only by `pageToken`, so the request is no longer duplicated.
# NOTE(review): `corpus` appears to be deprecated in Drive v3 in favour of
# `corpora` - kept unchanged here so the request stays the same; confirm.
list_kwargs = dict(
    corpus="user",
    q=f'"{folder_id}" in parents',
    fields="nextPageToken, files(id, name)",
    supportsAllDrives=True,
    includeItemsFromAllDrives=True,
)

files = []
page_token = None
while True:
    if page_token:
        list_kwargs['pageToken'] = page_token
    res = files_service.list(**list_kwargs).execute()
    # .get() so a page without a `files` key is treated as empty rather
    # than raising KeyError.
    files.extend(res.get('files', []))
    page_token = res.get('nextPageToken')
    if not page_token:
        break

In [11]:
# IDs of archives handled by earlier runs, read back from the .npy record if
# one exists; otherwise start with an empty list.
extracted_file_ids = (
    np.load('extracted_archives.npy').tolist()
    if os.path.isfile('extracted_archives.npy')
    else []
)

In [12]:
# Keep only the "Cards..." archives that have not been extracted yet,
# ordered by name.
files = sorted(
    (
        entry for entry in files
        if entry['id'] not in extracted_file_ids and entry['name'].startswith('Cards')
    ),
    key=lambda entry: entry['name'],
)

# Displayed as the cell result: (already extracted, still to process).
len(extracted_file_ids), len(files)

(51, 0)

In [9]:
from multiprocessing import Pool

def _convert_folder(src_folder, dst_folder):
    """Convert or copy every entry of ``src_folder`` into ``dst_folder``.

    Entries whose name ends with ``.arw`` (any letter case) are converted
    with ``convert_ARW2JPG``; every other entry is copied unchanged under the
    same name. Entries ending in ``.ipynb_checkpoints`` are skipped.

    Args:
        src_folder: Directory whose entries are processed.
        dst_folder: Directory that receives the converted/copied files.
    """
    for fn in os.listdir(src_folder):
        if fn.endswith('.ipynb_checkpoints'):
            continue
        filepath = os.path.join(src_folder, fn)
        # Case-insensitive match: the previous ``endswith('arw')`` test
        # missed upper-case ``.ARW`` files, which were then copied as raw
        # files instead of being converted.
        if fn.lower().endswith('.arw'):
            convert_ARW2JPG(filepath, dst_folder)
        else:
            shutil.copy(filepath, os.path.join(dst_folder, fn))


def convert_cards(card_folder):
    """Convert/copy one card folder into the global ``card_dst_folder``.

    ``card_dst_folder`` is a module-level name looked up at call time, so it
    must be defined before this function runs.
    """
    _convert_folder(card_folder, card_dst_folder)


def convert_O7(o7_folder):
    """Convert/copy one O7 folder into the global ``o7_dst_folder``.

    ``o7_dst_folder`` is a module-level name looked up at call time, so it
    must be defined before this function runs.
    """
    _convert_folder(o7_folder, o7_dst_folder)

In [10]:
# Download each pending archive from Drive, unpack it, convert/copy its
# card and O7 folders into the shared destination folders, then record the
# archive as processed.
zip_folder = './data/'

data_folder = '/home/ubuntu/storage/Doc2Answer/download_from_drive/data'
# Make sure the working directories exist before anything lists or writes
# into them.
os.makedirs(zip_folder, exist_ok=True)
os.makedirs(data_folder, exist_ok=True)
# Not used in the visible part of the notebook; kept in case later cells
# rely on it.
chunk_folders = [fn for fn in os.listdir(data_folder) if 'Cards ' in fn]
card_dst_folder = os.path.join(data_folder, 'Cards')
o7_dst_folder = os.path.join(data_folder, 'O7')
os.makedirs(card_dst_folder, exist_ok=True)
os.makedirs(o7_dst_folder, exist_ok=True)

# Scratch locations reused for every archive (loop-invariant).
tmp_path = os.path.join(data_folder, 'tmp')
chunk_path = os.path.join(data_folder, 'chunk')

for file in files:
    fn = file['name']
    zip_path = os.path.join(zip_folder, fn)
    print(zip_path)
    if not os.path.isfile(zip_path):
        request = files_service.get_media(fileId=file['id'])
        # Download into a side file and rename only once complete, so an
        # interrupted transfer is never mistaken for a finished archive on
        # the next run. The `with` block also closes the handle, which the
        # previous version leaked.
        part_path = zip_path + '.part'
        with io.FileIO(part_path, "wb") as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
        os.replace(part_path, zip_path)

    # Leftovers from a failed earlier run would make `os.listdir(tmp_path)[0]`
    # ambiguous and make shutil.move nest inside an existing chunk folder.
    for stale in (tmp_path, chunk_path):
        if os.path.isdir(stale):
            shutil.rmtree(stale)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(tmp_path)
    # The first entry extracted at the top level becomes the chunk folder.
    shutil.move(os.path.join(tmp_path, os.listdir(tmp_path)[0]), chunk_path)
    shutil.rmtree(tmp_path)

    # Split the chunk's sub-directories: names containing "_o7" (any case)
    # are O7 folders, all others are card folders.
    cards_folders, o7_folders = [], []
    for name in os.listdir(chunk_path):
        sub_path = os.path.join(chunk_path, name)
        if name.endswith('.ipynb_checkpoints') or not os.path.isdir(sub_path):
            continue
        if '_o7' in name.lower():
            o7_folders.append(sub_path)
        else:
            cards_folders.append(sub_path)
    print(len(cards_folders), len(o7_folders))

    # One worker pool serves both conversions for this archive.
    with Pool() as p:
        p.map(convert_cards, cards_folders)
        p.map(convert_O7, o7_folders)

    shutil.rmtree(chunk_path)
    os.remove(zip_path)
    # Persist progress after every archive so a rerun skips finished ones.
    extracted_file_ids.append(file['id'])
    np.save('extracted_archives', extracted_file_ids)

./data/Cards 3_20.03.2021-20210817T102617Z-010.zip
9 2
./data/Cards 3_20.03.2021-20210817T102617Z-011.zip
8 3
./data/Cards 3_20.03.2021-20210817T102617Z-012.zip
7 2
./data/Cards 3_20.03.2021-20210817T102617Z-013.zip
5 6
./data/Cards 3_20.03.2021-20210817T102617Z-014.zip
6 6
./data/Cards 3_20.03.2021-20210817T102617Z-015.zip
6 3
./data/Cards 3_20.03.2021-20210817T102617Z-016.zip
7 3
./data/Cards 3_20.03.2021-20210817T102617Z-017.zip
5 1
./data/Cards 3_20.03.2021-20210817T102617Z-018.zip
2 0
./data/Cards 4_9.04.2021-20210830T101756Z-001.zip
4 10
./data/Cards 4_9.04.2021-20210830T101756Z-002.zip
9 0
./data/Cards 4_9.04.2021-20210830T101756Z-003.zip
9 3
./data/Cards 4_9.04.2021-20210830T101756Z-004.zip
10 3
./data/Cards 5_15.04.2021-20210830T102505Z-001.zip
6 7
./data/Cards 5_15.04.2021-20210830T102505Z-002.zip
5 1
./data/Cards 6_15.04.2021-20210830T102722Z-001.zip
7 7
./data/Cards 6_15.04.2021-20210830T102722Z-002.zip
10 4
./data/Cards 6_15.04.2021-20210830T102722Z-003.zip
8 0
./data/Card