In [14]:
# Most code is from the following tutorial: https://www.thepythoncode.com/article/using-google-drive--api-in-python#Download_Files
# For setup, install the required pip packages (google-api-python-client, google-auth-oauthlib,
# tqdm, requests) and enable the Google Drive API:
# https://developers.google.com/drive/api/v3/quickstart/python

import pickle
import os
import re
import io
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaIoBaseDownload
from tqdm import tqdm
import requests

# OAuth scopes handed to InstalledAppFlow when the user authorizes the app.
# A cached token.pickle is reused as-is by get_gdrive_service, so editing this
# list has no effect on an already-saved token until that file is removed.
SCOPES = [
    'https://www.googleapis.com/auth/drive.metadata',
    'https://www.googleapis.com/auth/drive',
    'https://www.googleapis.com/auth/drive.file',
]


def get_gdrive_service(token_path='token.pickle', credentials_path='credentials.json'):
    """Build an authorized Google Drive v3 API client.

    Cached credentials are loaded from ``token_path`` when that file exists.
    If they are missing or invalid they are either refreshed (expired but a
    refresh token is available) or obtained through the interactive
    installed-app OAuth flow using ``credentials_path`` and ``SCOPES``.
    Whenever credentials are refreshed or newly obtained they are written
    back to ``token_path``.

    Args:
        token_path: Pickle file used to cache the user's access/refresh
            tokens. Defaults to ``'token.pickle'`` in the working directory.
        credentials_path: OAuth client-secrets JSON file. Defaults to
            ``'credentials.json'`` in the working directory.

    Returns:
        The service object produced by ``build('drive', 'v3', ...)``.
    """
    creds = None
    if os.path.exists(token_path):
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point token_path at a file this function wrote itself.
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                credentials_path, SCOPES)
            # port=0 is forwarded unchanged to the local redirect server.
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open(token_path, 'wb') as token:
            pickle.dump(creds, token)
    return build('drive', 'v3', credentials=creds)

In [15]:
def search(service, query):
    """Collect every Drive file matching ``query``, following pagination.

    Args:
        service: Drive API client exposing ``files().list(...).execute()``.
        query: Query string passed as the ``q`` argument of ``files().list``.

    Returns:
        A list of ``(id, name, mimeType)`` tuples, in the order the pages
        returned them.
    """
    matches = []
    next_token = None
    more_pages = True
    while more_pages:
        request = service.files().list(
            q=query,
            spaces="drive",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=next_token,
        )
        page = request.execute()
        matches.extend(
            (entry["id"], entry["name"], entry["mimeType"])
            for entry in page.get("files", [])
        )
        # A missing or empty token means this was the last page.
        next_token = page.get('nextPageToken', None)
        more_pages = bool(next_token)
    return matches

In [16]:
def download(name):
    """Find a Drive file by exact name, share it publicly, and download it.

    The first search hit for ``name`` is used. The file is saved locally
    under the same ``name``.

    Args:
        name: Name of the file on Google Drive; also the local destination.

    Raises:
        IndexError: If no file with that name is found. (The type is kept
            from the original empty-list lookup so existing handlers still
            match; only the message is new.)
    """
    service = get_gdrive_service()
    # Drive query strings are single-quoted, so backslashes and quotes in the
    # name must be escaped or the query is malformed.
    escaped_name = name.replace("\\", "\\\\").replace("'", "\\'")
    search_result = search(service, query=f"name='{escaped_name}'")
    if not search_result:
        raise IndexError(f"no Google Drive file named {name!r} was found")
    # Each hit is (id, name, mimeType); take the id of the first one.
    file_id = search_result[0][0]
    # NOTE(review): this grants read access to *anyone*, and nothing in this
    # function revokes it afterwards -- confirm that is acceptable for
    # the data being downloaded.
    service.permissions().create(body={"role": "reader", "type": "anyone"}, fileId=file_id).execute()
    download_file_from_google_drive(file_id, name)

In [17]:
def download_file_from_google_drive(id, destination):
    """Stream a publicly shared Drive file to ``destination``.

    A first request is made to the ``uc?export=download`` endpoint. If the
    response sets a cookie whose name starts with ``download_warning``, its
    value is sent back as the ``confirm`` parameter in a second request.
    The final response body is written to disk in chunks with a progress bar.

    Args:
        id: Google Drive file ID.
        destination: Local path the content is written to.

    Raises:
        requests.HTTPError: If the final response has an error status, so an
            error page is not silently saved as the file.
    """
    def get_confirm_token(response):
        """Return the value of the ``download_warning*`` cookie, if any."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Write the response body to ``destination`` with a progress bar."""
        CHUNK_SIZE = 32768
        # Content-Length may be absent; 0 then means "unknown".
        file_size = int(response.headers.get("Content-Length", 0))
        # Content-Disposition may be missing or carry no quoted filename;
        # fall back to the destination path for display purposes.
        content_disposition = response.headers.get("content-disposition") or ""
        names = re.findall("filename=\"(.+)\"", content_disposition)
        filename = names[0] if names else str(destination)
        print("[+] File size:", file_size)
        print("[+] File name:", filename)
        # The bar is driven manually in bytes. Wrapping the chunk iterator as
        # well would add one extra unit per chunk on top of update().
        with open(destination, "wb") as f, tqdm(
            desc=f"Downloading {filename}",
            total=file_size or None,
            unit="Byte",
            unit_scale=True,
            unit_divisor=1024,
        ) as progress:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                    progress.update(len(chunk))

    # base URL for download
    URL = "https://docs.google.com/uc?export=download"
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        print("[+] Downloading", response.url)
        # NOTE(review): if Drive does not set a download_warning cookie, no
        # confirmation is sent and whatever the first response returned is
        # saved -- verify the saved file is the expected media.
        token = get_confirm_token(response)
        if token:
            # Release the unconsumed first stream before re-requesting.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
        try:
            response.raise_for_status()
            save_response_content(response, destination)
        finally:
            response.close()

In [18]:
import cv2
import os

def video_sampler(video, frame_interval=29):
    """Save every ``frame_interval``-th frame of a video as a JPEG.

    Frames are written to a directory named after the video path without its
    extension, as ``0.jpg``, ``1.jpg``, ... If that directory cannot be
    created (for example because it already exists, meaning the video was
    sampled before), the error is printed and nothing is sampled.

    Args:
        video: Path of the video file to sample.
        frame_interval: Number of frames read per saved image; the last frame
            of each group is the one written. Defaults to 29.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be at least 1")

    output_dir = os.path.splitext(str(video))[0]
    # An existing directory marks the video as already processed.
    try:
        os.mkdir(output_dir)
    except OSError as e:
        print(e)
        return

    print("Begin sampling for " + str(video) + "\n")
    capture = cv2.VideoCapture(video)
    try:
        if not capture.isOpened():
            print("Could not open video " + str(video))
            return
        frame_number = 0
        while True:
            ok, frame = False, None
            for _ in range(frame_interval):
                ok, frame = capture.read()
                if not ok:
                    # Stop reading as soon as a read fails; a partial group
                    # at the end of the video is discarded.
                    break
            if not ok:
                break
            cv2.imwrite(os.path.join(output_dir, str(frame_number) + ".jpg"), frame)
            frame_number += 1
    finally:
        # Always free the capture handle, even if reading or writing fails.
        capture.release()

In [33]:
import re
def get_samples(filetype):
    """Download and frame-sample every Drive file of a given MIME type.

    Args:
        filetype: MIME type string used in the Drive ``mimeType='...'`` query.
    """
    drive = get_gdrive_service()
    matches = search(drive, query=f"mimeType='{filetype}'")
    # Each match is (id, name, mimeType); only the name is used here because
    # download() looks the file up again by name.
    for _file_id, file_name, _mime_type in matches:
        download(file_name)
        video_sampler(file_name)

In [34]:
if __name__ == '__main__':
    # MIME type of the Drive files to download and sample
    get_samples("video/avi")

[+] Downloading https://docs.google.com/uc?export=download&id=17Eerptye-o9ye2phipb3lKWftAcp6qGm


Downloading 200824 arms-1 east vestibule (1s09).avi: 32.0kByte [00:00, 194kByte/s]

[+] File size: 0
[+] File name: 200824 arms-1 east vestibule (1s09).avi


Downloading 200824 arms-1 east vestibule (1s09).avi: 151Byte [00:04, 36.5Byte/s]   


KeyboardInterrupt: 