<a href="https://colab.research.google.com/github/Location-Artistry/GEO-DEV-NOTEBOOKS/blob/main/GOOGLE_DRIVE_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Google Drive API**
### Export and download all files from drive

In [2]:
import datetime as dt
import getpass
import io
import os
import os.path
import shutil
import time
from mimetypes import MimeTypes
from pathlib import Path

import numpy as np
import pandas as pd
from google.auth.transport.requests import Request
from google.colab import drive
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload

## Google APIs function library
From Water Assessment Notebook - Updated April 13 2021

In [3]:
def getGcreds(CLIENT_SECRETS_FILE, SCOPES):
  """Run the installed-app OAuth console flow and return its credentials.

  Args:
    CLIENT_SECRETS_FILE: Path to the OAuth client-secrets JSON file.
    SCOPES: List of OAuth scope URLs to request.

  Returns:
    The value returned by ``InstalledAppFlow.run_console()``.
  """
  # run_console() prints an authorization URL and prompts for the pasted code.
  # NOTE(review): confirm the installed google-auth-oauthlib still provides
  # run_console(); newer releases may only offer run_local_server().
  return InstalledAppFlow.from_client_secrets_file(CLIENT_SECRETS_FILE, SCOPES).run_console()

def copyFile(DOCUMENT_ID, copyTitle, credentials):
  """Copy a Drive file (for example a Google Doc template) under a new name.

  Args:
    DOCUMENT_ID: Drive file id of the file to copy.
    copyTitle: Name given to the new copy.
    credentials: Credentials handed to the Drive v3 service builder.

  Returns:
    The id of the newly created copy, or None when the Drive API call
    fails with an HTTP error.
  """
  service = build('drive', 'v3', credentials = credentials)
  try:
    result = service.files().copy(fileId=DOCUMENT_ID, body={'name': copyTitle}).execute()
  except HttpError as error:
    # Report the API failure and signal it to the caller with None instead
    # of letting the notebook cell crash.
    print(f'An error occurred: {error}')
    return None
  display(result)
  return result['id']

def listFiles(num, credentials):
  """Fetch one page of Drive files, display a summary of each, and return them.

  Args:
    num: Value passed as ``pageSize`` to the Drive ``files.list`` call.
    credentials: Credentials handed to the Drive v3 service builder.

  Returns:
    The ``files`` list from the API response; each entry is a dict holding
    the requested fields (id, name, mimeType, size, parents, modifiedTime).
  """
  drive_api = build('drive', 'v3', credentials = credentials)
  wanted_fields = "nextPageToken, files(id, name, mimeType, size, parents, modifiedTime)"
  response = drive_api.files().list(pageSize=num, fields=wanted_fields).execute()
  found = response['files']
  # One display line per file or folder returned on this page.
  for position, entry in enumerate(found):
    name, file_id, mime = entry.get("name"), entry.get("id"), entry.get("mimeType")
    parents, modified = entry.get("parents"), entry.get("modifiedTime")
    display(f'{position} - {name} - {file_id} - {mime} - {parents} - {modified}')
  return found

# Drive search function, template from Google Developers
def fileSearch(type,query,credentials):
  """Search Drive for files of one kind and return every match.

  Args:
    type: Key selecting the MIME-type filter: 'jpeg', 'pdf', 'doc', 'sheet',
      'folder', 'png' or 'text'. (The name shadows the builtin but is kept
      so existing keyword callers continue to work.)
    query: Extra Drive query clause appended after the MIME-type filter,
      e.g. 'and "<folder id>" in parents'. May be an empty string.
    credentials: Credentials handed to the Drive v3 service builder.

  Returns:
    A list of ``{'id': ..., 'name': ...}`` dicts gathered from ALL result
    pages, in the order they were displayed.

  Raises:
    KeyError: If ``type`` is not one of the supported keys.
  """
  mime_filters = {
    'jpeg': "mimeType='image/jpeg'",
    'pdf': "mimeType='application/pdf'",
    'doc': "mimeType='application/vnd.google-apps.document'",
    'sheet': "mimeType='application/vnd.google-apps.spreadsheet'",
    'folder': "mimeType='application/vnd.google-apps.folder'",
    'png': "mimeType='image/png'",
    'text': "mimeType='text/plain'",
  }
  drive_service = build('drive', 'v3', credentials = credentials)
  # Separate the two clauses with a space so the query does not depend on
  # the caller's clause being glued directly onto the closing quote.
  full_query = f"{mime_filters[type]} {query}".strip()
  matches = []
  page_token = None
  while True:
    response = drive_service.files().list(q=full_query,spaces='drive',fields='nextPageToken, files(id, name)',pageToken=page_token).execute()
    for file in response.get('files', []):
      # The displayed index is the file's position in the returned list.
      display(f'Found {len(matches)}: {file.get("name")} - {file.get("id")}')
      matches.append(file)
    page_token = response.get('nextPageToken', None)
    if page_token is None:
      break
  # Return the accumulated matches, not just the final page of results.
  return matches
  
def updateDoc(DOCUMENT_ID, req, credentials):
  """Apply a list of update requests to a Google Doc via ``batchUpdate``.

  Args:
    DOCUMENT_ID: Id of the document to modify.
    req: Value sent as the ``requests`` field of the batchUpdate body.
    credentials: Credentials handed to the Docs v1 service builder.

  Returns:
    The batchUpdate response. Its ``documentId`` and first ``replies``
    entry are displayed before returning.
  """
  docs_api = build('docs', 'v1', credentials = credentials)
  payload = {'requests': req}
  result = docs_api.documents().batchUpdate(documentId=DOCUMENT_ID, body=payload).execute()
  doc_id, first_reply = result["documentId"], result["replies"][0]
  display(f'Edit Successful for id: {doc_id} Changes made: {first_reply}')
  return result
def getDoc(DOCUMENT_ID, credentials):
  """Fetch a Google Doc resource, e.g. to look up indexes for later inserts.

  Args:
    DOCUMENT_ID: Id of the document to fetch.
    credentials: Credentials handed to the Docs v1 service builder.

  Returns:
    The document resource returned by ``documents().get``, or None when the
    lookup fails for any reason (the failure is displayed, not raised).
  """
  try:
    service = build('docs', 'v1', credentials = credentials)
    result = service.documents().get(documentId=DOCUMENT_ID).execute()
    display(f'Title: {result["title"]} - id: {result["documentId"]}')
    return result
  except Exception as err:
    # Stay best-effort, but let KeyboardInterrupt/SystemExit propagate and
    # show why the lookup failed instead of hiding it.
    display(f'FAIL to get {DOCUMENT_ID} SORRY')
    display(f'Reason: {err!r}')
    return None
def getDocContent(doc):
  """Return the content blocks stored under ``doc['body']['content']``.

  Each block is printed with its position before the list is returned.

  Args:
    doc: Document dict, such as the value returned by ``getDoc``.

  Returns:
    A new list of the content blocks, or None when ``doc`` has no
    ``body``/``content`` entry or is not a mapping (a failure message is
    displayed in that case).
  """
  try:
    content = list(doc['body']['content'])
  except (KeyError, TypeError):
    # The failure message must not fail itself: the title may be missing,
    # or doc may not be a dict at all (e.g. None from a failed getDoc).
    title = doc.get('title', '<untitled>') if isinstance(doc, dict) else '<unknown document>'
    display(f'FAIL to get contents for {title}')
    return None
  for i, z in enumerate(content):
    print(i, z)
  return content

## Working section


In [5]:
# Verify and create credentials
CLIENT_SECRETS_FILE = "/content/drive/MyDrive/CODE/CREDS/CRED-DESK.json"
SCOPES = [
    'https://www.googleapis.com/auth/drive',
    'https://www.googleapis.com/auth/documents',
    'https://www.googleapis.com/auth/spreadsheets',
]
# The secrets file is read from the Drive mount point, so mount Drive when the
# file is not reachable yet (keeps the cell working after a kernel restart).
if not os.path.exists(CLIENT_SECRETS_FILE):
    drive.mount('/content/drive')
# Interactive OAuth step: run once when initializing the workflow
credentials = getGcreds(CLIENT_SECRETS_FILE,SCOPES)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=244871284916-ieooau3uf8ilc792obrsk33nu933918c.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocuments+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fspreadsheets&state=rsxz2lNP4zxrkU3p2Kd48sigoBbHDJ&prompt=consent&access_type=offline
Enter the authorization code: 4/1AX4XfWiouER0n3l1vRvzQ6yUR95kgVzVWN860v4WJ3vy0n5y8cLsll01fHk


In [44]:
# Search for PDF files whose parent is the folder id embedded in `s`.
# Alternative parent id: q = '1kQERMqp9il2XezS37tYECZkYGu4qiEYM'
# Alternative clause:    s = 'and name contains ""'
q = '12zuOPCmpAekLymEQ_2re7vjwCZOm9EWK'
s = 'and "1Z9-9KhSzKGGZpjFv361BIB2rqH9fKsZy" in parents'
search = fileSearch(type='pdf', query=s, credentials=credentials)

'Found 0: mi-part4-wqs.pdf - 1MvMkW6QWAQRG1LcyvmLAPMu8Tm0xKTMK'

'Found 1: NHBP Email Agreement 6_20_2019.pdf - 1udLi3lxUHuO9js5FThpskl91IunwfU57'

In [45]:
# List the folders whose parent is the folder id held in q
q = '1kQERMqp9il2XezS37tYECZkYGu4qiEYM'
s = 'and "{}" in parents'.format(q)
search = fileSearch(type='folder', query=s, credentials=credentials)

'Found 0: NOAA-HYPERSPECTRAL-MNOMEN - 1zdsBqU9XCdxAFKz6yx7szFCbNlv-Tg1-'

'Found 1: THPO - 1gzjE-jqM2AVNN1QIPXe1gIYU5yXAojbQ'

'Found 2: AIR-PROJECT - 1l90swrj61HZliC75OqJxaSFOvQ7mV6i8'

'Found 3: NHBP-MAPS - 1pGFrhP7lfCD_Idoxj9bGmO_ArXRMzxSi'

'Found 4: WORK PICS - 1cBWrBqEpG6XX_o83XoKgosWbSa5qJAuX'

'Found 5: EGLE - 1Xmy-b1nwX09Ffu_kqNx2xq-wlpUDsSct'

'Found 6: PPG-2020 - 1cfhSACBRGdyDRGfJT67VTAUer4v9VwT1'

'Found 7: STAFF-HIRING - 1RIV1FAR8xoG5kNSzHnpMN4UzPmXFRP3E'

'Found 8:  ENV-DEPT - 12zuOPCmpAekLymEQ_2re7vjwCZOm9EWK'

'Found 9: TEAC - 1DO3UJ9vTLs6GWc6tuqjNbV_GHS8AwyWz'

'Found 10: LINE-5-CONFIDENTIAL - 1AhUeEdXgweL3BeVOhwbh1O7mncWhZskb'

'Found 11: CONSULTATIONS - 1mO0g292rmDQaE7n6XxItf_eNssxSBsCZ'

'Found 12: WATER_DATA_AUTOMATION - 1_qTI-XPdzvkMsIOHaWDq-zktsNUbeXh4'

'Found 13: GIS_PROGRAM - 1wYEqvDSOZLFstWYEce2MUKn6fBaukNGS'

'Found 14: MTEG - 1NIzK4WKwlzeniGoNAj2mDXv_gyOnzWJQ'

'Found 15: EK - 1Zm-qCDHrGtddUnZNnevSx-Kc7fQFcYJ5'

'Found 16: INVOICES - 1ZvGWizP7zajRczVnEJhGuCTn0fAb6Oqw'

'Found 17: WATER - 1Z9-9KhSzKGGZpjFv361BIB2rqH9fKsZy'

'Found 18: Q_DRIVE - 1Ki60kC8oEkQJhEulx6p_c1afMKaUXSv-'

In [None]:
# Show the list last assigned to `search` by one of the fileSearch cells above
search

In [None]:
# Request a page of up to 100 Drive entries and keep the returned list
gList = listFiles(num=100, credentials=credentials)

In [7]:
# Build one Drive v3 client for the download cells that follow
API_SERVICE_NAME = 'drive'
API_VERSION = 'v3'
page_token = None
drive_service = build(API_SERVICE_NAME, API_VERSION, credentials = credentials)

In [19]:
# Stream the raw bytes of one Drive file into an in-memory buffer
file_id = '1PA4dhDvKtHibnl89E-Y3birm5dFPKlia'
fh = io.BytesIO()
request = drive_service.files().get_media(fileId=file_id)
downloader = MediaIoBaseDownload(fh, request)

done = False
while not done:
    # next_chunk() returns the progress status and a finished flag
    status, done = downloader.next_chunk()
    print("Download %d%%." % int(status.progress() * 100))

Download 100%.


In [25]:
# Download one Drive file into memory, then save it to a local PDF
file_id = '1aMbjH4BJMd4h_bsTXNabwvTpG5tsYM3E'
request = drive_service.files().get_media(fileId=file_id)

# Create the buffer BEFORE the downloader: the downloader writes into the
# object it is given, so it must be the same buffer that is saved below.
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print("Download %d%%" % int(status.progress() * 100))

# The file has been downloaded into RAM, now save it in a file
fh.seek(0)
with open('BG96580014-4.pdf', 'wb') as f:
    shutil.copyfileobj(fh, f)

Download 100%


In [30]:
# Working PDF download: stream the file into memory, then write it to disk
file_id = '1aMbjH4BJMd4h_bsTXNabwvTpG5tsYM3E'
fh = io.BytesIO()
request = drive_service.files().get_media(fileId=file_id)
downloader = MediaIoBaseDownload(fh, request)

done = False
while not done:
    status, done = downloader.next_chunk()
    print("Download %d%%" % int(status.progress() * 100))

# Rewind so the copy starts at the first byte of the buffer
fh.seek(0)
with open('somepdf.pdf', 'wb') as f:
    shutil.copyfileobj(fh, f)

Download 100%


In [None]:
# https://stackoverflow.com/questions/46545336/search-files-recursively-using-google-drive-rest/46562607
