In [None]:
# Copyright 2023 Nils Knieling
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Google Drive Export

[![Open in Colab](https://img.shields.io/badge/Open%20in%20Colab-%23F9AB00.svg?logo=googlecolab&logoColor=white)](https://colab.research.google.com/github/Cyclenerd/toolbox/blob/master/notebooks/Google_Drive_Export.ipynb)
[![Open in Vertex AI Workbench](https://img.shields.io/badge/Open%20in%20Vertex%20AI%20Workbench-%234285F4.svg?logo=googlecloud&logoColor=white)](https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/Cyclenerd/toolbox/master/notebooks/Google_Drive_Export.ipynb)
[![View on GitHub](https://img.shields.io/badge/View%20on%20GitHub-181717.svg?logo=github&logoColor=white)](https://github.com/Cyclenerd/toolbox/blob/master/notebooks/Google_Drive_Export.ipynb)

The [Google Drive API](https://developers.google.com/drive/api/guides/manage-downloads) supports several types of download and export actions:

* Download: Blob file content using the `files.get` method with the `alt=media` URL parameter.
* Export: Google Workspace document content in a format that your app can handle, using `files.export`.

![Screenshot](https://raw.githubusercontent.com/Cyclenerd/toolbox/master/notebooks/google-drive-export.png)

## Install required packages

>⚠️ You may receive a warning to "Restart Runtime" after the packages are installed. Don't worry, the "Restart" cell below restarts the runtime for you.

In [None]:
#@markdown ### Install dependencies

#@markdown * [Google Authentication Library](https://pypi.org/project/google-auth/)
#@markdown * [Google API Client Library for Python](https://pypi.org/project/google-api-python-client/)

!pip install google-auth==2.22.0
!pip install google-api-python-client==2.97.0

print("☑️ Done")

In [None]:
#@markdown ### Restart

# Restart the kernel right after the installs so that the freshly
# installed packages are the ones picked up by the following cells.
import IPython

IPython.Application.instance().kernel.do_shutdown(True)

In [1]:
#@markdown ### (Colab only!) Authenticate your Google Cloud Account

import sys

# The google.colab package only exists inside a Colab runtime. Checking
# sys.modules keeps this cell safe to execute elsewhere (e.g. Vertex AI
# Workbench), so "Run all" does not stop here with a ModuleNotFoundError.
IN_COLAB = "google.colab" in sys.modules

if IN_COLAB:
    # Authenticate gcloud.
    from google.colab import auth
    auth.authenticate_user()
    print("☑️ OK")
else:
    print("☑️ Skipped (not running in Colab)")

In [2]:
#@markdown ### Import Python libraries

from __future__ import print_function

import io

import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload

print("☑️ OK")

In [3]:
# @markdown ### Enable download blob file function

def download_file(real_file_id):
    """Download the content of a blob file stored on Google Drive.

    Builds a Drive API v3 client with the credentials returned by
    ``google.auth.default()`` and fetches the file media chunk by chunk
    into an in-memory buffer, printing the progress after every chunk.

    Args:
        real_file_id: ID of the file to download.

    Returns:
        bytes: The complete downloaded file content.

    Raises:
        HttpError: If a Drive API request fails. The error is printed and
            then re-raised, so the caller never receives partial or
            missing content.

    Load pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """
    creds, _ = google.auth.default()

    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)

        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=real_file_id)
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')

    except HttpError as error:
        print(F'An error occurred: {error}')
        # Re-raise instead of falling through: there is no content to
        # return, and calling getvalue() on None would hide the real
        # API error behind an AttributeError.
        raise

    return buffer.getvalue()

print("☑️ OK")

In [None]:
# @markdown ### Enable export document function

# @markdown Supported export MIME types:
# @markdown <https://developers.google.com/drive/api/guides/ref-export-formats>

def export_doc(real_file_id, mime_type):
    """Export a Google Workspace document from Drive in the given format.

    Builds a Drive API v3 client with the credentials returned by
    ``google.auth.default()`` and fetches the exported media chunk by
    chunk into an in-memory buffer, printing the progress after every
    chunk.

    Args:
        real_file_id: File ID of any Workspace document format file.
        mime_type: Target format, one of
            https://developers.google.com/drive/api/guides/ref-export-formats

    Returns:
        bytes: The complete exported document content.

    Raises:
        HttpError: If a Drive API request fails. The error is printed and
            then re-raised, so the caller never receives partial or
            missing content.

    Load pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """
    creds, _ = google.auth.default()

    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)

        # pylint: disable=maybe-no-member
        request = service.files().export_media(fileId=real_file_id,
                                               mimeType=mime_type)
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')

    except HttpError as error:
        print(F'An error occurred: {error}')
        # Re-raise instead of falling through: there is no content to
        # return, and calling getvalue() on None would hide the real
        # API error behind an AttributeError.
        raise

    return buffer.getvalue()

print("☑️ OK")

In [None]:
# @markdown ### Enable write to local file function

def write_to_file(file_name, binary_file_content):
    """Write binary content to a local file, replacing any existing file.

    Args:
        file_name: Path of the file to create or overwrite.
        binary_file_content: Bytes-like object to write.
    """
    # Binary mode (wb); the context manager closes the handle even when
    # the write itself raises, so no file descriptor is leaked.
    with open(file_name, "wb") as file:
        file.write(binary_file_content)

print("☑️ OK")

## File

In [None]:
# @markdown ### Enter file ID
file_id = "1eyxyqsMHDyCodWFzN_lvzPEf_E1amzY1ME-jS_UXHOI"  # @param {type:"string"}

# Stop right here when the form field was left empty.
if not file_id:
    raise AssertionError("⚠️ Please input a valid file ID")

print(f"☑️ File ID: {file_id}")


Note for me (example file IDs for testing):

* Workspace document:
  ```
  1eyxyqsMHDyCodWFzN_lvzPEf_E1amzY1ME-jS_UXHOI
  ```
* JPEG image:
  ```
  17Qo-E9AxFdgwlKTfx3ZwUmpCy2jTv47h
  ```

In [None]:
# @markdown ### Download a blob file stored on Drive (no document)

# @markdown Enter filename for export:
file_name = "export.jpg"  # @param {type:"string"}

# Stop before any request is sent when no target file name was given.
if not file_name:
    raise AssertionError("⚠️ Please input a valid file name for blob export")

# Fetch the blob content from Drive, then store it locally.
content_export = download_file(file_id)
write_to_file(file_name, content_export)

print("☑️ Done")

In [None]:
# @markdown ### Export a document stored on Drive

# @markdown > Please note: Exported content is limited to 10 MB.

# @markdown Enter filenames for export:
file_pdf_name = "export.pdf"  # @param {type:"string"}
file_word_name = "export.docx"  # @param {type:"string"}
file_text_name = "export.txt"  # @param {type:"string"}

# Stop before any request is sent when a target file name is missing.
if not file_pdf_name:
    raise AssertionError("⚠️ Please input a valid file name for PDF export")
if not file_word_name:
    raise AssertionError("⚠️ Please input a valid file name for Word export")
if not file_text_name:
    raise AssertionError("⚠️ Please input a valid file name for Plain Text export")

# PDF
content_export_pdf = export_doc(file_id, 'application/pdf')
write_to_file(file_pdf_name, content_export_pdf)

# Microsoft Word (.docx)
mime_type_docx = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
content_export_word = export_doc(file_id, mime_type_docx)
write_to_file(file_word_name, content_export_word)

# Plain text
content_export_text = export_doc(file_id, 'text/plain')
write_to_file(file_text_name, content_export_text)

print("☑️ Done")