Since files larger than 40 MB cannot easily be downloaded from Google Drive with wget, we use a small Python script instead.

In [1]:
import requests

def download_file_from_google_drive(id, destination):
    """Download a file from Google Drive and write it to ``destination``.

    A first request is sent with the file id. If the response sets a cookie
    whose name starts with ``download_warning``, its value is sent back as the
    ``confirm`` parameter in a second request, and that second response is the
    one saved.

    Args:
        id: Value sent as the ``id`` query parameter (the Drive file id).
        destination: Path of the local file; opened in binary write mode, so
            an existing file is overwritten.

    Raises:
        requests.HTTPError: If the response that would be saved carries a
            4xx/5xx status. Nothing is written to ``destination`` in that case.
    """
    def get_confirm_token(response):
        """Return the value of the first ``download_warning*`` cookie, or None."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value

        return None

    def save_response_content(response, destination):
        """Stream the response body to ``destination`` in fixed-size chunks."""
        CHUNK_SIZE = 32768

        with open(destination, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk: # filter out keep-alive new chunks
                    f.write(chunk)

    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even if the download fails part-way through.
    with requests.Session() as session:
        response = session.get(URL, params = { 'id' : id }, stream = True)
        try:
            token = get_confirm_token(response)

            if token:
                # The first body is never read; with stream=True the
                # connection is only released once the response is closed.
                response.close()
                params = { 'id' : id, 'confirm' : token }
                response = session.get(URL, params = params, stream = True)

            # Fail loudly rather than saving an HTTP error page as the file.
            response.raise_for_status()
            save_response_content(response, destination)
        finally:
            response.close()

In [2]:
# Google Drive id of the data archive and the local path it is saved under.
file_id = '1dBu7N_kQ23nd83yCYQkgvw0pOBXreMnN'
destination = 'data.tar.gz'
download_file_from_google_drive(id=file_id, destination=destination)

In [3]:
%%bash
# Unpack the downloaded archive, printing each extracted entry.
tar --extract --verbose --file=data.tar.gz

data/titanic.csv
data/user.json
data/houses.csv
data/bills_exploded.parquet/.part-00003-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet.crc
data/bills_exploded.parquet/._SUCCESS.crc
data/bills_exploded.parquet/part-00000-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet
data/bills_exploded.parquet/.part-00001-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet.crc
data/bills_exploded.parquet/part-00005-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet
data/bills_exploded.parquet/.part-00002-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet.crc
data/bills_exploded.parquet/part-00012-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet
data/bills_exploded.parquet/part-00001-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet
data/bills_exploded.parquet/part-00006-ceb74ad2-c8f7-48ed-8c16-5c1f6ee8f063-c000.snappy.parquet
data/bills_exploded.parquet/
data/bills-1000000.json
data/ml-latest-small/ratings.csv
data/houses_train.pkl
data/ml-latest-smal