# download_files.py
# Download a file from Google Drive by name using the Drive v3 API.
import pickle
import os
import re
import io
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaIoBaseDownload
import requests
from tqdm import tqdm
# OAuth scopes requested when authorizing against Google Drive.
# If this list is modified, delete the cached token.pickle file so that the
# authorization flow runs again with the new scopes.
SCOPES = [
    'https://www.googleapis.com/auth/drive.metadata',
    'https://www.googleapis.com/auth/drive',
    'https://www.googleapis.com/auth/drive.file',
]
def get_gdrive_service():
    """Build an authorized Google Drive v3 service object.

    Credentials are read from the local ``token.pickle`` cache when that file
    exists. When they are missing or not valid they are either refreshed
    (expired credentials that carry a refresh token) or obtained through the
    installed-app OAuth flow configured by ``credentials.json``; the resulting
    credentials are then written back to ``token.pickle``.

    Returns:
        The object returned by ``build('drive', 'v3', credentials=...)``.
    """
    token_path = 'token.pickle'
    credentials = None

    # Reuse the access/refresh tokens cached by a previous run, if any.
    # NOTE(review): pickle.load can execute arbitrary code; token.pickle must
    # only ever be a file written by this script.
    if os.path.exists(token_path):
        with open(token_path, 'rb') as cached:
            credentials = pickle.load(cached)

    if not (credentials and credentials.valid):
        refreshable = (
            credentials and credentials.expired and credentials.refresh_token
        )
        if refreshable:
            credentials.refresh(Request())
        else:
            # No usable credentials: let the user log in through the browser.
            oauth_flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            credentials = oauth_flow.run_local_server(port=0)
        # Persist the new or refreshed credentials for the next run.
        with open(token_path, 'wb') as cached:
            pickle.dump(credentials, cached)

    return build('drive', 'v3', credentials=credentials)
def download_file_from_google_drive(id, destination):
    """Download a Google Drive file to ``destination`` over plain HTTP.

    The file is requested from the ``docs.google.com/uc`` download endpoint
    with an unauthenticated ``requests`` session. If the first response sets a
    cookie whose name starts with ``download_warning``, the request is repeated
    with that cookie's value as the ``confirm`` parameter. The body is then
    streamed to disk in chunks while a progress bar is displayed.

    Args:
        id: Google Drive file ID. (The name shadows the builtin but is kept
            so existing keyword callers continue to work.)
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the download response has an error status, so
            that an error page is never written to ``destination``.
    """

    def get_confirm_token(response):
        """Return the value of the ``download_warning*`` cookie, or None."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Stream ``response`` into ``destination`` with a progress bar."""
        CHUNK_SIZE = 32768
        # get the file size from Content-length response header (0 if absent)
        file_size = int(response.headers.get("Content-Length", 0))
        # The header may be missing or carry no quoted filename; fall back to
        # the destination path for display instead of crashing.
        content_disposition = response.headers.get("content-disposition") or ""
        match = re.search(r'filename="(.+)"', content_disposition)
        filename = match.group(1) if match else destination
        print("[+] File size:", file_size)
        print("[+] File name:", filename)
        with open(destination, "wb") as f:
            # The bar is driven only by update(); wrapping the chunk iterator
            # in tqdm as well would add 1 per chunk on top of the byte count.
            # An unknown size is passed as None so tqdm shows a plain counter.
            with tqdm(desc=f"Downloading {filename}",
                      total=file_size or None,
                      unit="Byte", unit_scale=True,
                      unit_divisor=1024) as progress:
                for chunk in response.iter_content(CHUNK_SIZE):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        progress.update(len(chunk))

    # base URL for download
    URL = "https://docs.google.com/uc?export=download"
    # The context manager closes the session (and its connections) on exit,
    # including when an exception is raised.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        print("[+] Downloading", response.url)
        # get confirmation token
        token = get_confirm_token(response)
        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
        # Fail loudly on 4xx/5xx rather than saving the error body to disk.
        response.raise_for_status()
        # download to disk
        save_response_content(response, destination)
def search(service, query):
    """Collect every Drive file matching ``query``.

    Pages through ``service.files().list(...)`` until no ``nextPageToken`` is
    returned, printing one line per file found.

    Args:
        service: Drive service object exposing ``files().list(...).execute()``.
        query: Search expression passed as the ``q`` parameter.

    Returns:
        A list of ``(id, name, mimeType)`` tuples, in the order returned.
    """
    matches = []
    next_token = None
    while True:
        request = service.files().list(
            q=query,
            spaces="drive",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=next_token,
        )
        page = request.execute()
        for entry in page.get("files", []):
            name, file_id, mime_type = entry["name"], entry["id"], entry["mimeType"]
            print(f"Found file: {name} with the id {file_id} and type {mime_type}")
            matches.append((file_id, name, mime_type))
        next_token = page.get('nextPageToken', None)
        if not next_token:
            # no more pages to fetch
            return matches
def download(filename="bbc.zip"):
    """Find a file on Google Drive by name and download it locally.

    The first search hit is made readable by anyone and then fetched with
    ``download_file_from_google_drive`` into a local file of the same name.

    Args:
        filename: Name of the Drive file to look up; also used as the local
            destination path. Defaults to ``"bbc.zip"``.

    Raises:
        IndexError: If no file with that name is found (same exception type
            as before, now with an explanatory message).
    """
    service = get_gdrive_service()
    # Escape backslashes and single quotes so the name cannot terminate the
    # quoted literal inside the Drive query string.
    escaped_name = filename.replace("\\", "\\\\").replace("'", "\\'")
    # search for the file by name
    search_result = search(service, query=f"name='{escaped_name}'")
    if not search_result:
        raise IndexError(f"No file named {filename!r} was found on Google Drive")
    # get the GDrive ID of the first match
    file_id = search_result[0][0]
    # NOTE(review): this grants read access to anyone and is never revoked
    # here; the download helper is called with only the file ID, no credentials.
    service.permissions().create(body={"role": "reader", "type": "anyone"}, fileId=file_id).execute()
    # download file
    download_file_from_google_drive(file_id, filename)
# Run the download only when this file is executed as a script.
if __name__ == "__main__":
    download()