# Google API for Drive

## Move Files from Root folder to 'to-be-backed' folder

In [4]:
from __future__ import print_function
import os, pandas as pd, pickle
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [5]:
# If modifying these scopes, delete the file token.json.

SCOPES = [
    'https://www.googleapis.com/auth/drive.metadata.readonly',
    'https://www.googleapis.com/auth/drive.file',
    'https://www.googleapis.com/auth/drive',
    'https://www.googleapis.com/auth/drive.appdata']

# We get access to these scopes only.

In [6]:
"""Authorization"""

creds = None
# The file token.json stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.json'):
    try:
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
        print('authentication exists')
    except:
        print('token read failed')
        pass
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        print('getting authenticated')
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'client_secret.json', SCOPES)
        creds = flow.run_local_server(port=44599)
    # Save the credentials for the next run
    with open('token.json', 'w') as token:
        token.write(creds.to_json())

authentication exists
getting authenticated


In [7]:
# Start Service
service = build('drive', 'v3', credentials=creds)

In [8]:
%%time

"""
find all files in root
    - in parent folder, id = "fsdklfj32498rj98thj4iuen"
    - having quotaBytesUsed = 0
"""

parent_folder_id = "fsdklfj32498rj98thj4iuen"
page_token = None # which page to get
searched_files = []# list of files

while True:
    response = service.files().list(q=f"'{parent_folder_id}' in parents ",
                                    spaces='drive',
                                    pageSize=1000,
                                    fields='nextPageToken, files(id, name, fullFileExtension, quotaBytesUsed, size, webViewLink)',
                                    pageToken=page_token).execute()
    files = response.get('files', [])
    searched_files.extend( files )
    # print('Files added ' + str(len(files)) )
    page_token = response.get('nextPageToken', None)
    if page_token is None:
        break
print(f"Total files searched: {len(searched_files)}")

df = pd.DataFrame(data=searched_files)

Total files searched: 32552
CPU times: user 301 ms, sys: 65 ms, total: 366 ms
Wall time: 1min 8s


In [None]:
df.head()

In [15]:
df_files_root_size = df[df['quotaBytesUsed']!='0']

In [16]:
df_files_root_size.shape

(82, 6)

In [17]:
df_files_root_size.fullFileExtension.value_counts()

fullFileExtension
PNG                     25
docx                    18
pdf                     17
jpeg                     3
                         3
pptx                     1
0_apple.docx             1
7_bofa.docx              1
6_temp.docx              1
0_jpmc.docx              1
5syn.pdf                 1
5syn.docx                1
4_with_annexure.docx     1
PDF                      1
Name: count, dtype: int64

In [18]:
df_files_root_size.to_csv('df_files_root_size_moved.csv')

In [19]:
file_not_moved = []
file_moved = []
def move_file_to_folder(file_id, folder_id):
    """Move specified file to the specified folder.
    Args:
        file_id: Id of the file to move.
        folder_id: Id of the folder
    Print: An object containing the new parent folder and other meta data
    Returns : Parent Ids for the file

    Load pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """

    try:

        # Retrieve the existing parents to remove
#         print('-------------------')
#         print(f'Moving file: {file_id}')
        file = service.files().get(fileId=file_id, fields='parents').execute()
        previous_parents = ",".join(file.get('parents'))
#         print(f'Current parent: {previous_parents}')
        # Move the file to the new folder
        file = service.files().update(fileId=file_id, addParents=folder_id,
                                      removeParents=previous_parents,
                                      fields='id, parents').execute()
#         print(f'New parent: {file.get("parents")}')
        file_moved.append(file_id)
#         return None

    except HttpError as error:
        print(F'An error occurred: {error}')
        file_not_moved.append(file_id)
#         return None



In [20]:
%%time
folder_id = "fsdklfj32498rj98thj4iuen-dlmrk342portrg0ithnrg"
df_files_root_size.apply(lambda x: move_file_to_folder(x.id, folder_id), axis=1)
CPU times: user 1.02 s, sys: 139 ms, total: 1.16 s
Wall time: 1min 38s

CPU times: user 1.02 s, sys: 139 ms, total: 1.16 s
Wall time: 1min 38s


4        None
7        None
11       None
13       None
14       None
         ... 
29719    None
29723    None
29724    None
30511    None
30512    None
Length: 82, dtype: object

# Check if moved

In [None]:
%%time

"""
find all files in root
    - in parent folder, id = "fsdklfj32498rj98thj4iuen"
    - having quotaBytesUsed = 0
"""

parent_folder_id = "fsdklfj32498rj98thj4iuen"
page_token = None # which page to get
searched_files = []# list of files

while True:
    response = service.files().list(q=f"'{parent_folder_id}' in parents",
                                    spaces='drive',
                                    pageSize=1000,
                                    fields='nextPageToken, files(id, name, fullFileExtension, quotaBytesUsed, size, webViewLink)',
                                    pageToken=page_token).execute()
    files = response.get('files', [])
    searched_files.extend( files )
    # print('Files added ' + str(len(files)) )
    page_token = response.get('nextPageToken', None)
    if page_token is None:
        break
print(f"Total files searched: {len(searched_files)}")

df = pd.DataFrame(data=searched_files)
df.shape

In [23]:
df[df['quotaBytesUsed']!='0'].shape

(0, 6)

In [None]:
# All moved successfully