In [1]:
from email.mime.multipart import MIMEMultipart
import os.path
import pandas as pd
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
import datetime as dt
from email.mime.text import MIMEText
import base64
from googleapiclient.errors import HttpError
import numpy as np

In [2]:
# Connect to the Google Sheets API (v4) using the stored read-only user token.
spreadsheet_id = '1EXTXghFKV7Xfu8Jc6fxk2pC-Inb9g4XWKr5C-FxgpP4'

SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
creds = Credentials.from_authorized_user_file('token.json', SCOPES)
service = build('sheets', 'v4', credentials=creds)

# Read the control range as displayed (formatted) values; the raw API
# response is kept in `response` for the following cells.
SAMPLE_RANGE_NAME = 'Control!F2:F31'
request = service.spreadsheets().values().batchGet(
    spreadsheetId=spreadsheet_id,
    ranges=SAMPLE_RANGE_NAME,
    valueRenderOption='FORMATTED_VALUE')
response = request.execute()

In [3]:
# Turn the first returned value range into a frame, discarding fully empty rows.
df = (
    pd.DataFrame(response['valueRanges'][0]['values'])
    .dropna(how='all')
    .reset_index(drop=True)
)
url_list = df[0].tolist()
# A cell may hold several lines; only the first line of each cell is kept.
url_list2 = [cell_text.split('\n')[0] for cell_text in url_list]

In [4]:
def get_spreadsheet_id(url):
    """Extract the spreadsheet ID from a Google Sheets URL.

    The ID is taken as the path segment that follows ``/d/``, so the result
    does not depend on what comes after it (``/edit``, a trailing slash, a
    query string, a fragment, or nothing at all).

    Parameters
    ----------
    url : str
        Spreadsheet URL, e.g. ``https://docs.google.com/spreadsheets/d/<id>/edit#gid=0``.

    Returns
    -------
    str
        The ID segment. When the URL has no ``/d/`` marker, the
        second-to-last ``/``-separated segment is returned instead, which is
        what this function always did before.

    Raises
    ------
    IndexError
        If there is no ``/d/`` marker and the URL has fewer than two segments.
    """
    _, marker, tail = url.partition('/d/')
    if marker:
        # Cut the ID at the first path, query or fragment delimiter.
        for delimiter in '/?#':
            tail = tail.split(delimiter, 1)[0]
        tail = tail.strip()
        if tail:
            return tail
    # No usable '/d/<id>' part: fall back to the positional rule.
    return url.split('/')[-2]
# One spreadsheet ID per URL, in the same order as url_list2.
ids = [get_spreadsheet_id(sheet_url) for sheet_url in url_list2]

In [5]:
# Start both consolidated tables as independent empty frames.
opex_consolidated, hr_consolidated = pd.DataFrame(), pd.DataFrame()

# Sheets client shared by all get_table calls; built on first use so the
# token file is read and the client constructed only once per kernel run.
_READONLY_SHEETS_SERVICE = None


def _get_readonly_sheets_service():
    """Return the shared read-only Sheets v4 client, creating it on first use."""
    global _READONLY_SHEETS_SERVICE
    if _READONLY_SHEETS_SERVICE is None:
        scopes = ['https://www.googleapis.com/auth/drive.readonly']
        creds = Credentials.from_authorized_user_file('token.json', scopes)
        _READONLY_SHEETS_SERVICE = build('sheets', 'v4', credentials=creds)
    return _READONLY_SHEETS_SERVICE


def get_table(id_, range_):
    """Download one range of a spreadsheet as a DataFrame.

    Parameters
    ----------
    id_ : str
        Spreadsheet ID.
    range_ : str
        Range in A1 notation, including the tab name (e.g. ``'Opex!A1:U500'``).

    Returns
    -------
    pandas.DataFrame
        The formatted cell values of the range with fully empty rows removed
        and the row index reset. Columns are labelled 0..n-1.

    Raises
    ------
    KeyError
        If the API response contains no ``'values'`` entry for the range.

    Errors raised by the API request itself propagate to the caller.
    """
    # Call the Sheets v4 API (values.batchGet) for the single requested range.
    request = _get_readonly_sheets_service().spreadsheets().values().batchGet(
        spreadsheetId=id_,
        ranges=range_,
        valueRenderOption='FORMATTED_VALUE')
    response = request.execute()
    return (
        pd.DataFrame(response['valueRanges'][0]['values'])
        .dropna(how='all')
        .reset_index(drop=True)
    )

def repace_numbers(x):
    """Return ``int(x)`` when that conversion succeeds, otherwise ``x`` unchanged.

    Values for which ``int`` raises ``ValueError`` or ``TypeError`` (for
    example non-numeric text, decimal strings such as ``'1.5'``, or ``None``)
    are passed through as they are.
    """
    try:
        return int(x)
    except (ValueError, TypeError):
        return x

def format_table(table, idx=4):
    """Clean a raw sheet table and convert its trailing columns to integers.

    Steps, in order:

    1. Empty strings become NaN and every row with a missing value in any of
       the first ``idx`` columns is dropped.
    2. The first remaining row is discarded. If its first cell is neither
       ``'Division'`` nor ``'Location'``, the first column is discarded too.
    3. Columns are relabelled 0..n-1 so that tables with and without the
       extra leading column line up when they are concatenated later.
    4. From position 5 onwards, ``$`` and ``,`` are stripped from text cells
       and each cell is passed through ``repace_numbers``.

    Parameters
    ----------
    table : pandas.DataFrame
        Raw cell values, one row per sheet row.
    idx : int, default 4
        Number of leading columns that must all be filled for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        The cleaned table, or an empty DataFrame when no row survives step 1.
    """
    cleaned = table.replace('', np.nan).dropna(
        axis=0,
        subset=table.columns[:idx],
        how="any",
    )
    # Nothing usable in this sheet: return an empty frame instead of failing
    # on the header lookup below.
    if cleaned.empty:
        return pd.DataFrame()

    # .copy() so the assignment further down writes to an independent frame
    # rather than to a slice of `cleaned`.
    if cleaned.iloc[0, 0] not in ['Division', 'Location']:
        body = cleaned.iloc[1:, 1:].copy()
    else:
        body = cleaned.iloc[1:].copy()

    # Slicing keeps the original labels (which would start at 1 after the
    # column drop); pd.concat aligns on labels, so make them positional again.
    body.columns = range(body.shape[1])

    # Raw string: '\$' in a normal string literal is an invalid escape sequence.
    converted = (
        body.iloc[:, 5:]
        .replace(regex=[r'\$', ','], value='')
        .apply(lambda column: column.map(repace_numbers))
    )
    body.iloc[:, 5:] = converted
    return body

def query_table(id_, range_):
    """Fetch one range with ``get_table`` and clean it with ``format_table``.

    Parameters
    ----------
    id_ : str
        Spreadsheet ID.
    range_ : str
        Range in A1 notation including the tab name (``'<tab>!<cells>'``).

    Returns
    -------
    pandas.DataFrame or None
        The cleaned table, or ``None`` when the request raised ``HttpError``.
        A short message describing the failure is printed in that case.
    """
    # Tab name for the error message, taken from the range itself so the
    # function does not depend on a variable set by the calling loop.
    sheet_name = range_.split('!')[0]
    try:
        return format_table(get_table(id_, range_))
    except HttpError as err:
        status = err.resp.status
        if status == 400:
            print(f"{sheet_name} not found in {id_}")
        elif status == 404:
            print(f"{id_} entity not found")
        else:
            # Previously skipped without any message.
            print(f"{id_}: request for {range_} failed with HTTP {status}")
        return None


opex_range = 'Opex!A1:U500'
hr_range = 'HR!A1:R500'

# Gather the per-spreadsheet tables in lists and concatenate once at the end;
# calling pd.concat inside the loop re-copies everything gathered so far on
# every iteration.
opex_frames = []
hr_frames = []

for id_, source_url in zip(ids, url_list2):
    for range_ in [hr_range, opex_range]:
        # `source` stays a module-level name; other code in this notebook
        # may read it.
        source = range_.split('!')[0]
        temp = query_table(id_, range_)
        if temp is None:
            # The request failed; query_table has already reported it.
            continue
        # Tag every row with the URL of the spreadsheet it came from.
        temp.insert(0, "Source", source_url)
        if source == 'Opex':
            opex_frames.append(temp)
        else:
            hr_frames.append(temp)

# pd.concat raises on an empty list, so fall back to an empty frame.
opex_consolidated = pd.concat(opex_frames) if opex_frames else pd.DataFrame()
hr_consolidated = pd.concat(hr_frames) if hr_frames else pd.DataFrame()


KeyboardInterrupt: 

In [None]:
def update_sheet(id_, range_, table):
    """Replace the contents of one tab with a timestamp and a table.

    Cell A1 of the tab receives an "Updated at - ..." UTC timestamp, columns
    A:V from row 3 down are cleared, and ``table`` is written starting at A3.

    Parameters
    ----------
    id_ : str
        ID of the spreadsheet to write to.
    range_ : str
        Name of the tab; it is used as the prefix of every A1-notation range.
    table : pandas.DataFrame
        Data to write. Missing values are written as empty strings; the index
        and column labels are not written.

    Returns
    -------
    int
        Always 1.

    Raises
    ------
    FileNotFoundError
        If ``write_token.json`` does not exist in the working directory.
    """
    scope = [
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive',
    ]

    # Fail with a clear message instead of hitting an unbound `creds` below.
    if not os.path.exists('write_token.json'):
        raise FileNotFoundError(
            "write_token.json not found; cannot authorise write access to Google Sheets"
        )
    creds = Credentials.from_authorized_user_file('write_token.json', scope)
    service = build('sheets', 'v4', credentials=creds)
    values_api = service.spreadsheets().values()

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted text is the same.
    now_utc = dt.datetime.now(dt.timezone.utc)
    last_updated = "Updated at - " + now_utc.strftime("%m/%d/%Y, %H:%M:%S") + 'UTC'

    # Timestamp cell.
    values_api.clear(
        spreadsheetId=id_,
        range=f"{range_}!A1",
    ).execute()

    # Write to the spreadsheet passed in as `id_`, like every other call here
    # (this one used the notebook-level `spreadsheet_id` before).
    values_api.update(
        spreadsheetId=id_,
        valueInputOption='RAW',
        range=f"{range_}!A1",
        body=dict(
            majorDimension='ROWS',
            values=[[last_updated]],
        ),
    ).execute()

    # Data area: clear first so rows left over from a longer previous table
    # do not remain below the new data.
    values_api.clear(
        spreadsheetId=id_,
        range=f"{range_}!A3:V",
    ).execute()

    values_api.update(
        spreadsheetId=id_,
        valueInputOption='RAW',
        range=f"{range_}!A3:V",
        body=dict(
            majorDimension='ROWS',
            values=table.fillna('').values.tolist(),
        ),
    ).execute()
    return 1

# Publish both consolidated tables to their tabs in the control spreadsheet.
update_sheet(id_=spreadsheet_id, range_="Opex_Consolidated", table=opex_consolidated)
update_sheet(id_=spreadsheet_id, range_="HR_Consolidated", table=hr_consolidated)