# If you just updated `spswarehouse` you'll need to update `scopes` in `credentials.py` (Line 23) from `credentials.py.template` before using GoogleDrive.

### GoogleDrive is a wrapper on pydrive.drive (https://pythonhosted.org/PyDrive/)

In [1]:
import spswarehouse
from spswarehouse.table_utils import *
from spswarehouse.warehouse import Warehouse
from spswarehouse.googledrive import GoogleDrive

import pandas as pd
import os

To access a Google Drive file from this notebook, first share that file (in the Drive UI) with the service account jupyter-sheets@sps-warehouse.iam.gserviceaccount.com


# Getting a list of available files

In [2]:
# Retrieve every file that is not in the trash.
# 'q' is the search query; the "Sample file parameters" cell further down
# shows which fields a file exposes for querying.
# Other passable parameters: https://developers.google.com/drive/api/v3/reference/files/list
# NOTE(review): the field names used here (`title`) look like Drive API v2
# names -- confirm the v3 link above is the right reference.
allFilesQuery = {'q': "trashed = False"}
fileList = GoogleDrive.ListFile(allFilesQuery).GetList()
for file in fileList:
    print(f"Title: {file['title']}, ID: {file['id']}")

Title: spswarehouse_sample_file.csv, ID: 1Mug7698RhjfrVrIgHs6Yc3AM0mhGNA7q
Title: Excellence Indicators Query Mapping, ID: 1zgLFoDQSdziCQUS-tMasoH7D5TgQVt2DGcAFz65Z3P0
Title: DE-AP Student Datafile 2019.csv, ID: 1Oi1ojPryvonzIb6Qvi6pC49yJVgwVbTF
Title: sites_historical, ID: 1A7tZtvvaED85wDtnm5BeM7pGWGDyaq5pXNyJIYgv9xU
Title: SY19 - Final Grades Review - Sierra, ID: 1_sbVtOjkeo3LHCz4TAi15vwRw6RSakgX6ieJODc-l9w
Title: SY19 - Final Grades Review - Rainier, ID: 1FToIfC0M-gYHuTbQcfhce8fsu1MFI9ZtEGLJcrnR2EM
Title: SY19 - Final Grades Review, ID: 16fA-2eJqa1dO_V85osArWEg9aHvu9UcSH90NCtV2UCY
Title: SY19 - Final Grades Review - Olympus, ID: 1EOdH6Z2Cgm1hjzGDV9t9ADvUuD2p5VUWaLOPf33OKpE
Title: SY19 - Final Grades Review - Atlas, ID: 12WTQIuyvYAVbNpEuUg1ykSbuHAxc8uxPEK42WA3B0PQ
Title: SY19 - Final Grades Review - Tamalpais, ID: 1y9t0e6mGRNtWHEu0Kp8DeyQmWgd9lnQ2Ot0aini8Xu8
Title: SY19 - Final Grades Review - Tahoma, ID: 1fVh4nIksrxRB5W_U4FlGFybl7koppoB9B6wEt4cBhoY
Title: SY19 - Final Grades Review 

In [3]:
# Search by title: keep only the files whose title matches the
# `title contains` query below.
titleContainsQuery = {'q': "title contains 'SY19 - Final'"}
fileList2 = GoogleDrive.ListFile(titleContainsQuery).GetList()
for file in fileList2:
    print(f"Title: {file['title']}, ID: {file['id']}")

Title: SY19 - Final Grades Review - Sierra, ID: 1_sbVtOjkeo3LHCz4TAi15vwRw6RSakgX6ieJODc-l9w
Title: SY19 - Final Grades Review - Rainier, ID: 1FToIfC0M-gYHuTbQcfhce8fsu1MFI9ZtEGLJcrnR2EM
Title: SY19 - Final Grades Review, ID: 16fA-2eJqa1dO_V85osArWEg9aHvu9UcSH90NCtV2UCY
Title: SY19 - Final Grades Review - Olympus, ID: 1EOdH6Z2Cgm1hjzGDV9t9ADvUuD2p5VUWaLOPf33OKpE
Title: SY19 - Final Grades Review - Atlas, ID: 12WTQIuyvYAVbNpEuUg1ykSbuHAxc8uxPEK42WA3B0PQ
Title: SY19 - Final Grades Review - Tamalpais, ID: 1y9t0e6mGRNtWHEu0Kp8DeyQmWgd9lnQ2Ot0aini8Xu8
Title: SY19 - Final Grades Review - Tahoma, ID: 1fVh4nIksrxRB5W_U4FlGFybl7koppoB9B6wEt4cBhoY
Title: SY19 - Final Grades Review - Summit Prep, ID: 1bCRAAxLeYi5RzxGGrwnbkbpCnm06ytjz0ieAOjq9zJs
Title: SY19 - Final Grades Review - Shasta, ID: 1Z8n7_752ojtksAKA4OMnLCPDsABmczCgdcL4GfRrrwQ
Title: SY19 - Final Grades Review - K2, ID: 19R524jJEGArFF_HXDQihVBQoU-OyI85dfnZvuB8kAes
Title: SY19 - Final Grades Review - Everest, ID: 18cbmeVT61BbxBy_JkC_RH9N-

In [None]:
# List the contents of a single Drive folder.
# The folder id can be found either by listing all files as above (folders
# show up in that list too) or by opening the folder in a browser and
# copying the string after `folder/`.
folderID = '1NtuZczi7zPQqXAKcd60yQtzPDa6q3jA-'

folderQuery = {'q': f"'{folderID}' in parents"}
folderFiles = GoogleDrive.ListFile(folderQuery).GetList()
for file in folderFiles:
    print(f"Title: {file['title']}, ID: {file['id']}")

In [None]:
# Sample file parameters that can be queried.
# Some of these parameters are themselves dictionaries and contain additional
# parameters that can be queried,
# e.g., `trashed` is a parameter in `labels`

# Recommendation: stick to querying by title, or just list all files.

# Take the last entry of the full listing and display its metadata as a plain dict.
file = fileList[-1]
dict(file)

# Retrieving a file

In [4]:
# Option 1: Use the id
# This is preferred, because it's guaranteed to return a single file

# You can get the id either by using the list above
# OR by clicking on the file in Drive, `Get shareable link`, then copying everything after `id=`
# e.g., https://drive.google.com/open?id=1Mug7698RhjfrVrIgHs6Yc3AM0mhGNA7q


fileID = '1Mug7698RhjfrVrIgHs6Yc3AM0mhGNA7q' # This is an old csv version of sites_historical
# The id of the file can also be retrieved via `file['id']` (see the print statement in the file search examples)

# Build a file handle from the id, then print its title to confirm it is the expected file.
sampleFile = GoogleDrive.CreateFile({'id': fileID})
print(sampleFile['title'])

spswarehouse_sample_file.csv


In [5]:
# Option 2: Look the file up by its exact title.
# Titles are not unique, so this gets messy when several files share a name;
# only the first match is used here.
exactTitleQuery = {'q': "title = 'spswarehouse_sample_file.csv'"}
titleMatches = GoogleDrive.ListFile(exactTitleQuery).GetList()
titleSampleFile = titleMatches[0]
print(titleSampleFile['title'])

spswarehouse_sample_file.csv


In [None]:
# Download the file to your local machine, saved under the name in `filename`.
# The cleanup cell at the end of the notebook deletes this local copy again.
filename = 'data.csv'
sampleFile.GetContentFile(filename)

In [6]:
# Retrieve the file contents as a string, which you can then manipulate
dataString = sampleFile.GetContentString()
# Preview only the first 500 characters rather than the whole file
dataString[:500]

'as_of,academic_year,site_id,site_name,site_short_name,grades_served,years_open,used_personalized_learning,authorizing_authority,school_leader,electives_schedule\r\n5/21/2019,2004,3,Summit Preparatory High School,Prep,"{,9,}",1,FALSE,Summerville Union HSD,Diane Tavenner,Intersession\r\n5/21/2019,2005,3,Summit Preparatory High School,Prep,"{,9,10,}",2,FALSE,Summerville Union HSD,Diane Tavenner,Intersession\r\n5/21/2019,2006,3,Summit Preparatory High School,Prep,"{,9,10,11,}",3,FALSE,Summerville Union HS'

# Uploading to Warehouse from Drive

In [7]:
# Target location in the warehouse for the demo table.
table_name = 'google_drive_test'
schema = 'wild_west'

# Build the CREATE TABLE statement from the Drive file and print it for
# review; it is executed in a later cell.
create_sql = create_table_stmt(
    table_name,
    schema=schema,
    google_drive_id=fileID,
)
print(create_sql)

CREATE TABLE wild_west.google_drive_test (as_of DATE, academic_year INTEGER, site_id INTEGER, site_name VARCHAR, site_short_name VARCHAR, grades_served VARCHAR, years_open INTEGER, used_personalized_learning BOOLEAN, authorizing_authority VARCHAR, school_leader VARCHAR, electives_schedule VARCHAR) COMMENT = ''


In [None]:
# Run the CREATE TABLE statement held in `create_sql`.
Warehouse.execute(create_sql)

In [None]:
# Reflect the warehouse table, then upload the Drive file's contents into it.
reflected_table = Warehouse.reflect(
    table_name,
    schema=schema,
)
upload_to_warehouse(
    reflected_table,
    google_drive_id=fileID,
)

# Cleanup of this notebook

#### Run this when you're done to clean up loose ends.

In [None]:
# Drop the demo table created earlier in this notebook.
Warehouse.execute(f"DROP TABLE IF EXISTS {schema}.{table_name}")

# Remove the local download, if there is one. `filename` is only defined when
# the optional download cell was run, so an undefined name is treated the same
# as a missing file instead of making the cleanup fail with a NameError.
try:
    os.remove(filename)
except (NameError, FileNotFoundError):
    pass