ATTENTION: Please ensure that the 'client_secret.json' and 'credentials.json' files are in the same directory as this notebook before running it!

In [1]:
from __future__ import print_function
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
import pandas as pd

# ID of the spreadsheet to read; main() passes it to get_google_sheet(),
# which forwards it to the Sheets API as `spreadsheetId`.
SPREADSHEET_ID = '1i87UrRoLSHpPUJX_kllun8Ur1or_AmHi0z-2r1LVnFE'
# Range requested from that spreadsheet (forwarded as the API `range` argument).
RANGE_NAME = 'Sheet1'

def get_google_sheet(spreadsheet_id, range_name):
    """Fetch a range of cell values from a Google spreadsheet.

    Credentials are looked up in 'credentials.json'. When none are stored,
    or the stored ones are flagged invalid, an OAuth flow is built from
    'client_secret.json' (read-only spreadsheets scope) and run with the
    same credential store.

    Args:
        spreadsheet_id: Value sent to the API as ``spreadsheetId``.
        range_name: Value sent to the API as ``range``.

    Returns:
        The result of executing the ``spreadsheets().values().get`` request.
    """
    # Load whatever credentials are already stored on disk.
    token_store = file.Storage('credentials.json')
    credentials = token_store.get()

    # Fall back to the interactive OAuth flow when we have nothing usable.
    if not (credentials and not credentials.invalid):
        oauth_flow = client.flow_from_clientsecrets(
            'client_secret.json',
            'https://www.googleapis.com/auth/spreadsheets.readonly',
        )
        credentials = tools.run_flow(oauth_flow, token_store)

    # Build an authorized Sheets v4 client.
    authorized_http = credentials.authorize(Http())
    sheets_service = build('sheets', 'v4', http=authorized_http)

    # Request the values for the given range and hand back the response.
    request = sheets_service.spreadsheets().values().get(
        spreadsheetId=spreadsheet_id,
        range=range_name,
    )
    return request.execute()


In [2]:
def gsheet2df(gsheet):
    """Convert a Google Sheets values response to a pandas DataFrame.

    The first row of ``gsheet['values']`` is used as the column header and
    every following row becomes one DataFrame row.

    Rows may be shorter than the header (for example when trailing cells
    of a row are empty); the missing cells are filled with ``None`` rather
    than raising ``IndexError``. Cells beyond the header width are ignored.

    Args:
        gsheet: Mapping with a 'values' entry holding a list of rows, each
            row being a list of cell values.

    Returns:
        A DataFrame with one column per header cell, or ``None`` (after
        printing 'No data found.') when the response has no header row or
        no data rows.
    """
    rows = gsheet.get('values', [])

    # Need a header row plus at least one data row; an empty response must
    # not crash on rows[0].
    if len(rows) < 2:
        print('No data found.')
        return None

    header, values = rows[0], rows[1:]

    # Build one Series per header column, padding short rows with None.
    all_data = [
        pd.Series(
            data=[row[col_id] if col_id < len(row) else None for row in values],
            name=col_name,
        )
        for col_id, col_name in enumerate(header)
    ]
    return pd.concat(all_data, axis=1)
    

In [8]:
def main():
    """Download the configured sheet, preview it, and save it as CSV.

    Fetches SPREADSHEET_ID / RANGE_NAME via get_google_sheet(), converts the
    response with gsheet2df(), prints the DataFrame's shape and first rows,
    and writes it to 'top_100_file.csv' in the working directory.

    Raises:
        AssertionError: If gsheet2df() did not return a DataFrame.
    """
    gsheet = get_google_sheet(SPREADSHEET_ID, RANGE_NAME)
    df = gsheet2df(gsheet)

    # gsheet2df() returns None when the sheet has no data rows. Check
    # explicitly instead of using `assert`, which is stripped under
    # `python -O`; the exception type is kept the same as before.
    if not isinstance(df, pd.DataFrame):
        raise AssertionError(
            'Expected a pandas DataFrame from gsheet2df, got %s'
            % type(df).__name__
        )

    # Prints dimensions of dataframe
    print('Dataframe size = ', df.shape)

    # Print dataframe to console
    print(df.head())

    # Convert dataframe to csv and place it in working directory
    df.to_csv('top_100_file.csv')

In [9]:
# Run the download/convert/export pipeline only when executed as a script
# (or notebook cell), not when this module is imported.
if __name__ == "__main__":
    main()

Dataframe size =  (100, 3)
  Current Players Peak today                              Game
0         712,986    794,832     PLAYERUNKNOWN'S BATTLEGROUNDS
1         708,682    754,854                            Dota 2
2         392,210    478,849  Counter-Strike: Global Offensive
3          84,689     88,184    Tom Clancy's Rainbow Six Siege
4          69,455     71,320                          Warframe
