In [1]:
"""Hello Analytics Reporting API V4."""
import os
from dotenv import load_dotenv
import argparse

from apiclient.discovery import build
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
import pandas as pd
import numpy as np

from datetime import datetime, date, timedelta
from time import sleep

# Load environement variables
load_dotenv()

SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
DISCOVERY_URI = ('https://analyticsreporting.googleapis.com/$discovery/rest')

path = os.getcwd()
CLIENT_SECRETS_PATH = path + "\\client_id.json"

VIEW_ID = os.environ.get("VIEW_ID")

In [2]:
def return_last_sunday():
    yesterday = date.today() - timedelta(1)
    last_sunday = yesterday - timedelta((yesterday.weekday() + 1) % 7)
    return str(last_sunday)

def return_last_monday():
    yesterday = date.today() - timedelta(1)
    last_monday = yesterday - timedelta((yesterday.weekday() + 7) % 7)
    return str(last_monday)


def initialize_analyticsreporting():
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      parents=[tools.argparser])
    flags = parser.parse_args([])
    
    # Set up a Flow object to be used if we need to authenticate.
    flow = client.flow_from_clientsecrets(
        CLIENT_SECRETS_PATH, scope=SCOPES,
        message=tools.message_if_missing(CLIENT_SECRETS_PATH))

    # Prepare credentials, and authorize HTTP object with them.
    # If the credentials don't exist or are invalid run through the native client
    # flow. The Storage object will ensure that if successful the good
    # credentials will get written back to a file.
    storage = file.Storage('analyticsreporting.dat')
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = tools.run_flow(flow, storage, flags)
    
    http = credentials.authorize(http=httplib2.Http())

    # Build the service object.
    analytics = build('analytics', 'v4', http=http, discoveryServiceUrl=DISCOVERY_URI)
    
    return analytics

def get_report(analytics, start_date, end_date, view_id, metrics, dimensions):
    # Use the Analytics Service Object to query the Analytics Reporting API V4.
    return analytics.reports().batchGet(
        body={
            "reportRequests": [
                {
                    "viewId": VIEW_ID,
                    "dateRanges": [{'startDate':start_date, 'endDate': end_date}],
                    "metrics": metrics,
                    "dimensions": dimensions,
                }]}
            ).execute()


def print_response(response):
    list = []
    # get report data
    for report in response.get('reports', []):
        # set column headers
        columnHeader = report.get('columnHeader', {})
        dimensionHeaders = columnHeader.get('dimensions', [])
        metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
        rows = report.get('data', {}).get('rows', [])
        
        for row in rows:
            # create dict for each row
            dict = {}
            dimensions = row.get('dimensions', [])
            dateRangeValues = row.get('metrics', [])
            
            # fill dict with dimension header (key) and dimension value (value)
            for header, dimension in zip(dimensionHeaders, dimensions):
                dict[header] = dimension

        # fill dict with metric header (key) and metric value (value)
        for i, values in enumerate(dateRangeValues):
            for metric, value in zip(metricHeaders, values.get('values')):
                #set int as int, float a float
                if ',' in value or '.' in value:
                    dict[metric.get('name')] = float(value)
                else:
                    dict[metric.get('name')] = int(value)

        list.append(dict)
        df = pd.DataFrame(list)
        return df

def return_ga_data(start_date, end_date, view_id, metrics, dimensions, split_dates, group_by):
    
    if split_dates == False:
        return print_response(get_report(initialize_analyticsreporting(), start_date, end_date, view_id, metrics, dimensions))
    
    else:
        start_date = datetime.strptime(start_date, '%Y-%m-%d').date()
        end_date = datetime.strptime(end_date, '%Y-%m-%d').date()
        delta = end_date - start_date         # timedelta
        dates = []
    
        for i in range(delta.days + 1):
            dates.append(start_date + timedelta(days=i))
        
        df_total = pd.DataFrame()
        
        for date in dates:
            date = str(date)
            df_total = df_total.append(print_response(get_report(initialize_analyticsreporting(), date, date, view_id, metrics, dimensions)))
            sleep(1)
            
        if len(group_by) != 0:
            df_total = df_total.groupby(group_by).sum()
            
        return df_total
        
df = return_ga_data(
    start_date=return_last_monday(),
    end_date=return_last_sunday(),
    view_id='100555616',
    metrics=[{"expression": "ga:avgSessionDuration"}],
    dimensions=[],
    split_dates=True,
    group_by=[],
)

df

In [15]:
df['ga:avgSessionDuration'] = str(timedelta(df['ga:avgSessionDuration']))
df

KeyError: 'ga:avgSessionDuration'

In [3]:
def return_last_sunday():
    yesterday = date.today() - timedelta(1)
    last_sunday = yesterday - timedelta((yesterday.weekday() + 1) % 7)
    return str(last_sunday)

monday = return_last_sunday()
monday

'2019-04-14'