In [None]:
pip install google-api-python-client

In [None]:
pip install --upgrade oauth2client

In [1]:
#import libraries
import numpy as np
import pandas as pd
from google.oauth2 import service_account
from apiclient.discovery import build
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

In [2]:
"""Hello Analytics Reporting API V4."""

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials


SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
KEY_FILE_LOCATION = 'hallowed-valve-368511-c2eec52bce5c.json'
VIEW_ID = '214639456'


def initialize_analyticsreporting():
  """Initializes an Analytics Reporting API V4 service object.

  Returns:
    An authorized Analytics Reporting API V4 service object.
  """
  credentials = ServiceAccountCredentials.from_json_keyfile_name(
      KEY_FILE_LOCATION, SCOPES)

  # Build the service object.
  analytics = build('analyticsreporting', 'v4', credentials=credentials)

  return analytics


def get_report(analytics):
  """Queries the Analytics Reporting API V4.

  Args:
    analytics: An authorized Analytics Reporting API V4 service object.
  Returns:
    The Analytics Reporting API V4 response.
  """
  return analytics.reports().batchGet(
      body={
        'reportRequests': [
        {
          'viewId': VIEW_ID,
          'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
          'metrics': [{'expression': 'ga:users'}],
          'dimensions': [{'name': 'ga:date'}]
        }]
      }
  ).execute()


def print_response(response):
  """Parses and prints the Analytics Reporting API V4 response.

  Args:
    response: An Analytics Reporting API V4 response.
  """
  for report in response.get('reports', []):
    columnHeader = report.get('columnHeader', {})
    dimensionHeaders = columnHeader.get('dimensions', [])
    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])

    for row in report.get('data', {}).get('rows', []):
      dimensions = row.get('dimensions', [])
      dateRangeValues = row.get('metrics', [])

      for header, dimension in zip(dimensionHeaders, dimensions):
        print(header + ': ', dimension)

      for i, values in enumerate(dateRangeValues):
        print('Date range:', str(i))
        for metricHeader, value in zip(metricHeaders, values.get('values')):
          print(metricHeader.get('name') + ':', value)


def main():
  analytics = initialize_analyticsreporting()
  response = get_report(analytics)
  print_response(response)

if __name__ == '__main__':
  main()

ga:date:  20221109
Date range: 0
ga:users: 6
ga:date:  20221110
Date range: 0
ga:users: 2
ga:date:  20221111
Date range: 0
ga:users: 2
ga:date:  20221112
Date range: 0
ga:users: 2
ga:date:  20221113
Date range: 0
ga:users: 3
ga:date:  20221114
Date range: 0
ga:users: 1
ga:date:  20221115
Date range: 0
ga:users: 1
ga:date:  20221116
Date range: 0
ga:users: 1


In [3]:
"""Hello Analytics Reporting API V4 Customized Pandas Data frames."""

def format_summary(response):
    try:
        # create row index
        try: 
            row_index_names = response['reports'][0]['columnHeader']['dimensions']
            row_index = [ element['dimensions'] for element in response['reports'][0]['data']['rows'] ]
            row_index_named = pd.MultiIndex.from_arrays(np.transpose(np.array(row_index)), 
                                                        names = np.array(row_index_names))
        except:
            row_index_named = None
        
        # extract column names
        summary_column_names = [item['name'] for item in response['reports'][0]
                                ['columnHeader']['metricHeader']['metricHeaderEntries']]
    
        # extract table values
        summary_values = [element['metrics'][0]['values'] for element in response['reports'][0]['data']['rows']]
    
        # combine. I used type 'float' because default is object, and as far as I know, all values are numeric
        df = pd.DataFrame(data = np.array(summary_values), 
                          index = row_index_named, 
                          columns = summary_column_names).astype('float')
    
    except:
        df = pd.DataFrame()
        
    return df

def format_pivot(response):
    try:
        # extract table values
        pivot_values = [item['metrics'][0]['pivotValueRegions'][0]['values'] for item in response['reports'][0]
                        ['data']['rows']]
        
        # create column index
        top_header = [item['dimensionValues'] for item in response['reports'][0]
                      ['columnHeader']['metricHeader']['pivotHeaders'][0]['pivotHeaderEntries']]
        column_metrics = [item['metric']['name'] for item in response['reports'][0]
                          ['columnHeader']['metricHeader']['pivotHeaders'][0]['pivotHeaderEntries']]
        array = np.concatenate((np.array(top_header),
                                np.array(column_metrics).reshape((len(column_metrics),1))), 
                               axis = 1)
        column_index = pd.MultiIndex.from_arrays(np.transpose(array))
        
        # create row index
        try:
            row_index_names = response['reports'][0]['columnHeader']['dimensions']
            row_index = [ element['dimensions'] for element in response['reports'][0]['data']['rows'] ]
            row_index_named = pd.MultiIndex.from_arrays(np.transpose(np.array(row_index)), 
                                                        names = np.array(row_index_names))
        except: 
            row_index_named = None
        # combine into a dataframe
        df = pd.DataFrame(data = np.array(pivot_values), 
                          index = row_index_named, 
                          columns = column_index).astype('float')
    except:
        df = pd.DataFrame()
    return df

def format_report(response):
    summary = format_summary(response)
    pivot = format_pivot(response)
    if pivot.columns.nlevels == 2:
        summary.columns = [['']*len(summary.columns), summary.columns]
    
    return(pd.concat([summary, pivot], axis = 1))

def run_report(body, credentials_file):
    #Create service credentials
    credentials = service_account.Credentials.from_service_account_file(credentials_file, 
                                scopes = ['https://www.googleapis.com/auth/analytics.readonly'])
    #Create a service object
    service = build('analyticsreporting', 'v4', credentials=credentials)
    
    #Get GA data
    response = service.reports().batchGet(body=body).execute()
    
    return(format_report(response))

----------------------------------------

In [4]:
your_view_id = '214639456'
ga_keys = 'hallowed-valve-368511-c2eec52bce5c.json'

In [49]:
body1 = body1 = {'reportRequests': [{'viewId': your_view_id, 
                            'dateRanges': [{'startDate': '2019-01-01', 'endDate': '2022-11-10'}],
                            'metrics': [{'expression': 'ga:users'}, 
                                        {'expression': 'ga:bounceRate'},
                                       {'expression': 'ga:avgSessionDuration'},
                                       {'expression': 'ga:newUsers'},
                                       #{'expression': 'ga:sessionscountry'},
                                       {'expression': 'ga:timeOnPage'},
                                       {'expression': 'ga:percentNewSessions'}],
                            'dimensions': [{'name': 'ga:date'}]
                                       }]
                          }

In [50]:
ga_report = run_report(body1,ga_keys)
ga_report

Unnamed: 0_level_0,ga:users,ga:bounceRate,ga:avgSessionDuration,ga:newUsers,ga:timeOnPage,ga:percentNewSessions
ga:date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20200402,8.0,69.230769,335.230769,8.0,4357.0,61.538462
20200403,3.0,100.000000,0.000000,3.0,0.0,60.000000
20200406,1.0,100.000000,0.000000,1.0,0.0,100.000000
20200407,2.0,50.000000,4.500000,1.0,9.0,50.000000
20200408,9.0,66.666667,17.888889,8.0,161.0,88.888889
...,...,...,...,...,...,...
20221106,1.0,0.000000,157.000000,1.0,157.0,100.000000
20221107,3.0,0.000000,379.000000,3.0,1137.0,100.000000
20221108,4.0,0.000000,2.000000,3.0,9.0,60.000000
20221109,6.0,0.000000,159.714286,5.0,1120.0,71.428571


In [51]:
summary_body = {'reportRequests': [{'viewId': your_view_id, 
                            'dateRanges': [{'startDate': '2019-01-01', 'endDate': '2022-11-10'}],
                            'metrics': [{'expression': 'ga:sessions'}, 
                                        {'expression': 'ga:totalEvents'}, 
                                        {"expression": "ga:avgSessionDuration"}],
                            'dimensions': [{'name': 'ga:country'}],
                          }]}

In [52]:
pivot_body = {'reportRequests': [{'viewId': your_view_id, 
                            'dateRanges': [{'startDate': '2019-01-01', 'endDate': '2022-11-10'}],
                            'dimensions': [{'name':  "ga:channelGrouping"}],
                            "pivots": [{"dimensions": [{"name": 'ga:yearMonth'}],
                                        "metrics": [{"expression": "ga:users"},
                                                    {"expression": "ga:newUsers"},
                                                    {"expression": "ga:timeOnPage"}]
                                       }]
                          }]}

In [53]:
short_body = {  "reportRequests":
  [{
      "viewId": your_view_id,
      "dateRanges": [{"startDate": "7daysAgo", "endDate": "yesterday"}],
      "metrics": [{"expression": "ga:users"}]
    }]}

In [54]:
untidy_body = {'reportRequests': [{'viewId': your_view_id, 
                            'dateRanges': [{'startDate': '2019-01-01', 'endDate': '2022-11-10'}],
                            "pivots": [{"dimensions": [{"name": 'ga:yearMonth'}, {"name": "ga:channelGrouping"}],
                                        "metrics": [{"expression": "ga:users"},
                                                    {"expression": "ga:timeOnPage"}]
                                       }]
                          }]}

In [64]:
body4 = body4 = {'reportRequests': [{'viewId': your_view_id, 
                            'dateRanges': [{'startDate': '2019-01-01', 'endDate': '2022-11-10'}],
                            'metrics': [{'expression': 'ga:users'},
                                        {"expression": "ga:avgSessionDuration"},
                                        {"expression": "ga:sessions"},
                                        {"expression": "ga:newUsers"},
                                       {'expression': 'ga:timeOnPage'},
                                        {'expression': 'ga:avgTimeOnPage'},
                                       {'expression': 'ga:percentNewSessions'},
                                        {'expression': 'ga:organicSearches'},
                                        #{'expression': 'ga:bounces'},
                                        #{'expression': 'ga:cohortTotalUsersWithLifetimeCriteria'},
                                        #{'expression': 'ga:exits'},
                                        #{'expression': 'ga:exitRate'},
                                    {'expression': 'ga:newUsers'}],
                            'dimensions': [{'name': 'ga:date'},
                                           {"name": "ga:userType"},
                                           {"name": "ga:deviceCategory"},
                                         {"name":  "ga:country"}],
                                     "orderBys": [{"fieldName": "ga:date", "sortOrder": "ASCENDING"}],
                          }]}

report = run_report(body4, ga_keys).reset_index() # index is turned into columns to help with visualization
report

Unnamed: 0,ga:date,ga:userType,ga:deviceCategory,ga:country,ga:users,ga:avgSessionDuration,ga:sessions,ga:newUsers,ga:timeOnPage,ga:avgTimeOnPage,ga:percentNewSessions,ga:organicSearches,ga:newUsers.1
0,20200916,New Visitor,desktop,Belgium,4.0,262.25,4.0,4.0,1050.0,105.000000,100.0,1.0,4.0
1,20200916,New Visitor,desktop,Ireland,1.0,70.00,1.0,1.0,70.0,14.000000,100.0,1.0,1.0
2,20200916,New Visitor,desktop,Italy,2.0,0.00,2.0,2.0,0.0,0.000000,100.0,1.0,2.0
3,20200916,New Visitor,desktop,Netherlands,1.0,0.00,1.0,1.0,0.0,0.000000,100.0,0.0,1.0
4,20200916,New Visitor,desktop,Singapore,1.0,0.00,1.0,1.0,0.0,0.000000,100.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,20201228,New Visitor,desktop,Spain,1.0,0.00,1.0,1.0,0.0,0.000000,100.0,0.0,1.0
996,20201228,New Visitor,mobile,Italy,1.0,0.00,1.0,1.0,0.0,0.000000,100.0,1.0,1.0
997,20201228,New Visitor,mobile,Singapore,2.0,0.00,2.0,2.0,0.0,0.000000,100.0,0.0,2.0
998,20201228,Returning Visitor,desktop,India,1.0,0.00,1.0,0.0,0.0,0.000000,0.0,0.0,0.0


In [27]:
ga_report = run_report(summary_body,ga_keys)
ga_report

Unnamed: 0_level_0,ga:sessions,ga:totalEvents,ga:avgSessionDuration
ga:country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
(not set),25.0,0.0,33.120000
Afghanistan,21.0,0.0,44.619048
Albania,20.0,0.0,142.750000
Algeria,20.0,0.0,33.700000
Andorra,2.0,0.0,0.000000
...,...,...,...
Venezuela,31.0,0.0,31.838710
Vietnam,69.0,0.0,31.115942
Yemen,7.0,0.0,35.857143
Zambia,7.0,0.0,78.000000


In [28]:
ga_report1 = run_report(body2,ga_keys)
ga_report1

Unnamed: 0_level_0,ga:users,ga:bounceRate,ga:avgSessionDuration,ga:newUsers,ga:timeOnPage,ga:percentNewSessions
ga:date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20200402,8.0,69.230769,335.230769,8.0,4357.0,61.538462
20200403,3.0,100.000000,0.000000,3.0,0.0,60.000000
20200406,1.0,100.000000,0.000000,1.0,0.0,100.000000
20200407,2.0,50.000000,4.500000,1.0,9.0,50.000000
20200408,9.0,66.666667,17.888889,8.0,161.0,88.888889
...,...,...,...,...,...,...
20221106,1.0,0.000000,157.000000,1.0,157.0,100.000000
20221107,3.0,0.000000,379.000000,3.0,1137.0,100.000000
20221108,4.0,0.000000,2.000000,3.0,9.0,60.000000
20221109,6.0,0.000000,159.714286,5.0,1120.0,71.428571


In [29]:
ga_report1.info


<bound method DataFrame.info of           ga:users  ga:bounceRate  ga:avgSessionDuration  ga:newUsers  \
ga:date                                                                 
20200402       8.0      69.230769             335.230769          8.0   
20200403       3.0     100.000000               0.000000          3.0   
20200406       1.0     100.000000               0.000000          1.0   
20200407       2.0      50.000000               4.500000          1.0   
20200408       9.0      66.666667              17.888889          8.0   
...            ...            ...                    ...          ...   
20221106       1.0       0.000000             157.000000          1.0   
20221107       3.0       0.000000             379.000000          3.0   
20221108       4.0       0.000000               2.000000          3.0   
20221109       6.0       0.000000             159.714286          5.0   
20221110       2.0       0.000000              55.666667          2.0   

          ga:timeO

In [30]:
size = ga_report.shape
print(size)

(157, 3)


In [31]:
#datatype check
ga_report1.dtypes

ga:users                 float64
ga:bounceRate            float64
ga:avgSessionDuration    float64
ga:newUsers              float64
ga:timeOnPage            float64
ga:percentNewSessions    float64
dtype: object

## References:

- Google Analytics (UA) Reporting API for Python = https://www.youtube.com/watch?v=N2bbJQroGB4
- https://console.cloud.google.com/apis/api/analyticsreporting.googleapis.com/credentials?project=hallowed-valve-368511
- Basics of report creation: https://developers.google.com/analytics/devguides/reporting/core/v4/basics
- Parameters = https://ga-dev-tools.web.app/dimensions-metrics-explorer/time/#date

https://github.com/tanyazyabkina/GoogleAnalyticsReportingAPI_python/blob/main/Google%20Analytics%20Reporting%20API%20GitHub.ipynb