# Google Analytics API - Python

# Install Packages

In [None]:
#Load Libraries
from oauth2client.service_account import ServiceAccountCredentials
from googleapiclient.discovery import build
import httplib2
import pandas as pd
#Packages needed for connecting with Google API
from googleapiclient.discovery import build as google_build #An example with all the statements together
import google.oauth2.credentials #Importing a sub-package
from google_auth_httplib2 import AuthorizedHttp
import sqlalchemy

# Create a service object

In [None]:
# Download the service-account JSON key from your Google Cloud console project
# ("https://console.cloud.google.com/") and save it in your working folder
# under the file name used below.
KEY_FILE = 'service_account_GS.json'
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
DISCOVERY_URL = 'https://analyticsreporting.googleapis.com/$discovery/rest'

# Load the service-account credentials, limited to the scopes listed above
credentials = ServiceAccountCredentials.from_json_keyfile_name(KEY_FILE, SCOPES)

# Create a service object on top of an HTTP client authorized with those credentials
http = credentials.authorize(httplib2.Http())
service = build(
    'analytics',
    'v4',
    http=http,
    discoveryServiceUrl=DISCOVERY_URL,
)

# Authorize client
# NOTE(review): `authorized` is not used by any of the cells visible here — confirm it is still needed.
authorized = AuthorizedHttp(credentials=credentials)

# Set pagePath Filter

In [None]:
# regex='~^/stiri/.*'   ##add any regex you'd want here 

# Make an api-call to Google Analytics

In [None]:
# Fill in these placeholders before running the cell.
# They must be plain strings: the request body is sent as JSON, and a set
# literal such as {'...'} cannot be serialized.
VIEW_ID = 'add-your-google-analytics-view-id-here'
START_DATE = 'enter-startdate-here in YYYY-MM-DD'
END_DATE = 'enter-enddate-here in YYYY-MM-DD'

# Send a single report request and keep the decoded response
response = service.reports().batchGet(
    body={
        'reportRequests': [
            {
                'viewId': VIEW_ID,
                'dateRanges': [{'startDate': START_DATE, 'endDate': END_DATE}],
                'metrics': [{'expression': 'ga:entrances'}],  ##add metrics you'd want here
                'dimensions': [  ##add dimensions you'd want here
                    {'name': 'ga:date'},
                    {'name': 'ga:source'},
                    {'name': 'ga:landingPagePath'},
                    {'name': 'ga:pageTitle'},
                ],
                # Optional pagePath filter; uncomment together with the `regex` variable above
#                 "filtersExpression":f"ga:landingPagePath={regex}",
                'orderBys': [{'fieldName': 'ga:date', 'sortOrder': 'DESCENDING'}],
                'pageSize': 100000,
            }
        ]
    }
).execute()

# Display the raw response in the notebook
response

# Parsing The Report Data

In [None]:
## The response above is heavily nested; it needs to be parsed to extract the data you are looking for
def prase_response(report):
    """Convert one Analytics Reporting API V4 report into pandas DataFrames.

    Column names are the report's dimension names followed by its metric
    names. Each row holds the row's dimension values followed by the values
    of one metrics entry, exactly as they appear in the report (no type
    conversion is done).

    Args:
        report: a single report dict (one element of the response's
            'reports' list).

    Returns:
        A list of DataFrames. The first is built from the first metrics
        entry of every row. A second one is appended only when at least one
        row carries exactly two metrics entries (i.e. a second date range);
        it is built from the second entry of those rows.
    """
    #Collect the column names: dimensions first, then metric names
    column_info = report.get('columnHeader', {})
    metric_entries = column_info.get('metricHeader', {}).get('metricHeaderEntries', [])
    columns = list(column_info.get('dimensions', []))
    columns.extend(entry['name'] for entry in metric_entries)

    #One table per date range (the second stays empty for single-range reports)
    primary_rows = []
    secondary_rows = []
    for record in report.get('data', {}).get('rows', []):
        dims = record.get('dimensions', [])
        metric_sets = record.get('metrics', [])

        #Every row is expected to have at least one metrics entry
        primary_rows.append([*dims, *metric_sets[0]['values']])

        #In case of a second date range, build the matching row for it
        if len(metric_sets) == 2:
            secondary_rows.append([*dims, *metric_sets[1]['values']])

    #Wrap the collected rows in DataFrames sharing the same column names
    frames = [pd.DataFrame(primary_rows, columns=columns)]
    if secondary_rows:
        frames.append(pd.DataFrame(secondary_rows, columns=columns))

    return frames

# Create a DataFrame With Your Data

In [None]:
# Take the first report of the response, parse it, and show the first resulting DataFrame
response_data = response.get('reports', [])[0]
parsed_frames = prase_response(response_data)
print(parsed_frames[0])

In [None]:
## Turn the parsed response data into a DataFrame that you can manipulate
first_frame = prase_response(response_data)[0]
report = pd.DataFrame(first_frame)
report

In [None]:
## Save the report to a CSV file called 'report.csv'. The file is created automatically if it
## does not exist; caution: if a file with this name already exists, its contents are overwritten.
## to_csv returns None when it is given a path, so its result is not assigned to a variable.
report.to_csv('report.csv')