In [4]:
import json
from google.oauth2 import service_account
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import DateRange, Dimension, Metric, RunReportRequest
import pandas as pd
import config

# Reload the config module (useful if you've made changes to config.py and want to reload it in Jupyter Notebook)
#import importlib
#importlib.reload(config)

# Both the service-account key path and the Google Analytics property ID are
# read from the local config module rather than being written in the notebook.
KEY_FILE_LOCATION, PROPERTY_ID = config.KEY_FILE_LOCATION, config.PROPERTY_ID

# Build credentials from the key file, then hand them to the Analytics Data
# API client that is used for the report request.
credentials = service_account.Credentials.from_service_account_file(
    KEY_FILE_LOCATION
)
client = BetaAnalyticsDataClient(credentials=credentials)

# Reporting window: a single day (start and end date are the same).
date_ranges = [DateRange(start_date="2024-01-01", end_date="2024-01-01")]

# Dimension and metric names requested from the API. The `dimensions` and
# `metrics` lists are also read later when the response is turned into a table.
dimensions = [
    Dimension(name=dimension_name)
    for dimension_name in (
        "date",
        "eventName",
        "deviceCategory",
        "country",
        "city",
        "sessionSourceMedium",
        "unifiedPagePathScreen",
    )
]

metrics = [
    Metric(name=metric_name)
    for metric_name in ("eventCount", "activeUsers")
]

# Assemble the report request for the configured property.
request = RunReportRequest(
    property=f"properties/{PROPERTY_ID}",
    date_ranges=date_ranges,
    dimensions=dimensions,
    metrics=metrics,
    # Server-side ordering is left disabled; the rows are sorted after conversion.
    #order_bys=[{"dimension": {"name": "date"}}]  # Order by date
)

# Run the report. On failure, print a short message and re-raise: a raised
# exception stops this cell (and any "Run All") with the full traceback, so
# later cells never run against a missing `response`.
try:
    response = client.run_report(request)
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# A single response only carries one page of rows. Per the Data API reference,
# `row_count` is the total number of rows in the query result, so a larger
# value than the rows received means the report below is incomplete.
if response.row_count > len(response.rows):
    print(
        f"Warning: received {len(response.rows)} of {response.row_count} rows; "
        "set limit/offset on the request to fetch the remainder."
    )

# Process the response and convert it to a DataFrame
def process_response(response, convert_metrics=True):
    """Convert a report response into a pandas DataFrame.

    Column names are taken from the response's own ``dimension_headers`` and
    ``metric_headers`` instead of notebook-level variables, so the function
    works on any response regardless of which request cell was run last.

    Args:
        response: Object exposing ``dimension_headers``, ``metric_headers``
            (each item with a ``name``) and ``rows`` (each row with
            ``dimension_values`` / ``metric_values`` whose items have a
            ``value``), e.g. the result of ``client.run_report``.
        convert_metrics: When True (default), metric columns are converted
            with ``pd.to_numeric`` so they can be summed and sorted as
            numbers. Pass False to keep the raw string values.

    Returns:
        A DataFrame with one row per response row; dimension columns come
        first, followed by metric columns. An empty response yields an empty
        DataFrame that still has every column, so later column lookups such
        as sorting by ``"date"`` do not fail.

    Raises:
        ValueError: If ``convert_metrics`` is True and a metric value cannot
            be parsed as a number (raised by ``pd.to_numeric``).
    """
    dimension_names = [header.name for header in response.dimension_headers]
    metric_names = [header.name for header in response.metric_headers]

    # One flat list of cell values per row, in header order.
    records = [
        [cell.value for cell in row.dimension_values]
        + [cell.value for cell in row.metric_values]
        for row in response.rows
    ]

    # Passing `columns` explicitly keeps the schema even when there are no rows.
    df = pd.DataFrame(records, columns=dimension_names + metric_names)

    # Values arrive as strings; without conversion metrics would sort and
    # aggregate lexicographically ("10" < "9").
    if convert_metrics and records:
        for metric_name in metric_names:
            df[metric_name] = pd.to_numeric(df[metric_name])
    return df

# Build the DataFrame from the API response and order it by date.
df = process_response(response)

# A report with no rows can produce a frame without a "date" column; sorting
# on a missing column raises KeyError, so only sort when the column exists.
# Reassigning (instead of inplace=True) keeps the cell safe to re-run.
if "date" in df.columns:
    df = df.sort_values(by="date")

# Display the DataFrame (last expression of the cell)
df


Unnamed: 0,date,eventName,deviceCategory,country,city,sessionSourceMedium,unifiedPagePathScreen,eventCount,activeUsers
0,20240101,custom_scroll,mobile,Canada,Vancouver,google / organic,/people,38,1
2230,20240101,page_view,mobile,Canada,Trois-Rivieres,google / organic,/people/daswieczorek-andrea,1,1
2231,20240101,page_view,mobile,Canada,Val-d'Or,google / organic,/en/people/kraag-scott-a,1,1
2232,20240101,page_view,mobile,Canada,Val-d'Or,google / organic,/en/people/neil-stephen,1,1
2233,20240101,page_view,mobile,Canada,Vancouver,google / organic,/,1,1
...,...,...,...,...,...,...,...,...,...
1122,20240101,custom_scroll,desktop,Canada,Toronto,reddit.com / referral,/our-latest-thinking/staying-current,1,1
1123,20240101,custom_scroll,desktop,Canada,Vancouver,google / organic,/en/people/cusano-luigi-a,1,1
1124,20240101,custom_scroll,desktop,Canada,Winnipeg,google / organic,/,1,1
1152,20240101,custom_scroll,desktop,United States,(not set),google / organic,/our-latest-thinking/publications/2021/11/risk...,1,1


In [14]:
# Export the DataFrame to an Excel file. The path is named once so that the
# confirmation message always reports the file that was actually written.
OUTPUT_PATH = 'events.xlsx'
df.to_excel(OUTPUT_PATH, index=False)  # index=False leaves the row index out of the sheet

print(f"DataFrame has been exported to '{OUTPUT_PATH}'")


DataFrame has been exported to 'output.xlsx'
