In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the only Drive path in this notebook is the commented-out
# KEY_FILE_LOCATION alternative — confirm whether this mount is still required.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%%capture
!pip install --upgrade google-api-python-client
!pip install --upgrade oauth2client

In [None]:
"""Hello Analytics Reporting API V4."""
import pandas as pd
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
# OAuth scope requested for the service account: read-only Google Analytics access.
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
# Path of the service-account JSON key file. The active value is a relative
# path, so the key file must be present in the notebook's working directory;
# the commented-out alternative points at a copy on the mounted Google Drive.
#KEY_FILE_LOCATION = '/content/drive/MyDrive/idc/googleAnalyticsApi/googleAnalyticsApiServiceAccount.json'
KEY_FILE_LOCATION = 'googleAnalyticsApiServiceAccount.json'

In [None]:
# Authorize with the service-account key file and build a Reporting API v4 client.
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    filename=KEY_FILE_LOCATION,
    scopes=SCOPES,
)
service = build(
    serviceName='analyticsreporting',
    version='v4',
    credentials=credentials,
)

In [None]:
# Google Analytics views to export, written as (view ID, website label) pairs;
# each pair is expanded into a {'view_id': ..., 'website': ...} record.
VIEW_DATA = [
    {'view_id': view_id, 'website': website}
    for view_id, website in (
        ('227406749', 'discourse'),
        ('227111584', 'learn'),
        ('229351241', 'portal'),
        ('229309489', 'viewer'),
    )
]

In [None]:
def initialize_analyticsreporting():
  """Initializes an Analytics Reporting API V4 service object.

  The service-account key is loaded with google-auth's
  ``service_account.Credentials`` (already imported by this notebook)
  rather than the deprecated ``oauth2client`` library.

  Returns:
    An authorized Analytics Reporting API V4 service object.
  """
  # Read the key file at KEY_FILE_LOCATION and restrict the credentials to SCOPES.
  credentials = Credentials.from_service_account_file(
      KEY_FILE_LOCATION, scopes=SCOPES)

  # Build the service object.
  return build('analyticsreporting', 'v4', credentials=credentials)

In [None]:
def audience_report(analytics, view_id, page_token=None, page_size=1000,
                    start_date='1500daysAgo', end_date='today'):
    """Queries the Analytics Reporting API V4 for one page of the audience report.

    The report is broken down by date and country and requests the sessions,
    users, newUsers, pageviews, sessionDuration and bounces metrics.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.
        view_id: The view ID to query.
        page_token: Page token for pagination; None requests the first page.
        page_size: Maximum number of rows requested per page.
        start_date: Start of the queried date range.
        end_date: End of the queried date range.
    Returns:
        The Analytics Reporting API V4 response.
    """
    request_body = {
        'viewId': view_id,
        'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
        'metrics': [
            {'expression': 'ga:sessions'},
            {'expression': 'ga:users'},
            {'expression': 'ga:newUsers'},
            {'expression': 'ga:pageviews'},
            {'expression': 'ga:sessionDuration'},
            {'expression': 'ga:bounces'}
        ],
        'dimensions': [
            {'name': 'ga:date'},
            {'name': 'ga:country'}
        ],
        'pageSize': page_size,
        'pageToken': page_token
    }

    # batchGet accepts a list of report requests; exactly one is sent here.
    return analytics.reports().batchGet(body={'reportRequests': [request_body]}).execute()

def parse_report(response, view_id, website):
    """Parses the Analytics Reporting API V4 response into a DataFrame.

    Only the first report of the response is read. A response with no report,
    no ``data`` section or no ``rows`` produces an empty DataFrame that still
    carries the expected columns instead of raising ``KeyError``.

    Args:
        response: The Analytics Reporting API V4 response.
        view_id: The view ID.
        website: The corresponding website name.
    Returns:
        A pandas DataFrame with one row per report row: viewId, website, the
        row's dimension values (date, country) and its metric values, copied
        from the response without any type conversion.
    """
    columns = ['viewId', 'website', 'date', 'country',
               'sessions', 'users', 'newUsers', 'pageviews', 'session_Duration', 'bounces']

    reports = response.get('reports') or [{}]
    data_rows = reports[0].get('data', {}).get('rows', [])

    # row['metrics'] holds one entry per requested date range (see the API
    # reference); the request uses a single range, so index 0 is read.
    parsed_data = [
        [view_id, website] + row['dimensions'] + row['metrics'][0]['values']
        for row in data_rows
    ]

    return pd.DataFrame(parsed_data, columns=columns)

# Build the authorized Reporting API client used for the requests below.
analytics = initialize_analyticsreporting()

# Download every page of the audience report, one view at a time.
parsed_dfs = []
for view_info in VIEW_DATA:
    print(view_info['view_id'])
    view_data = []  # rows accumulated across all pages of the current view
    response = audience_report(analytics, view_info['view_id'])

    while True:
        # Stop as soon as a response carries no rows.
        has_rows = (
            'reports' in response
            and 'data' in response['reports'][0]
            and 'rows' in response['reports'][0]['data']
        )
        if not has_rows:
            break

        report = response['reports'][0]
        view_data.extend(report['data']['rows'])

        # A missing/empty nextPageToken means this was the last page.
        page_token = report.get('nextPageToken')
        if not page_token:
            break
        response = audience_report(analytics, view_info['view_id'], page_token)

    # Re-wrap the collected rows in the response layout parse_report() reads.
    parsed_df = parse_report({'reports': [{'data': {'rows': view_data}}]},
                             view_info['view_id'], view_info['website'])
    parsed_dfs.append(parsed_df)

# One DataFrame for all views, with a fresh 0..n-1 index.
combined_audience_df = pd.concat(parsed_dfs, ignore_index=True)

# ga:date arrives as a YYYYMMDD string; convert it to datetime.
combined_audience_df['date'] = pd.to_datetime(combined_audience_df['date'], format='%Y%m%d')

# Show the combined result as the cell output.
combined_audience_df



227406749
227111584
229351241
229309489


Unnamed: 0,viewId,website,date,country,sessions,users,newUsers,pageviews,session_Duration,bounces
0,227406749,discourse,2020-08-26,United States,2,2,2,3,9.0,1
1,227406749,discourse,2020-08-27,United States,13,8,7,33,6243.0,6
2,227406749,discourse,2020-08-28,United States,8,5,3,22,1358.0,4
3,227406749,discourse,2020-08-29,United States,1,1,0,2,6.0,0
4,227406749,discourse,2020-08-30,United States,2,2,0,2,0.0,2
...,...,...,...,...,...,...,...,...,...,...
23924,229309489,viewer,2023-07-04,Peru,1,1,0,2,0.0,0
23925,229309489,viewer,2023-07-04,Slovakia,1,1,1,1,0.0,1
23926,229309489,viewer,2023-07-04,Thailand,1,1,1,1,0.0,1
23927,229309489,viewer,2023-07-04,United Kingdom,1,1,1,6,632.0,0




In [None]:
import pandas as pd
from datetime import datetime

# Metric columns of the audience report mapped to their display names
metrics_columns = {
    'sessions': 'Sessions',
    'users': 'Users',
    'newUsers': 'New Users',
    'pageviews': 'Pageviews',
    'session_Duration': 'Session Duration',
    'bounces': 'Bounces'
}

# Cast the metric columns to float so they can be summed.
# Note: this updates combined_audience_df itself, so the float columns are
# also what any later cell using that DataFrame will see.
for col in metrics_columns.keys():
    combined_audience_df[col] = combined_audience_df[col].astype(float)

# Sum every metric per (website, date), i.e. across all countries
grouped_audience_df = combined_audience_df.groupby(['website', 'date'])[list(metrics_columns.keys())].sum().reset_index()

# Reshape to long format: one record per (website, date, metric)
summary_rows = []
for index, row in grouped_audience_df.iterrows():
    date = row['date']
    for metric_column, display_name in metrics_columns.items():
        summary_rows.append({
            'date': date,
            'aggregate': float(row[metric_column]),
            'metric_name': display_name,
            'website': row['website']
        })

# Build the table in a single call. The previous pd.concat over one
# single-row DataFrame per record was needlessly slow and raised on an empty
# record list; passing `columns` keeps the column layout even when empty.
audience_summary_table = pd.DataFrame(
    summary_rows, columns=['date', 'aggregate', 'metric_name', 'website'])

# Display the summary table
audience_summary_table


Unnamed: 0,date,aggregate,metric_name,website
0,2020-08-26,2.0,Sessions,discourse
1,2020-08-26,2.0,Users,discourse
2,2020-08-26,2.0,New Users,discourse
3,2020-08-26,3.0,Pageviews,discourse
4,2020-08-26,9.0,Session Duration,discourse
...,...,...,...,...
23365,2023-07-04,11.0,Users,viewer
23366,2023-07-04,6.0,New Users,viewer
23367,2023-07-04,17.0,Pageviews,viewer
23368,2023-07-04,632.0,Session Duration,viewer




In [None]:
def get_acquisition_report(analytics, view_id, page_token=None, page_size=1000,
                           start_date='3650daysAgo', end_date='today'):
    """Queries one page of the Acquisition Report using the Analytics Reporting API V4.

    The report is broken down by date, channel grouping, source/medium, source
    and medium, and requests the users, newUsers, sessions, bounces, pageViews
    and sessionDuration metrics.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.
        view_id: The view ID to query.
        page_token: Page token for pagination; None requests the first page.
        page_size: Maximum number of rows requested per page.
        start_date: Start of the queried date range.
        end_date: End of the queried date range.
    Returns:
        The Acquisition Report response.
    """
    request_body = {
        'viewId': view_id,
        'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
        'metrics': [
            {'expression': 'ga:users'},
            {'expression': 'ga:newUsers'},
            {'expression': 'ga:sessions'},
            {'expression': 'ga:bounces'},
            {'expression': 'ga:pageViews'},
            {'expression': 'ga:sessionDuration'}
        ],
        'dimensions': [
            {'name': 'ga:date'},
            {'name': 'ga:channelGrouping'},
            {'name': 'ga:sourceMedium'},
            {'name': 'ga:source'},
            {'name': 'ga:medium'}
        ],
        'pageSize': page_size,
        'pageToken': page_token
    }

    # batchGet accepts a list of report requests; exactly one is sent here.
    return analytics.reports().batchGet(body={'reportRequests': [request_body]}).execute()

def parse_acquisition_report(response, view_id, website):
    """Parses the Acquisition Report response into a DataFrame.

    Only the first report of the response is read. A response with no report,
    no ``data`` section or no ``rows`` produces an empty DataFrame that still
    carries the expected columns instead of raising ``KeyError``.

    Args:
        response: The Acquisition Report response.
        view_id: The view ID.
        website: The corresponding website name.
    Returns:
        A pandas DataFrame with one row per report row: viewId, website, the
        row's five dimension values and six metric values, copied from the
        response without any type conversion.
    """
    columns = ['viewId', 'website', 'date', 'channelGrouping', 'source_Medium',
               'source', 'medium', 'users', 'newUsers', 'sessions', 'bounces',
               'pageViews', 'sessionDuration']

    reports = response.get('reports') or [{}]
    data_rows = reports[0].get('data', {}).get('rows', [])

    # row['metrics'] holds one entry per requested date range (see the API
    # reference); the request uses a single range, so index 0 is read.
    parsed_data = [
        [view_id, website] + row['dimensions'] + row['metrics'][0]['values']
        for row in data_rows
    ]

    return pd.DataFrame(parsed_data, columns=columns)

# Initialize the Analytics Reporting API V4 service (once; the duplicated
# second initialization only rebuilt an identical service object)
analytics = initialize_analyticsreporting()

# Create a list to store parsed Acquisition DataFrames
acquisition_dfs = []

# Fetch and parse the Acquisition Report data for each view
for view_info in VIEW_DATA:
    all_data = []  # rows accumulated across all pages of the current view
    response = get_acquisition_report(analytics, view_info['view_id'])
    # Keep paging while the response carries rows
    while 'reports' in response and 'data' in response['reports'][0] and 'rows' in response['reports'][0]['data']:
        all_data.extend(response['reports'][0]['data']['rows'])
        # A missing/empty nextPageToken means this was the last page
        page_token = response['reports'][0].get('nextPageToken')
        if page_token:
            response = get_acquisition_report(analytics, view_info['view_id'], page_token)
        else:
            break

    # Re-wrap the collected rows in the response layout the parser reads
    acquisition_df = parse_acquisition_report({'reports': [{'data': {'rows': all_data}}]},
                                               view_info['view_id'], view_info['website'])
    acquisition_dfs.append(acquisition_df)

# Concatenate all parsed Acquisition DataFrames into a single DataFrame
combined_acquisition_df = pd.concat(acquisition_dfs, ignore_index=True)

# Convert the date column (YYYYMMDD strings) to datetime format
combined_acquisition_df['date'] = pd.to_datetime(combined_acquisition_df['date'], format='%Y%m%d')

# Display the combined Acquisition DataFrame
combined_acquisition_df

Unnamed: 0,viewId,website,date,channelGrouping,source_Medium,source,medium,users,newUsers,sessions,bounces,pageViews,sessionDuration
0,227406749,discourse,2021-06-10,Direct,(direct) / (none),(direct),(none),5,1,8,3,23,2851.0
1,227406749,discourse,2021-06-10,Referral,accounts.google.com / referral,accounts.google.com,referral,2,0,2,0,12,2008.0
2,227406749,discourse,2021-06-10,Social,accounts.youtube.com / referral,accounts.youtube.com,referral,5,0,8,0,36,4518.0
3,227406749,discourse,2021-06-11,Direct,(direct) / (none),(direct),(none),3,0,3,2,6,126.0
4,227406749,discourse,2021-06-11,Referral,accounts.google.com / referral,accounts.google.com,referral,1,0,1,1,1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12422,229309489,viewer,2023-07-03,Referral,learn.canceridc.dev / referral,learn.canceridc.dev,referral,1,1,1,1,1,0.0
12423,229309489,viewer,2023-07-04,Direct,(direct) / (none),(direct),(none),8,4,8,6,14,632.0
12424,229309489,viewer,2023-07-04,Referral,cs666.org / referral,cs666.org,referral,1,0,1,1,1,0.0
12425,229309489,viewer,2023-07-04,Referral,discourse.slicer.org / referral,discourse.slicer.org,referral,1,1,1,1,1,0.0


In [None]:
import pandas as pd

# Acquisition metric columns mapped to the display name used in the summary
metrics_columns = {
    'sessions': 'Sessions',
    'users': 'Users',
    'newUsers': 'New Users',
    'bounces': 'Bounces',
    'pageViews': 'Pageviews',
    'sessionDuration': 'Session Duration',
}

# KPI family each metric is reported under
kpi_mapping = {
    'sessions': 'acquisition',
    'users': 'acquisition',
    'newUsers': 'acquisition',
    'bounces': 'behavior',
    'pageViews': 'behavior',
    'sessionDuration': 'behavior',
}

# Metric values must be numeric before they can be summed
for col in metrics_columns:
    combined_acquisition_df[col] = combined_acquisition_df[col].astype(float)

# Total every metric per website / date / traffic-source combination
grouped_acquisition_df = (
    combined_acquisition_df
    .groupby(['website', 'date', 'channelGrouping', 'source_Medium', 'source', 'medium'])
    [list(metrics_columns)]
    .sum()
    .reset_index()
)

# Long format: one record per grouped row and metric, tagged with its KPI
# family ('unknown' for a metric missing from kpi_mapping)
summary_rows = [
    {
        'website': row['website'],
        'date': row['date'],
        'channelGrouping': row['channelGrouping'],
        'source_Medium': row['source_Medium'],
        'source': row['source'],
        'medium': row['medium'],
        'metric_name': display_name,
        'kpi': kpi_mapping.get(metric_column, 'unknown'),
        'aggregates': float(row[metric_column]),
    }
    for _, row in grouped_acquisition_df.iterrows()
    for metric_column, display_name in metrics_columns.items()
]

# Build the summary table from the collected records
acquisition_summary_table = pd.DataFrame(summary_rows)

# Show the summary table as the cell output
acquisition_summary_table


Unnamed: 0,website,date,channelGrouping,source_Medium,source,medium,metric_name,kpi,aggregates
0,discourse,2021-06-10,Direct,(direct) / (none),(direct),(none),Sessions,acquisition,8.0
1,discourse,2021-06-10,Direct,(direct) / (none),(direct),(none),Users,acquisition,5.0
2,discourse,2021-06-10,Direct,(direct) / (none),(direct),(none),New Users,acquisition,1.0
3,discourse,2021-06-10,Direct,(direct) / (none),(direct),(none),Bounces,behavior,3.0
4,discourse,2021-06-10,Direct,(direct) / (none),(direct),(none),Pageviews,behavior,23.0
...,...,...,...,...,...,...,...,...,...
74557,viewer,2023-07-04,Social,groups.google.com / referral,groups.google.com,referral,Users,acquisition,1.0
74558,viewer,2023-07-04,Social,groups.google.com / referral,groups.google.com,referral,New Users,acquisition,1.0
74559,viewer,2023-07-04,Social,groups.google.com / referral,groups.google.com,referral,Bounces,behavior,1.0
74560,viewer,2023-07-04,Social,groups.google.com / referral,groups.google.com,referral,Pageviews,behavior,1.0


In [None]:
def get_behavior_report(analytics, view_id, page_token=None, page_size=1000,
                        start_date='3650daysAgo', end_date='today'):
    """Queries one page of the Behavior Report using the Analytics Reporting API V4.

    The report is broken down by date, page path and page title, and requests
    the pageviews, uniquePageviews, exits, bounces, entrances, TimeOnPage and
    sessions metrics.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.
        view_id: The view ID to query.
        page_token: Page token for pagination; None requests the first page.
        page_size: Maximum number of rows requested per page.
        start_date: Start of the queried date range.
        end_date: End of the queried date range.
    Returns:
        The Behavior Report response.
    """
    request_body = {
        'viewId': view_id,
        'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
        'metrics': [
            {'expression': 'ga:pageviews'},
            {'expression': 'ga:uniquePageviews'},
            {'expression': 'ga:exits'},
            {'expression': 'ga:bounces'},
            {'expression': 'ga:entrances'},
            {'expression': 'ga:TimeOnPage'},
            {'expression': 'ga:sessions'}
        ],
        'dimensions': [
            {'name': 'ga:date'},
            {'name': 'ga:pagePath'},
            {'name': 'ga:pageTitle'}
        ],
        'pageSize': page_size,
        'pageToken': page_token
    }

    # batchGet accepts a list of report requests; exactly one is sent here.
    return analytics.reports().batchGet(body={'reportRequests': [request_body]}).execute()


def parse_behavior_report(response, view_id, website):
    """Parses the Behavior Report response into a DataFrame.

    Only the first report of the response is read. A response with no report,
    no ``data`` section or no ``rows`` produces an empty DataFrame that still
    carries the expected columns instead of raising ``KeyError``.

    Args:
        response: The Behavior Report response.
        view_id: The view ID.
        website: The corresponding website name.
    Returns:
        A pandas DataFrame with one row per report row: viewId, website, the
        row's three dimension values and seven metric values, copied from the
        response without any type conversion.
    """
    columns = ['viewId', 'website', 'date', 'pagePath', 'pageTitle',
               'pageviews', 'uniquePageviews', 'exits', 'bounces', 'entrances', 'TimeOnPage', 'sessions']

    reports = response.get('reports') or [{}]
    data_rows = reports[0].get('data', {}).get('rows', [])

    # row['metrics'] holds one entry per requested date range (see the API
    # reference); the request uses a single range, so index 0 is read.
    parsed_data = [
        [view_id, website] + row['dimensions'] + row['metrics'][0]['values']
        for row in data_rows
    ]

    return pd.DataFrame(parsed_data, columns=columns)

# Build the authorized Reporting API client used for the requests below.
analytics = initialize_analyticsreporting()

# Download every page of the behavior report, one view at a time.
behavior_dfs = []
for view_info in VIEW_DATA:
    all_data = []  # rows accumulated across all pages of the current view
    response = get_behavior_report(analytics, view_info['view_id'])

    while True:
        # Stop as soon as a response carries no rows.
        has_rows = (
            'reports' in response
            and 'data' in response['reports'][0]
            and 'rows' in response['reports'][0]['data']
        )
        if not has_rows:
            break

        report = response['reports'][0]
        all_data.extend(report['data']['rows'])

        # A missing/empty nextPageToken means this was the last page.
        page_token = report.get('nextPageToken')
        if not page_token:
            break
        response = get_behavior_report(analytics, view_info['view_id'], page_token)

    # Re-wrap the collected rows in the response layout the parser reads.
    behavior_df = parse_behavior_report({'reports': [{'data': {'rows': all_data}}]},
                                        view_info['view_id'], view_info['website'])
    behavior_dfs.append(behavior_df)

# One DataFrame for all views, with a fresh 0..n-1 index.
combined_behavior_df = pd.concat(behavior_dfs, ignore_index=True)

# ga:date arrives as a YYYYMMDD string; convert it to datetime.
combined_behavior_df['date'] = pd.to_datetime(combined_behavior_df['date'], format='%Y%m%d')

# Show the combined result as the cell output.
combined_behavior_df



Unnamed: 0,viewId,website,date,pagePath,pageTitle,pageviews,uniquePageviews,exits,bounces,entrances,TimeOnPage,sessions
0,227406749,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162,TCGA-GBM tutorial notebook - Support - Imaging...,1,1,1,1,1,0.0,1
1,227406749,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162/17,TCGA-GBM tutorial notebook - Support - Imaging...,2,1,1,0,1,144.0,1
2,227406749,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162/18,TCGA-GBM tutorial notebook - Support - Imaging...,67,15,15,1,15,9234.0,15
3,227406749,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162/20,TCGA-GBM tutorial notebook - Support - Imaging...,1,1,1,1,1,0.0,1
4,227406749,discourse,2021-06-11,/,Imaging Data Commons - Imaging Data Commons forum,4,4,3,3,3,3.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...
60688,229309489,viewer,2023-07-04,/viewer/1.3.6.1.4.1.14519.5.2.1.6450.4004.1093...,OHIF Viewer,1,1,1,1,1,0.0,1
60689,229309489,viewer,2023-07-04,/viewer/1.3.6.1.4.1.14519.5.2.1.7695.1700.1861...,OHIF Viewer,1,1,1,1,1,0.0,1
60690,229309489,viewer,2023-07-04,/viewer/1.3.6.1.4.1.32722.99.99.20371500380599...,(not set),1,1,1,1,1,0.0,1
60691,229309489,viewer,2023-07-04,/viewer/1.3.6.1.4.1.32722.99.99.23934135391171...,OHIF Viewer,1,1,1,1,1,0.0,1


In [None]:
import pandas as pd

# Behavior metric columns mapped to the display name used in the summary
metrics_columns = {
    'pageviews': 'Pageviews',
    'uniquePageviews': 'Unique Pageviews',
    'exits': 'Exits',
    'bounces': 'Bounces',
    'entrances': 'Entrances',
    'TimeOnPage': 'Time on Page',
    'sessions': 'Sessions',
}

# Metric values must be numeric before they can be summed
for col in metrics_columns:
    combined_behavior_df[col] = combined_behavior_df[col].astype(float)

# Total every metric per website / date / page
grouped_behavior_df = (
    combined_behavior_df
    .groupby(['website', 'date', 'pagePath', 'pageTitle'])
    [list(metrics_columns)]
    .sum()
    .reset_index()
)

# Long format: one record per grouped row and metric
summary_rows = [
    {
        'website': row['website'],
        'date': row['date'],
        'pagePath': row['pagePath'],
        'pageTitle': row['pageTitle'],
        'metric_name': display_name,
        'aggregates': float(row[metric_column]),
    }
    for _, row in grouped_behavior_df.iterrows()
    for metric_column, display_name in metrics_columns.items()
]

# Build the summary table from the collected records
behavior_summary_table = pd.DataFrame(summary_rows)

# Show the summary table as the cell output
behavior_summary_table


Unnamed: 0,website,date,pagePath,pageTitle,metric_name,aggregates
0,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162,TCGA-GBM tutorial notebook - Support - Imaging...,Pageviews,1.0
1,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162,TCGA-GBM tutorial notebook - Support - Imaging...,Unique Pageviews,1.0
2,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162,TCGA-GBM tutorial notebook - Support - Imaging...,Exits,1.0
3,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162,TCGA-GBM tutorial notebook - Support - Imaging...,Bounces,1.0
4,discourse,2021-06-10,/t/tcga-gbm-tutorial-notebook/162,TCGA-GBM tutorial notebook - Support - Imaging...,Entrances,1.0
...,...,...,...,...,...,...
424846,viewer,2023-07-04,/viewer/1.3.6.1.4.1.9328.50.17.297239603209803...,OHIF Viewer,Exits,1.0
424847,viewer,2023-07-04,/viewer/1.3.6.1.4.1.9328.50.17.297239603209803...,OHIF Viewer,Bounces,1.0
424848,viewer,2023-07-04,/viewer/1.3.6.1.4.1.9328.50.17.297239603209803...,OHIF Viewer,Entrances,1.0
424849,viewer,2023-07-04,/viewer/1.3.6.1.4.1.9328.50.17.297239603209803...,OHIF Viewer,Time on Page,0.0


In [None]:

# - **Bounce Rate**: Bounce rate is the percentage of single-page sessions on your site. It is calculated by dividing the number of single-page sessions by the total number of sessions. The formula for calculating bounce rate is:

#     `Bounce Rate = (Total Bounces / Total Sessions) * 100`

# - **Exit Rate**: Exit rate is the percentage of exits from a page, regardless of how many pages the user viewed before leaving. It is calculated by dividing the number of exits from a page by the total number of pageviews for that page. The formula for calculating exit rate is:

#     `Exit Rate = (Total Exits / Total Pageviews) * 100`

# - **Average Time on Page**: Average time on page is the average amount of time users spent viewing a specified page or screen. Google Analytics cannot measure the time spent on the last page of a session, so exits contribute no time and are excluded from the denominator. The formula for calculating average time on page is:

#     `Average Time on Page = Total Time on Page / (Pageviews - Exits)`

In [None]:
# Overall Average Time on Page = Sum of Time on Page / (Sum of Pageviews - Sum of Exits)

In [None]:
from google.colab import auth
from google.cloud import bigquery
# Authenticate the Colab user so Google Cloud clients can act on their behalf.
auth.authenticate_user()

# BigQuery client used by every load job in the cells below; without it those
# cells fail with NameError on a fresh kernel. The project ID is the one that
# prefixes the destination table IDs.
# NOTE(review): confirm this is the project the load jobs should run under.
client = bigquery.Client(project="idc-external-025")

In [None]:
# Load-job settings used for the raw report tables: declare `date` as a
# TIMESTAMP column and overwrite the destination table (WRITE_TRUNCATE).
job_config = bigquery.LoadJobConfig()
job_config.schema = [bigquery.SchemaField("date", bigquery.enums.SqlTypeNames.TIMESTAMP)]
job_config.write_disposition = "WRITE_TRUNCATE"

In [None]:
# Start one load job per raw report table; all three use the shared job_config.
audience_df_job = client.load_table_from_dataframe(combined_audience_df, "idc-external-025.logs.google_analytics_ua_audience", job_config=job_config)
acquisition_df_job = client.load_table_from_dataframe(combined_acquisition_df, "idc-external-025.logs.google_analytics_ua_acquisition", job_config=job_config)
behavior_df_job = client.load_table_from_dataframe(combined_behavior_df, "idc-external-025.logs.google_analytics_ua_behavior", job_config=job_config)

# Block until every job has finished: result() raises if a load failed, so
# errors surface in this cell instead of going unnoticed.
for load_job in (audience_df_job, acquisition_df_job, behavior_df_job):
    load_job.result()

In [None]:
# Explicit schema for the audience summary table; WRITE_TRUNCATE overwrites
# the destination table on every run.
audience_summary_job_config = bigquery.LoadJobConfig(schema=[
                                            bigquery.SchemaField("date", bigquery.enums.SqlTypeNames.TIMESTAMP),
                                            bigquery.SchemaField("aggregate", bigquery.enums.SqlTypeNames.FLOAT64),
                                            bigquery.SchemaField("metric_name", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("website", bigquery.enums.SqlTypeNames.STRING)
                                            ],
                                            write_disposition="WRITE_TRUNCATE")

audience_summary_job = client.load_table_from_dataframe(audience_summary_table, "idc-external-025.logs.google_analytics_ua_audience_summary", job_config=audience_summary_job_config)

# Block until the load has finished; result() raises if the job failed.
audience_summary_job.result();

In [None]:
# Explicit schema for the acquisition summary table; WRITE_TRUNCATE overwrites
# the destination table on every run.
acquisition_summary_job_config = bigquery.LoadJobConfig(schema=[
                                            bigquery.SchemaField("website", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("date", bigquery.enums.SqlTypeNames.TIMESTAMP),
                                            bigquery.SchemaField("channelGrouping", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("source_Medium", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("source", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("medium", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("metric_name", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("kpi", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("aggregates", bigquery.enums.SqlTypeNames.FLOAT64)
                                            ],
                                            write_disposition="WRITE_TRUNCATE")

# Stored under its own name (matching audience_summary_job) so it no longer
# overwrites acquisition_df_job, the handle of the raw acquisition load.
acquisition_summary_job = client.load_table_from_dataframe(acquisition_summary_table, "idc-external-025.logs.google_analytics_ua_acquisition_summary", job_config=acquisition_summary_job_config)

# Block until the load has finished; result() raises if the job failed.
acquisition_summary_job.result();

In [None]:
# Explicit schema for the behavior summary table; WRITE_TRUNCATE overwrites
# the destination table on every run.
behavior_summary_job_config = bigquery.LoadJobConfig(schema=[
                                            bigquery.SchemaField("website", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("date", bigquery.enums.SqlTypeNames.TIMESTAMP),
                                            bigquery.SchemaField("pagePath", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("pageTitle", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("metric_name", bigquery.enums.SqlTypeNames.STRING),
                                            bigquery.SchemaField("aggregates", bigquery.enums.SqlTypeNames.FLOAT64)
                                            ],
                                            write_disposition="WRITE_TRUNCATE")

# Stored under its own name (matching audience_summary_job) so it no longer
# overwrites behavior_df_job, the handle of the raw behavior load.
behavior_summary_job = client.load_table_from_dataframe(behavior_summary_table, "idc-external-025.logs.google_analytics_ua_behavior_summary", job_config=behavior_summary_job_config)

# Block until the load has finished; result() raises if the job failed.
behavior_summary_job.result();