In [None]:
# bob.refresh {"cron": "*/30 * * * *", "timeout": 40000}

In [None]:
import bob
# Health-check handle provided by the project's `bob` package.
BHC = bob.HealthCheck()
# Identifier handed to both BHC.start (here) and BHC.done (last cell).
# NOTE(review): hardcoded identifier — consider loading it from configuration.
BHC_KEY = "608ba515-2285-45ad-92d4-47491493ac12"
# Presumably tells the monitoring side that this run has begun — TODO confirm.
START = BHC.start(BHC_KEY)
print(START)

In [None]:
!pip install --upgrade --user google-api-python-client oauth2client

In [None]:
%run __init__.ipynb

In [None]:
"""
Hello Analytics Reporting API V4.
"""
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials


# OAuth scope requested for the service account: read-only Analytics access.
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
# Service-account JSON key file. PARAM_FOLDER is not defined in this cell
# (presumably provided by the `%run __init__.ipynb` cell — TODO confirm).
KEY_FILE_LOCATION = PARAM_FOLDER + 'analytics_id_read.json'
# Identifier sent as `viewId` in every report request.
VIEW_ID = '214312751'
# Metric expression requested from the Reporting API.
METRICS = [{'expression': 'ga:users'}]
# METRICS = [{'expression': 'ga:sessions'}]
# Dimension the metric is broken down by.
# NOTE(review): the name is misspelled ("DIMENTIONS") but it is referenced
# under this spelling further down, so it is kept as-is.
DIMENTIONS = [{'name': 'ga:country'}]
# Number of days of history to fetch, counted back from the current date.
FIRSTDATA = 30


def initialize_analyticsreporting():
    """
    Create the Analytics Reporting API V4 client used by this notebook.

    Credentials are read from the service-account key file at
    KEY_FILE_LOCATION and requested for the scopes listed in SCOPES.

    Returns:
    The service object produced by build('analyticsreporting', 'v4', ...).
    """
    service_account = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, SCOPES)

    # Hand the credentials straight to the discovery-based client factory.
    return build('analyticsreporting', 'v4', credentials=service_account)


# Module-level Reporting API service object, created once when this cell runs.
ANALYTICS = initialize_analyticsreporting()

In [None]:
import pandas as pd
from datetime import datetime
from datetime import timedelta
from datetime import date
from dateutil.rrule import rrule
from dateutil.rrule import DAILY
from time import sleep
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import time


# Wall-clock timestamp (dd/mm/YYYY HH:MM:SS) captured when this cell starts
# executing, i.e. before any report request is sent.
NOW = datetime.now().strftime('%d/%m/%Y %H:%M:%S')


def convert_reponse_to_df(response, adate=None):
    """
    Flatten a Reporting API V4 ``batchGet`` response into a DataFrame.

    Every row of every report in ``response['reports']`` becomes one record.
    Dimension values are stored under their dimension header, metric values
    under the ``name`` of the matching metric header entry. When a row holds
    several date-range value sets, later sets overwrite earlier ones because
    they share the same metric names.

    Args:
        response: dict shaped like the API response; missing keys are
            treated as empty.
        adate: optional value written to a 'date' column on every record.
            When None, no 'date' column is added.

    Returns:
        pandas.DataFrame with one row per report row. The frame is empty
        when the response contains no reports or no rows.

    Raises:
        ValueError: if a metric value is neither an integer nor a float
            literal.
    """
    records = []
    for report in response.get('reports', []):
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_names = [entry.get('name')
                        for entry in (column_header.
                                      get('metricHeader', {}).
                                      get('metricHeaderEntries', []))]

        for row in report.get('data', {}).get('rows', []):
            record = dict(zip(dimension_headers, row.get('dimensions', [])))

            for date_range_values in row.get('metrics', []):
                for name, raw_value in zip(metric_names,
                                           date_range_values.get('values',
                                                                 [])):
                    record[name] = _parse_metric_value(raw_value)

            # Tag the record only when the caller actually supplied a date.
            if adate is not None:
                record['date'] = adate
            records.append(record)

    # Build the frame once, after all reports have been walked, so that
    # multi-report responses are complete and empty ones still yield a frame.
    return pd.DataFrame(records)


def _parse_metric_value(value):
    """Convert a metric string to int when possible, otherwise to float."""
    try:
        return int(value)
    except ValueError:
        # Decimal or exponent notation such as "45.5" or "1.0E-4".
        return float(value)


def get_report(analytics, start_date, end_date,
               view_id, metrics, dimensions,
               dim_filt_clau=[], segments=[]):
    """
    Send one report request through ``batchGet`` and return its result.

    Args:
    analytics: service object exposing ``reports().batchGet(body=...)``.
    start_date, end_date: bounds of the single date range in the request.
    view_id: value sent as ``viewId``.
    metrics, dimensions: passed through unchanged.
    dim_filt_clau: value sent as ``dimensionFilterClauses``.
    segments: value sent as ``segments``.

    Returns:
    Whatever ``execute()`` returns for the request.
    """
    date_range = {'startDate': start_date, 'endDate': end_date}
    report_request = {
        'viewId': view_id,
        'dateRanges': [date_range],
        'metrics': metrics,
        'dimensions': dimensions,
        'pageSize': 10000,
        'dimensionFilterClauses': dim_filt_clau,
        'segments': segments,
    }
    payload = {'reportRequests': [report_request]}

    pending_call = analytics.reports().batchGet(body=payload)
    return pending_call.execute()


def return_ga_data(start_date, end_date, view_id, metrics,
                   dimensions, split_dates, group_by=[],
                   dim_filt_clau=[], segments=[], analytics=None):
    """
    Fetch report data for a date window and return it as a DataFrame.

    Args:
        start_date, end_date: window bounds as 'YYYY-MM-DD' strings.
        view_id, metrics, dimensions: forwarded to get_report.
        split_dates: when falsy, the whole window is fetched in one request
            and converted without a date tag. When truthy, one request is
            sent per day (both bounds included) and every row is tagged
            with its day.
        group_by: column name(s) to group and sum the per-day result on.
            Only applied when split_dates is truthy.
        dim_filt_clau, segments: forwarded to get_report.
        analytics: service object to query. Defaults to the module-level
            ANALYTICS client when omitted.

    Returns:
        pandas.DataFrame built by convert_reponse_to_df (single request) or
        the concatenation of the per-day frames. An empty DataFrame is
        returned when no day produced any row.
    """
    # The original body read an undefined local name here; fall back to the
    # client created at module level unless the caller supplies one.
    service = ANALYTICS if analytics is None else analytics

    if not split_dates:
        return convert_reponse_to_df(get_report(service, start_date,
                                                end_date, view_id,
                                                metrics, dimensions,
                                                dim_filt_clau,
                                                segments))

    first_day = datetime.strptime(start_date, '%Y-%m-%d').date()
    last_day = datetime.strptime(end_date, '%Y-%m-%d').date()

    # Collect the per-day frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copies the accumulated frame on each call.
    daily_frames = []
    for moment in rrule(freq=DAILY, dtstart=first_day, until=last_day):
        day = str(moment.date())
        frame = convert_reponse_to_df(get_report(service, day, day,
                                                 view_id, metrics,
                                                 dimensions,
                                                 dim_filt_clau,
                                                 segments),
                                      day)
        # Skip days without data so they cannot disturb the concatenation.
        if frame is not None and not frame.empty:
            daily_frames.append(frame)

    df_total = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

    if group_by:
        df_total = df_total.groupby(group_by).sum()
    return df_total


# Reporting window: FIRSTDATA days back from now up to today, formatted as
# the YYYY-MM-DD strings that return_ga_data parses.
THIRTYDAYSAGO = (datetime.now() - timedelta(FIRSTDATA)).strftime("%Y-%m-%d")
TODAY = datetime.now().strftime("%Y-%m-%d")

START_TIME = time.time()
# split_dates=True: one request per day, so every row carries its date.
DATAF = return_ga_data(THIRTYDAYSAGO, TODAY, VIEW_ID,
                       METRICS, DIMENTIONS, True)
# Stamp the message when the fetch actually finishes; NOW was captured
# before the requests started and would report the start time instead.
print("Script execution completed at "
      f"{datetime.now().strftime('%d/%m/%Y %H:%M:%S')}. "
      f"Time: --- {time.time() - START_TIME} secnds ---")

In [None]:
def domain_301(dataf):
    """
    Build the per-entity cumulative users table from the daily report data.

    The input columns 'ga:country', 'date' and 'ga:users' are renamed to
    ENTITY, DATE and VALUE_D. A copy of every row is added under the entity
    'WORLDWIDE', values are summed per (ENTITY, DATE), and VALUE holds the
    running total of VALUE_D per entity in chronological order.

    Args:
        dataf: DataFrame with 'ga:country', 'ga:users' and a 'date' column
            of 'YYYY-MM-DD' strings. It is not modified.

    Returns:
        DataFrame with columns ENTITY, DATE ('dd/mm/YYYY'), VALUE_D, VALUE,
        DATE_ORDER ('YYYYMMDD') and LAST_UPDATE, ordered by date.
    """
    cols_to_rename = {"ga:country": "ENTITY",
                      "date": "DATE",
                      "ga:users": "VALUE_D"}
    domain = dataf.copy().rename(index=str,
                                 columns=cols_to_rename).fillna("Not defined")
    # Keep real datetimes until the running total is computed: sorting the
    # 'dd/mm/YYYY' text would order rows by day-of-month first and corrupt
    # the cumulative sum whenever the window spans two months.
    domain['DATE'] = pd.to_datetime(domain['DATE'], format='%Y-%m-%d')

    # -- Consolidate data: duplicate every row under a global entity.
    domain_ww = domain.copy()
    domain_ww['ENTITY'] = 'WORLDWIDE'

    # Concat & groupby
    domain_final = pd.concat([domain, domain_ww], axis=0)
    domain_final = domain_final.groupby(['ENTITY', 'DATE'],
                                        as_index=False).agg({'VALUE_D': 'sum'})
    # Stable sort so rows sharing a date keep their entity order.
    domain_final = (domain_final.
                    sort_values(["DATE"], ascending=True, kind='mergesort').
                    reset_index(drop=True))
    domain_final['VALUE'] = domain_final.groupby(['ENTITY'])['VALUE_D'].cumsum()

    # Render the date columns only now that ordering no longer depends on them.
    real_dates = domain_final['DATE']
    domain_final['DATE_ORDER'] = real_dates.dt.strftime('%Y%m%d')
    domain_final['DATE'] = real_dates.dt.strftime('%d/%m/%Y')
    domain_final['LAST_UPDATE'] = datetime.now().strftime('%d/%m/%Y %H:%M:%S')
    return domain_final


# Build the domain table from the report data fetched above.
DOMAIN301 = domain_301(DATAF)
# USE_MONGO and DB_APP are not defined in the cells shown here
# (presumably set by the `%run __init__.ipynb` cell — TODO confirm).
if USE_MONGO:
    bob.mongo.save_df(DOMAIN301, '301', DB_APP, True)
# Bare last expression: the notebook renders the whole frame as cell output.
DOMAIN301

In [None]:
# Last cell: call the health check's done hook with the same key that was
# passed to BHC.start (presumably marks the run as finished — TODO confirm).
DONE = BHC.done(BHC_KEY)
print(DONE)