> ## Google Analytics reports
>
> [Connection reference](https://janakiev.com/blog/python-google-analytics/)
> [Markdown basics](https://www.markdownguide.org/basic-syntax/)

**Environment settings**

In [1]:
#load Libraries
import numpy as np
import pandas as pd
import pandas_gbq
import gspread
import polars as pl
import duckdb
from google.cloud import bigquery
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2 import service_account
from googleapiclient.discovery import build
from mega import Mega

In [6]:
# create function to send data to gsheets
def save_to_gsheets(df, sheet_name, worksheet_name):
    """Upload a DataFrame to an existing Google Sheets worksheet.

    The column names are written as the first row, followed by one row per
    DataFrame record. Datetime columns are sent as strings and missing values
    as empty strings. The caller's DataFrame is not modified.

    Authentication uses the service-account key file whose path is held in the
    notebook-level variable ``api``; it must be defined before this function
    is called.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to upload.
    sheet_name : str
        Title of the spreadsheet to open.
    worksheet_name : str
        Name of the worksheet (tab) inside that spreadsheet.
    """
    client = gspread.service_account(api)
    sheet = client.open(sheet_name)
    worksheet = sheet.worksheet(worksheet_name)

    # work on a copy so the caller's frame keeps its datetime columns
    payload = df.copy()

    # convert every datetime column (naive or tz-aware, any unit) to string
    datetime_columns = payload.select_dtypes(include=['datetime', 'datetimetz']).columns
    for column in datetime_columns:
        payload[column] = payload[column].astype(str)

    # replace NaN values with empty strings
    worksheet.update([payload.columns.values.tolist()] + payload.fillna('').values.tolist())

    print(f'DataFrame uploaded to: {sheet_name}, {worksheet_name}')

**Pull data from Analytics**

In [9]:
# Google Analytics view ids used as 'viewId' by the report requests in this notebook
getm_view_id = '219175238' # GETM - ecommerce (prod)
mpe_view_id = '109327488' # Mi pedido epura app (master)
# path to the service-account JSON key file; also read by save_to_gsheets
api = '../APIs/gepp-538-db.json'
# OAuth scopes requested for the credentials shared by Sheets and Analytics
scopes = ['https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive',
        'https://www.googleapis.com/auth/analytics.readonly']
# connect to google sheets
gs_credentials = ServiceAccountCredentials.from_json_keyfile_name(api, scopes)
gc = gspread.authorize(gs_credentials)
# connect to big query (built from the same key file, without explicit scopes)
bq_credentials = service_account.Credentials.from_service_account_file(api)
project_id = 'gepp-538'
client = bigquery.Client(credentials=bq_credentials,project=project_id)
# connect to analytics service (Reporting API v4), reusing the Sheets credentials
service = build('analyticsreporting','v4',credentials=gs_credentials)

In [10]:
def _parse_metric_value(value):
    """Convert a metric string to an int when possible, otherwise to a float."""
    try:
        return int(value)
    except ValueError:
        # decimals and scientific notation ('45.5', '1E7') end up here
        return float(value)


def create_df(response):
    """Flatten a Reporting API ``batchGet`` response into a DataFrame.

    Each row of every report in ``response`` becomes one DataFrame row. The
    dimension values are stored as strings under their dimension header, and
    the metric values are stored under the metric's ``name``, converted to
    ``int`` when the text is an integer and to ``float`` otherwise.

    Parameters
    ----------
    response : dict
        Parsed response with an optional ``'reports'`` list.

    Returns
    -------
    pandas.DataFrame
        One row per report row; empty when the response has no rows.

    Notes
    -----
    A row carries one ``metrics`` entry per requested date range. All entries
    are written under the same metric names, so with several date ranges the
    last one wins.
    """
    # one dict per report row; turned into a DataFrame in a single step
    row_list = []

    for report in response.get('reports', []):
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_headers = column_header.get('metricHeader', {}).get('metricHeaderEntries', [])
        metric_names = [metric.get('name') for metric in metric_headers]

        for row in report.get('data', {}).get('rows', []):
            # dimensions first so they lead the column order
            row_dict = dict(zip(dimension_headers, row.get('dimensions', [])))

            for date_range_values in row.get('metrics', []):
                # a missing 'values' list is treated as "no metrics" instead of failing
                for name, value in zip(metric_names, date_range_values.get('values', [])):
                    row_dict[name] = _parse_metric_value(value)
            row_list.append(row_dict)
    return pd.DataFrame(row_list)

### General query
(Date, users, newUsers, sessions, bounceRate, pageviews, etc)

In [7]:
# view id queried by the report cells below
view_id = getm_view_id # alternative: mpe_view_id
# add start and end dates (relative date strings passed as the request's dateRanges)
inicio = '30daysago'
fin = 'yesterday'
# add query parameters: one dimension and eight metrics for the general report
dimension = 'ga:date'
metric1 = 'ga:users'
metric2 = 'ga:newUsers'
metric3 = 'ga:sessions'
metric4 = 'ga:bounceRate'
metric5 = 'ga:pageviews'
metric6 = 'ga:pageviewsPerSession'
metric7 = 'ga:avgSessionDuration'
metric8 = 'ga:avgPageLoadTime'

In [8]:
# Request the eight general metrics by date for the selected view,
# then flatten the response into a DataFrame.
response = service.reports().batchGet(body={
    'reportRequests': [{
        'viewId': view_id,
        'dateRanges': [{'startDate': inicio, 'endDate': fin}],
        'metrics': [
            {'expression': expression}
            for expression in (metric1, metric2, metric3, metric4,
                               metric5, metric6, metric7, metric8)
        ],
        'dimensions': [{'name': dimension}],
    }],
}).execute()

metrics = create_df(response)

In [9]:
# Map each Analytics column to its report label by name, so the result does
# not depend on the order in which the columns arrive.
column_labels = {
    'ga:date': 'Fecha',
    'ga:users': 'Usuarios',
    'ga:newUsers': 'Nuevos usuarios',
    'ga:sessions': 'Sesiones',
    'ga:bounceRate': 'Tasa de rebote',
    'ga:pageviews': 'Páginas vistas',
    'ga:pageviewsPerSession': 'Páginas vistas por sesión',
    'ga:avgSessionDuration': 'Duración prom sesión (mins)',
    'ga:avgPageLoadTime': 'Tiempo carga prom página',
}
# divide by 60 to convert to minutes, but only while the raw column is still
# present: re-running this cell must not divide an already converted value again
if 'ga:avgSessionDuration' in metrics.columns:
    metrics = metrics.assign(**{'ga:avgSessionDuration': metrics['ga:avgSessionDuration'] / 60})
# rename columns
metrics = metrics.rename(columns=column_labels)
# convert to datetime
metrics['Fecha'] = pd.to_datetime(metrics['Fecha'])
# sort by date desc
metrics = metrics.sort_values(by='Fecha', ascending=False)

In [None]:
# clear the target worksheet before sending new data, so rows left over
# from a previous upload do not remain below the new ones
# open the 'GETM mensual' spreadsheet
gsheet = gc.open('GETM mensual')
# select the 'metricas' worksheet
sheet1 = gsheet.worksheet('metricas')
# remove all values from that worksheet
sheet1.clear()

{'spreadsheetId': '1uOOpMXaIDT5MijuJFa0wN5Gy0dFVro8yQygDKgBDDMI',
 'clearedRange': 'metricas!A1:J31'}

In [None]:
# send the general metrics to the 'metricas' worksheet of 'GETM mensual'
save_to_gsheets(metrics, 'GETM mensual', 'metricas')

DataFrame uploaded to: GETM mensual, metricas


In [None]:
# Request the product-level ecommerce metrics for the selected view,
# one row per product name, then flatten the response into a DataFrame.
response = service.reports().batchGet(body={
    'reportRequests': [{
        'viewId': view_id,
        'dateRanges': [{'startDate': inicio, 'endDate': fin}],
        'metrics': [
            {'expression': expression}
            for expression in (
                'ga:productRevenuePerPurchase',
                'ga:uniquePurchases',
                'ga:itemQuantity',
                'ga:revenuePerItem',
                'ga:itemsPerPurchase',
                'ga:itemRevenue',
            )
        ],
        'dimensions': [{'name': 'ga:productName'}],
    }],
}).execute()

products = create_df(response)

In [None]:
# display the product-level report built in the previous cell
products

Unnamed: 0,ga:productName,ga:productRevenuePerPurchase,ga:uniquePurchases,ga:itemQuantity,ga:revenuePerItem,ga:itemsPerPurchase,ga:itemRevenue
0,(not set),115.902662,1401,2071,78.406388,1.478230,162379.63
1,7 up libre 2 lts pet 8,197.795455,44,46,189.195652,1.045455,8703.00
2,7 up libre 600 ml pet 12,162.706161,211,213,161.178404,1.009479,34331.00
3,7 up ra 0.5 lts bc grb 6,91.982143,168,274,56.397810,1.630952,15453.00
4,7 up ra 1.25 lts bc grb 4,68.576923,65,75,59.433333,1.153846,4457.50
...,...,...,...,...,...,...,...
395,vita fresa 2 lts pet 8,206.833333,144,146,204.000000,1.013889,29784.00
396,vita fresa 3l pet 8,280.057471,87,89,273.764045,1.022989,24365.00
397,vita fresa 400 ml pet 12,121.828767,73,77,115.500000,1.054795,8893.50
398,vita fresa 600 ml pet 12,187.741176,85,101,158.000000,1.188235,15958.00


### Geographic map and new users

In [None]:
# Users and new users by country, region, city and coordinates.
# Without an explicit pageSize the API returns only its default of 1,000 rows
# per request, which would silently truncate a city-level report, so request
# the maximum of 100,000 rows. Results beyond that would still need paging
# with pageToken.
response = service.reports().batchGet(body={
    'reportRequests': [{
        'viewId': view_id,
        'dateRanges': [{'startDate': inicio, 'endDate': fin}],
        'metrics': [
            {"expression": "ga:users"},
            {"expression": "ga:newUsers"},
        ], "dimensions": [
            {"name": "ga:country"},
            {"name": "ga:region"},
            {"name": "ga:city"},
            {"name": "ga:longitude"},
            {"name": "ga:latitude"}
        ], "samplingLevel": "LARGE",
        "pageSize": 100000,
    }]}).execute()

geo = create_df(response)
# create_df keeps dimension values as strings; make the coordinates numeric
geo['ga:latitude'] = pd.to_numeric(geo['ga:latitude'])
geo['ga:longitude'] = pd.to_numeric(geo['ga:longitude'])

In [None]:
#geo = geo[geo['ga:country']=='Mexico']
#geo.columns = ['pais', 'estado', 'ciudad', 'longitud', 'latitud','usuarios', 'nuevos_usuarios']
#geo = geo[geo['longitud']!=0]
#geo.to_csv('geo.csv')

### Transactions and revenue

In [None]:
# Daily transactions, revenue, average order value and item quantity for the
# selected view. 'ga:transactionsPerSession' (ecommerce conversion rate) is
# intentionally not requested.
response = service.reports().batchGet(body={
    'reportRequests': [{
        'viewId': view_id,
        'dateRanges': [{'startDate': inicio, 'endDate': fin}],
        'metrics': [
            {'expression': expression}
            for expression in (
                'ga:transactions',
                'ga:transactionRevenue',
                'ga:revenuePerTransaction',  # avg order value
                'ga:itemQuantity',
            )
        ],
        'dimensions': [{'name': 'ga:date'}],
    }],
}).execute()

trans = create_df(response)

In [None]:
# Same daily transactions report as for GETM, but for the "Mi pedido epura"
# view (mpe_view_id) instead of view_id.
response = service.reports().batchGet(body={
    'reportRequests': [{
        'viewId': mpe_view_id,
        'dateRanges': [{'startDate': inicio, 'endDate': fin}],
        'metrics': [
            {"expression": "ga:transactions"},
            # 'ga:transactionsPerSession' (ecommerce conversion rate) is intentionally not requested
            {"expression": "ga:transactionRevenue"},
            {"expression": "ga:revenuePerTransaction"}, # avg order value
            {"expression": "ga:itemQuantity"},
        ], "dimensions": [
            {'name':'ga:date'},
        ]
    }]}).execute()

mpe_trans = create_df(response)

In [None]:
# Keep only the date, transaction count and revenue columns of each report.
getm_trans = trans.loc[:, ['ga:date', 'ga:transactions', 'ga:transactionRevenue']]
mpe_trans = mpe_trans.loc[:, ['ga:date', 'ga:transactions', 'ga:transactionRevenue']]

In [None]:
# Join both reports on the date, keeping only dates present in both;
# the suffixes tell the GETM columns apart from the MPE ones.
ambas = pd.merge(
    getm_trans,
    mpe_trans,
    how='inner',
    on='ga:date',
    suffixes=('_getm', '_mpe'),
)

In [None]:
# optional clean-up, currently disabled: strip the 'ga:' prefix and parse the date
#ambas.columns = ambas.columns.str.replace('ga:','')
#ambas['date'] = pd.to_datetime(ambas['date'])
# write the merged GETM/MPE report to a CSV file in the working directory
ambas.to_csv('ambas.csv')

In [None]:
# NOTE(review): `df` is not defined in any visible cell, so this raises
# NameError on a fresh kernel. It presumably refers to one of the transaction
# frames that has 'ga:date' and 'ga:transactions' columns (e.g. `trans`) — confirm.
# parse the 'ga:date' column as datetime
df['ga:date'] = pd.to_datetime(df['ga:date'])

In [None]:
# Sum the transactions per month, then label each month as 'YYYY-MM'.
mensual = (
    df.groupby([pd.Grouper(key='ga:date', freq='M')])['ga:transactions']
    .sum()
    .reset_index()
    .assign(**{'ga:date': lambda totals: totals['ga:date'].dt.strftime('%Y-%m')})
)

In [None]:
# line chart of the monthly transaction totals
# NOTE(review): `px` is not imported in the visible cells — presumably
# plotly.express; confirm and add the import to the imports cell.
fig = px.line(mensual, x='ga:date', y='ga:transactions', height=600, width=900,
            title="Transacciones por mes")
fig.show()

In [None]:
# write the monthly transaction totals to a CSV file in the working directory
mensual.to_csv('transacciones.csv')

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=d2538dfe-e612-40a9-bfad-7174c7274ee1' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>

## Contact

<!-- Avatar -->
<img src="../Pictures/profile2.png" alt="me" width="75" height="80">
<!-- Text with color, font, fontsize and specific size -->
<p style="color:#323232; font-family: Helvetica; font-size: 20px;">Jesus L. Monroy<br>Economist | Data Scientist</p>
<!-- Insert url links in logos -->
<!-- Telegram -->
<a href="https://t.me/j3suslm" target="_blank" rel="noreferrer"> <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Telegram_X_2019_Logo.svg/2048px-Telegram_X_2019_Logo.png?size=16&color=3b3b3b" alt="telegram" width="30" height="22" style="padding-left:8px"/></a>
<!-- Twitter -->
<a href="https://www.twitter.com/sqlalchemist" target="_blank" rel="noreferrer"> <img src="https://toppng.com/public/uploads/preview/twitter-x-new-logo-round-icon-png-11692480241tdbz6jparr.webp?size=16&color=3b3b3b" alt="twitter" width="30" height="22" style="padding-left:8px"/></a>
<!-- Github -->
<a href="https://github.com/SqlAlchemist/My-portfolio" target="_blank" rel="noreferrer"> <img src="https://icongr.am/devicon/github-original.svg?size=16&color=3b3b3b" alt="github" width="30" height="30" style="padding-left:8px"/></a>
<!-- Linkedin -->
<a href="https://www.linkedin.com/in/j3sus-lmonroy" target="_blank" rel="noreferrer"> <img src="https://icongr.am/simple/linkedin.svg?size=16&color=3b3b3b" alt="linkedin" width="30" height="30" style="padding-left:8px"/></a>
<!-- Medium -->
<a href="https://medium.com/@jesus_lmonroy" target="_blank" rel="noreferrer"> <img src="https://cdn1.iconfinder.com/data/icons/social-media-and-logos-12/32/Logo_medium-512.png?size=55&color=3b3b3b" alt="medium" width="30" height="33" style="padding-left:8px"/></a>