In [None]:
# Always run this first
# Don't touch anything here
from googleanalytics import initialise_analytics, create_ga_report
from screamingfrog import create_sf_report
from pagespeed import create_pagespeed_report
from utils import merge_ga_sf_data, merge_pagespeed_data
import time
import re
import pandas as pd

In [None]:
# Sets a timer -- optional
# Stores a monotonic clock reading in START; the final "Display duration" cell
# subtracts it from a later reading to report how long the whole run took
START = time.monotonic()

In [None]:
# --- Google Analytics ---
# Account to authorise with and the view id passed to the GA report
# (credentials for each ga account are stored locally the first time they're used)
GA_ACCOUNT = "ga.wolfgang@"
VIEW_ID = "79256570"

# --- Site ---
# Full base url, without a trailing slash
# Shared by the Screaming Frog crawl and the step that joins the datasets
BASE_URL = "https://www.cornmarket.ie"

# --- Reporting period ---
START_DATE = "2020-01-01"
END_DATE = "2020-02-24"

# --- Screaming Frog ---
# Name of the config file handed to the crawler
SF_CONFIG = "sf_config_default.seospiderconfig"

In [None]:
# Run this to authorise the current Google Analytics account
# Uses GA_ACCOUNT from the configuration cell; the returned object is kept in
# `analytics` and handed to create_ga_report in the following cell
analytics = initialise_analytics(GA_ACCOUNT)

In [None]:
# Run this to create a csv report from Google Analytics
# Needs `analytics` from the authorisation cell, plus the BASE_URL, VIEW_ID,
# START_DATE and END_DATE settings from the configuration cell
create_ga_report(analytics, BASE_URL, VIEW_ID, START_DATE, END_DATE)

In [None]:
# Run this to create csv reports from Screaming Frog
# Passes BASE_URL and the SF_CONFIG config file name from the configuration cell
create_sf_report(BASE_URL, SF_CONFIG)

In [None]:
# Combine the GA and Screaming Frog data into one DataFrame, then show the
# ten pages with the most sessions (highest first)
df = merge_ga_sf_data()
df.sort_values('sessions', ascending=False).iloc[:10]

In [None]:
# Run this to create a csv page speed report for urls with status code = 200 & sessions > 0
# The call is awaited at the top level of the cell, so it has to run somewhere
# that permits top-level `await` (e.g. a Jupyter/IPython cell), not a plain script
await create_pagespeed_report()

In [None]:
# Merge page speed data and display the top 10 pages sorted by sessions
df = merge_pagespeed_data()
# Reduce each landing page to its path by removing the leading scheme + "www." host.
# - raw string, so `\.` is a regex escape rather than an invalid string escape
# - anchored with `^`, so only the URL's own origin is stripped and a URL embedded
#   later in the string (e.g. inside a query parameter) is left untouched
# - vectorised .str.replace leaves missing values as-is instead of raising
df['landing_page'] = df['landing_page'].str.replace(r'^https?://www\.[^/]+', '', regex=True)
# Show floats with two decimal places (this is a global pandas display option)
pd.options.display.float_format = '{:.2f}'.format
df.sort_values(by='sessions', ascending=False).head(10)

In [None]:
# Display duration -- optional
# Requires the timer cell to have been run, so that START is defined.
# The elapsed time is rounded to whole seconds *before* it is split into
# hours/minutes/seconds: formatting the float remainder with `:.0f` could
# round e.g. 59.6 up and print "60s" instead of carrying into the minutes.
seconds = round(time.monotonic() - START)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
print(f'Total Duration: {h}h {m}m {s}s')