## Tables and Data Source setup

In [1]:
using ODBC
using DataFrames
using DSWB
using Formatting
using URIParser

# Identifiers for the data source used by every cell below.
dsn = "dswb-natgeo" # Redshift endpoint
table = "beacons_4744" # beacon table name
tableRt = "beacons_4744_rt" # resource table name (registered below with tableType "RESOURCE_TABLE")

# Connect to Beacon Data
setRedshiftEndpoint(dsn)
setTable(table)
setTable(tableRt, tableType = "RESOURCE_TABLE")
;

Redshift endpoint set: 2017-11-22T12:45:21


In [3]:
# Time & Structures First
# Loads the shared include package via a relative path.
# NOTE(review): presumably this is where the helpers used in the following cells
# (yesterdayTimeVariables, UrlParamsInit, ShowParamsInit) come from - confirm.
include("../../Lib/Include-Package-v2.1.jl")
;

In [4]:
# Select the reporting time window. The two commented-out lines are alternative
# window choices kept for quick switching; only one assignment to TV should be active.
# TV.startTimeMsUTC and TV.endTimeMsUTC are read by the resource-table query further down.
#TV = timeVariables(2017,10,27,23,59,2017,11,3,23,59)
#TV = weeklyTimeVariables(days=7)
TV = yesterdayTimeVariables()
;

November 21, 2017 06:59:00 to November 21, 2017 16:59:00 Local Time
November 21, 2017 11:59:00 to November 21, 2017 21:59:00 UTC Time


In [5]:
# URL/query parameters: start from the initializer's values, then override the
# fields this notebook cares about. The assignments are independent of each other.
UP = UrlParamsInit()

# Source tables and the view names derived from them
UP.beaconTable = table
UP.resourceTable = tableRt
UP.btView = "$(table)_all_dt_view_prod"    # localtable
UP.rtView = "$(tableRt)_all_dt_view_prod"  # localtableRt

# Match filters ("%" on every filter; presumably a match-everything wildcard - confirm)
UP.agentOs = "%"
UP.deviceType = "%"
UP.pageGroup = "%"  # productPageGroup
UP.urlRegEx = "%"   # localUrl
UP.urlFull = "%"
UP.usePageLoad = false

# Numeric thresholds
UP.samplesMin = 10
UP.sizeMin = 10000
UP.timeLowerMs = 2000.0
UP.timeUpperMs = 60000.0

# Result shaping
UP.limitRows = 10
#UP.limitRows = 250
UP.orderBy = "time"

# Display parameters: start from the initializer's values, then set each flag explicitly.
SP = ShowParamsInit()

# View toggles
SP.criticalPathOnly = true
SP.desktop = false
SP.mobile = false
SP.devView = false

# Debug controls
SP.debug = false
# Tests use even numbers with > tests, so keep this an odd number or zero
SP.debugLevel = 0

# Notebook-local settings container holding a single integer report level.
# NOTE(review): `type` is pre-1.0 Julia syntax (spelled `mutable struct` from
# Julia 0.6 onward) - confirm the target Julia version before changing it.
type LocalVars
    reportLevel::Int64
end

# LV is not referenced by any other cell visible in this part of the notebook.
LV = LocalVars(10) # 1 for min output, 5 medium output, 10 all output
;

In [7]:
# Fetch the distinct URL groups seen in the resource table (UP.resourceTable)
# within the TV time window (TV.startTimeMsUTC .. TV.endTimeMsUTC).
#  - urlpagegroup: the constant 'None' for every row
#    (presumably filled in later by the classification step - confirm).
#  - urlgroup: when the url contains a '?', the url is cut just before the first '?'
#    found after its first 8 characters (presumably to skip the scheme prefix - confirm)
#    and '/' is trimmed from the result; otherwise the whole url is used with '/' trimmed.
# The GROUP BY has no aggregate columns, so it only de-duplicates the rows.
toppageurl = query("""\
    select 
       'None' as urlpagegroup,
       CASE WHEN (position('?' in url) > 0) then trim('/' from (substring(url for position('?' in substring(url from 9)) +7))) else trim('/' from url) end as urlgroup
      FROM $(UP.resourceTable)
      where
      "timestamp" between $(TV.startTimeMsUTC) and $(TV.endTimeMsUTC)
       group by urlgroup,urlpagegroup
 """);    

In [8]:
#save for debug
# Keep an independent deep copy of the query result; a later cell restores
# toppageurl from this copy before processing it, so the query need not be re-run.
toppageurlbackup = deepcopy(toppageurl);        

In [9]:
# Load the URL classification package via a relative path.
include("../../Lib/URL-Classification-Package-v2.0.jl")

# Build the well-known host and path lookups, passing the debug flag from SP.
# NOTE(review): presumably these globals are read by the classification helpers
# called in a later cell - confirm.
WellKnownHost = wellKnownHostDictionary(SP.debug);
WellKnownPath = wellKnownPathDictionary(SP.debug);


In [10]:
# Same include as in the previous cell.
# NOTE(review): presumably repeated so edits to the package are picked up when
# this cell is re-run during development - confirm.
include("../../Lib/URL-Classification-Package-v2.0.jl")

# Debug
# Restore the untouched query result so this cell can be re-run from a clean state.
toppageurl = deepcopy(toppageurlbackup)

# Return values are discarded, so both calls presumably modify toppageurl in place - confirm.
# NOTE(review): the recorded output of this cell was truncated after 524291 bytes,
# so these calls print heavily even with showProblems=false.
scrubUrlToPrint(toppageurl);
classifyUrl(toppageurl,showProblems=false);        
#

Host adservice.google.com Path /adsid/integrator.js
Host script.crazyegg.com Path /pages/scripts/0018/0221.js
Host d3qdfnco3bamip.cloudfront.net Path /wjs/v3.0.1510876983/javascripts/livefyre_mod_main.js
Host script.crazyegg.com Path /pages/scripts/0018/0221.js
Yourshot to do: Classify /profile/1250335
Yourshot to do: Classify /akam/10/pixel_6be5411
Yourshot to do: Classify /about
Host d3qdfnco3bamip.cloudfront.net Path /wjs/v3.0.1510876983/javascripts/livefyre_base.js
Yourshot to do: Classify /akam/10/3f24616
Host widget.surveymonkey.com Path /collect/website/js/cookie.js
Host delivery.b.switchadhub.com Path /adserver/sat.js
Host vdna.exelator.com Path /load
Host adservice.google.com Path /adsid/integrator.sync.js
Yourshot to do: Classify /profile/382267
Host d3qdfnco3bamip.cloudfront.net Path /wjs/v3.0.1510876983/css/editor.css
Yourshot to do: Classify /daily-dozen
Yourshot to do: Classify /akam/10/425e1151
Yourshot to do: Classify /photos/11306564
Host d3qdfnco3bamip.cloudfront.net 

Excessive output truncated after 524291 bytes.


Yourshot to do: Classify /akam/10/876852c
Yourshot to do: Classify /photos/11240816
Yourshot to do: Classify /photos/2758901
Yourshot to do: Classify /akam/10/pixel_1a2f5a2d
Yourshot to do: Classify /akam/10/pixel_7d0a61a9
Yourshot to do: Classify /photos/11287062
Yourshot to do: Classify /tags/trees
Host ade.googlesyndication.com Path /ddm/activity/dc_oe=ChMIxoeqws_P1wIVSWTBCh2m_QGJEAAYACC4-YAt_met=1_ecn1=1_etm1=0_eid1=1...
Yourshot to do: Classify /photos/11325830
Yourshot to do: Classify /profile/1620913
Host b.fox.com Path /b/ss/fsnatgeowebprod,fsfbcglobalprod/1/JS-1.5/s26314169999088
Yourshot to do: Classify /profile/353716
Host ade.googlesyndication.com Path /ddm/activity/dc_oe=ChMIsqrQ8tvP1wIV0HUBCh3BLApsEAAYACCipYEt_met=1_ecn1=1_etm1=0_eid1=1...
Host ade.googlesyndication.com Path /ddm/activity/dc_oe=ChMIsqrQ8tvP1wIV0HUBCh3BLApsEAAYACCipYEt_met=1_ecn1=1_etm1=0_eid1=1...
Host ade.googlesyndication.com Path /ddm/activity/dc_oe=ChMI7cmzgdLP1wIV1kI3Ch3z3wyGEAAYACCipYEt_met=1_ecn1=