## Tables and Data Source setup

In [None]:
using ODBC
using DataFrames
using DSWB
using Formatting
using URIParser

# Data source configuration: the Redshift endpoint name and the two table names
# that the rest of this notebook reads as globals.
dsn = "dswb-natgeo" # Redshift endpoint
table = "beacons_4744" # beacon table name
tableRt = "beacons_4744_rt" # resource table name (registered below as RESOURCE_TABLE)

# Connect to Beacon Data
setRedshiftEndpoint(dsn)
setTable(table)
setTable(tableRt, tableType = "RESOURCE_TABLE")
;

In [None]:
# Load the two local library files from ../../Lib.
include("../../Lib/Include-Package-v2.1.jl")
include("../../Lib/URL-Classification-Package-v2.0.jl")
;

In [None]:
# Build TV, the time-window object whose startTimeMs, endTimeMs and timeString
# fields are read by later cells. The commented lines are alternative ways of
# building TV that are kept for reference.
#TV = timeVariables(2017,11,15,23,59,2017,11,16,23,59)
#TV = weeklyTimeVariables(days=7)
TV = yesterdayTimeVariables()
;

In [None]:
#WellKnownHostDirectory = wellKnownHostEncyclopedia(SP.debug);
#WellKnownPath = wellKnownPathDictionary(SP.debug);

In [None]:

        # Fetch at most three rows of the resource table inside the TV time window.
        localTableDF = query("""\
        select * from $tableRt
            where 
            "timestamp" between $(TV.startTimeMs) and $(TV.endTimeMs) 
            limit 3
        """)

        # Label the size with the table that was actually queried (tableRt).
        println("$tableRt count is ",size(localTableDF))

# Additional filters that are not applied by the query above:
#            user_agent_device_type ilike '$(deviceType)' and
#            page_group ilike '$(pageGroup)' and 
#            timers_t_done >= $(rangeLowerMs) and timers_t_done < $(rangeUpperMs)
        



In [None]:
# Show the current contents of localTableDF.
display(localTableDF)

In [None]:

        # Fetch every resource-table row inside the TV time window whose url
        # matches the keywee.min.js pattern.
        localTableDF = query("""\
        select * from $tableRt
            where 
            "timestamp" between $(TV.startTimeMs) and $(TV.endTimeMs) and
            url ilike '%dc8xl0ndzn2cb.cloudfront.net/js/nationalgeographic/v3/keywee.min.js%'
        """)

        # Label the size with the table that was actually queried (tableRt).
        println("$tableRt count is ",size(localTableDF))

# Additional filters that are not applied by the query above:
#            user_agent_device_type ilike '$(deviceType)' and
#            page_group ilike '$(pageGroup)' and 
#            timers_t_done >= $(rangeLowerMs) and timers_t_done < $(rangeUpperMs)
        



In [None]:
# Fetch every row of `tableRt` whose "timestamp" lies between startTimeMs and
# endTimeMs and whose url matches the `localResource` ILIKE pattern.
#
# The keywords pageGroup, deviceType, rangeLowerMs and rangeUpperMs are accepted
# but not applied: the filters that would use them are the commented-out lines
# inside the body.
#
# Returns the query result. If the query throws, the exception is printed under
# this function's own name and the function yields nothing.
function estimateResources(tableRt::ASCIIString, startTimeMs::Int64, endTimeMs::Int64; 
    pageGroup::ASCIIString="%", localResource::ASCIIString="%", deviceType::ASCIIString="%", rangeLowerMs::Float64=1000.0, rangeUpperMs::Float64=600000.0
    )

    try
        localTableDF = query("""\
        select * from $tableRt
            where 
            "timestamp" between $startTimeMs and $endTimeMs and
            url ilike '$(localResource)'
        """)

# Filters not applied yet (they would consume the unused keywords):
#            user_agent_device_type ilike '$(deviceType)' and
#            page_group ilike '$(pageGroup)' and 
#            timers_t_done >= $(rangeLowerMs) and timers_t_done < $(rangeUpperMs)

        return localTableDF
    catch y
        # Previously logged as "urlDetailTables Exception", which named a different function.
        println("estimateResources Exception ",y)
    end
end



In [None]:
# Run the resource study over the TV time window for the keywee.min.js url pattern,
# passing "News Article" as the page group and "Mobile" as the device type.
# studyResource is defined in the following cell, so that cell has to be evaluated first.
studyResource(tableRt,TV.startTimeMs,TV.endTimeMs,TV.timeString,"News Article","%dc8xl0ndzn2cb.cloudfront.net/js/nationalgeographic/v3/keywee.min.js%","Mobile")

In [None]:
# Report on one resource for one page group.
#
# Steps, in order:
#   1. fetch matching rows through estimateResources and print the frame size;
#   2. display basic statistics of the timers_t_done column;
#   3. build the per-URL timing table, rename its columns and run
#      removeNegitiveTime over the timing columns;
#   4. display statistics of the Total column, the per-page-group summary
#      (first 100 rows at most) and the end-to-end treemap.
#
# timeString is shown with both chart titles. productPageGroup appears in the
# titles and is forwarded as pageGroup; localResource and deviceType are
# forwarded to estimateResources as well.
#
# NOTE(review): the tableRt argument is never read - the query and the status
# line both use the global `table`. Confirm which table is intended.
# NOTE(review): TV, UP and SP are read from global scope.
#
# Any exception is caught and printed; nothing is rethrown.
function studyResource(
    tableRt::ASCIIString,startTimeMs::Int64,endTimeMs::Int64,timeString::ASCIIString,productPageGroup::ASCIIString,localResource::ASCIIString,deviceType::ASCIIString
    )
    try
        # Is there data?
        localTableDF = estimateResources(table,startTimeMs,endTimeMs,pageGroup=productPageGroup,localResource=localResource,deviceType=deviceType)
        println("$table count is ",size(localTableDF))

        # Stats on the data
        statsDF = basicStatsFromDV(localTableDF[:timers_t_done])

        displayTitle(chart_title = "Beacon Data Stats for $(productPageGroup)", chart_info = [timeString],showTimeStamp=false)
        beautifyDF(statsDF[:,:])

        # First-row q25/q75 values; computed but not consumed further down.
        rangeLower = statsDF[1:1,:q25][1]
        rangeUpper = statsDF[1:1,:q75][1]

        # Both selectors are fixed here, so the else branch below is the one taken.
        studyTime = 0
        studySession = "None"

        if studyTime > 0
            toppageurl = sessionUrlTableDF(TV,UP,SP,studySession,studyTime)
        elseif studySession != "None"
            toppageurl = allSessionUrlTableDF(TV,UP,SP,studySession)
        else
            toppageurl = allPageUrlTableDF(TV,UP)
        end

        toppageurl = names!(toppageurl[:,:],
            [:urlpagegroup,:Start,:Total,:Redirect,:Blocking,:DNS,:TCP,:Request,:Response,
             :Gap,:Critical,:urlgroup,:request_count,:label,:load_time,:beacon_time])

        # Work on a private deep copy. The original made a debug backup and then
        # deep-copied it back, i.e. two full copies for the same isolation.
        toppageurl = deepcopy(toppageurl)

        # Return values are ignored, so these calls presumably clean the column in place.
        for timingCol in [:Total,:Redirect,:Blocking,:DNS,:TCP,:Request,:Response]
            removeNegitiveTime(toppageurl,timingCol)
        end

        summaryStatsDF = basicStatsFromDV(toppageurl[:Total])

        displayTitle(chart_title = "RT Data Stats for $(productPageGroup)", chart_info = [timeString],showTimeStamp=false)
        beautifyDF(summaryStatsDF[:,:])

        scrubUrlToPrint(toppageurl,:urlgroup)
        classifyUrl(toppageurl)

        summaryPageGroup = summarizePageGroups(toppageurl)
        beautifyDF(summaryPageGroup[1:min(end,100),:])

        # This is the non-Url specific report so get the summary table and overwrite toppageurl
        toppageurl = deepcopy(summaryPageGroup)

#        itemCountTreemap(TV,UP,SP,toppageurl)
        endToEndTreemap(TV,UP,SP,toppageurl)
#        blockingTreemap(TV,UP,SP,toppageurl)
#        requestTreemap(TV,UP,SP,toppageurl)
#        responseTreemap(TV,UP,SP,toppageurl)
#        dnsTreemap(TV,UP,SP,toppageurl)
#        tcpTreemap(TV,UP,SP,toppageurl)
#        redirectTreemap(TV,UP,SP,toppageurl)
    catch y
        # Previously logged as "typeAll Exception", a label copied from another function.
        println("studyResource Exception ",y)
    end

end

# Collapse the per-URL timing table into one row per urlpagegroup.
#
# For every group the Total, Redirect, Blocking, DNS, TCP, Request, Response,
# Gap, Critical, request_count and beacon_time columns are summed. load_time is
# set to (summed Total / summed request_count) / 1000. In each output row Start
# is 0, label is "Label" and urlgroup repeats the group's urlpagegroup value.
# The all-zero "Grand Total" seed row created up front stays in the result,
# which is sorted by Total, largest first.
#
# On any exception the error is printed and the function yields nothing.
# No caller of this function appears in the visible source.
function summarizePageGroupsMaybe(toppageurl::DataFrame)
    try
        # Seed row: establishes the column order and an initial value per column.
        seedRow = Any[
            (:urlpagegroup,"Grand Total"),(:Start,0),(:Total,0),(:Redirect,0),
            (:Blocking,0),(:DNS,0),(:TCP,0),(:Request,0),(:Response,0),(:Gap,0),
            (:Critical,0),(:urlgroup,""),(:request_count,0),(:label,""),
            (:load_time,0.0),(:beacon_time,0.0)
        ]
        summaryPageGroup = DataFrame()
        for (colName,seedValue) in seedRow
            summaryPageGroup[colName] = seedValue
        end

        # Columns accumulated per group, visited in this order for every row.
        summedCols = [:Total,:Redirect,:Blocking,:DNS,:TCP,:Request,:Response,:Gap,
                      :Critical,:request_count,:load_time,:beacon_time]

        for groupDf in groupby(toppageurl,:urlpagegroup)
            acc = Dict{Symbol,Any}()
            for colName in summedCols
                acc[colName] = 0
            end
            acc[:load_time] = 0.0

            for row in eachrow(groupDf)
                for colName in summedCols
                    acc[colName] += row[colName]
                end
            end

            #convert to seconds (this replaces the load_time sum gathered above)
            acc[:load_time] = (acc[:Total] / acc[:request_count]) / 1000

            groupName = groupDf[1:1,:urlpagegroup]
            push!(summaryPageGroup,vcat(groupName,0,acc[:Total],acc[:Redirect],acc[:Blocking],
                acc[:DNS],acc[:TCP],acc[:Request],acc[:Response],acc[:Gap],acc[:Critical],
                groupName,acc[:request_count],"Label",acc[:load_time],acc[:beacon_time]))
        end

        sort!(summaryPageGroup,cols=[order(:Total,rev=true)])
        return summaryPageGroup
    catch y
        println("summarizePageGroup Exception ",y)
    end
end


In [None]:
# Beacon-wide report for one page group.
#
# Loads the default beacon frame, displays statistics of its timers_t_done
# column, builds the per-URL timing table, runs removeNegitiveTime over the
# timing columns, displays statistics of the Total column, then shows the
# per-page-group summary (first 100 rows at most) and the end-to-end treemap.
#
# `table` is only used in the printed status line. timeString and
# productPageGroup feed the chart titles. startTimeMs, endTimeMs, localUrl and
# deviceType are not read by the body. TV, UP and SP come from global scope.
#
# Any exception is caught and printed; nothing is rethrown.
function typeAllBody(
    table::ASCIIString,startTimeMs::Int64,endTimeMs::Int64,timeString::ASCIIString,productPageGroup::ASCIIString,localUrl::ASCIIString,deviceType::ASCIIString
    )
    try
        # Is there data?
        beaconDF = defaultBeaconsToDF(TV,UP,SP)
        println("$table count is ",size(beaconDF))

        # Stats on the data
        beaconStats = basicStatsFromDV(beaconDF[:timers_t_done])
        displayTitle(chart_title = "Beacon Data Stats for $(productPageGroup)", chart_info = [timeString],showTimeStamp=false)
        beautifyDF(beaconStats[:,:])

        # First-row q25/q75 values; computed but not consumed further down.
        q25Value = beaconStats[1:1,:q25][1]
        q75Value = beaconStats[1:1,:q75][1]

        # Both selectors are fixed here, so the final branch is the one taken.
        studyTime = 0
        studySession = "None"
        urlTimings = if studyTime > 0
            sessionUrlTableDF(TV,UP,SP,studySession,studyTime)
        elseif studySession != "None"
            allSessionUrlTableDF(TV,UP,SP,studySession)
        else
            allPageUrlTableDF(TV,UP)
        end

        columnNames = [:urlpagegroup,:Start,:Total,:Redirect,:Blocking,:DNS,:TCP,:Request,
                       :Response,:Gap,:Critical,:urlgroup,:request_count,:label,:load_time,
                       :beacon_time]
        urlTimings = names!(urlTimings[:,:],columnNames)

        # Debug: take a backup, then continue from a deep copy of that backup.
        debugBackup = deepcopy(urlTimings)
        urlTimings = deepcopy(debugBackup)

        for timingCol in [:Total,:Redirect,:Blocking,:DNS,:TCP,:Request,:Response]
            removeNegitiveTime(urlTimings,timingCol)
        end

        totalStats = basicStatsFromDV(urlTimings[:Total])
        displayTitle(chart_title = "RT Data Stats for $(productPageGroup)", chart_info = [timeString],showTimeStamp=false)
        beautifyDF(totalStats[:,:])

        scrubUrlToPrint(urlTimings,:urlgroup)
        classifyUrl(urlTimings)

        groupSummary = summarizePageGroups(urlTimings)
        beautifyDF(groupSummary[1:min(end,100),:])

        # This is the non-Url specific report, so the treemap gets a copy of the summary table.
        treemapInput = deepcopy(groupSummary)

#        itemCountTreemap(TV,UP,SP,toppageurl)
        endToEndTreemap(TV,UP,SP,treemapInput)
#        blockingTreemap(TV,UP,SP,toppageurl)
#        requestTreemap(TV,UP,SP,toppageurl)
#        responseTreemap(TV,UP,SP,toppageurl)
#        dnsTreemap(TV,UP,SP,toppageurl)
#        tcpTreemap(TV,UP,SP,toppageurl)
#        redirectTreemap(TV,UP,SP,toppageurl)
    catch y
        println("typeAll Exception ",y)
    end

end

# Collapse the per-URL timing table into one row per urlpagegroup.
#
# For every group the Total, Redirect, Blocking, DNS, TCP, Request, Response,
# Gap, Critical, request_count and beacon_time columns are summed. load_time is
# set to (summed Total / summed request_count) / 1000. In each output row Start
# is 0, label is "Label" and urlgroup repeats the group's urlpagegroup value.
# The all-zero "Grand Total" seed row created up front stays in the result,
# which is sorted by Total, largest first.
#
# On any exception the error is printed and the function yields nothing.
function summarizePageGroups(toppageurl::DataFrame)
    try
        # Seed row: establishes the column order and an initial value per column.
        seedRow = Any[
            (:urlpagegroup,"Grand Total"),(:Start,0),(:Total,0),(:Redirect,0),
            (:Blocking,0),(:DNS,0),(:TCP,0),(:Request,0),(:Response,0),(:Gap,0),
            (:Critical,0),(:urlgroup,""),(:request_count,0),(:label,""),
            (:load_time,0.0),(:beacon_time,0.0)
        ]
        summaryPageGroup = DataFrame()
        for (colName,seedValue) in seedRow
            summaryPageGroup[colName] = seedValue
        end

        # Columns accumulated per group, visited in this order for every row.
        summedCols = [:Total,:Redirect,:Blocking,:DNS,:TCP,:Request,:Response,:Gap,
                      :Critical,:request_count,:load_time,:beacon_time]

        for groupDf in groupby(toppageurl,:urlpagegroup)
            acc = Dict{Symbol,Any}()
            for colName in summedCols
                acc[colName] = 0
            end
            acc[:load_time] = 0.0

            for row in eachrow(groupDf)
                for colName in summedCols
                    acc[colName] += row[colName]
                end
            end

            #convert to seconds (this replaces the load_time sum gathered above)
            acc[:load_time] = (acc[:Total] / acc[:request_count]) / 1000

            groupName = groupDf[1:1,:urlpagegroup]
            push!(summaryPageGroup,vcat(groupName,0,acc[:Total],acc[:Redirect],acc[:Blocking],
                acc[:DNS],acc[:TCP],acc[:Request],acc[:Response],acc[:Gap],acc[:Critical],
                groupName,acc[:request_count],"Label",acc[:load_time],acc[:beacon_time]))
        end

        sort!(summaryPageGroup,cols=[order(:Total,rev=true)])
        return summaryPageGroup
    catch y
        println("summarizePageGroup Exception ",y)
    end
end


In [None]:
#typeAllBodyQuick(TV,UP,SP,"News Article","%","Mobile")


#typeAllBodyQuick(TV,UP,SP,"Your Shot","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"Channel","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"Adventure","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"Photography","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"Kids","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"Video","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"Nat Geo Site","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"Nat Geo Homepage","%","Mobile")
#typeAllBodyQuick(TV,UP,SP,"TravelAEM","%","Mobile")