In [1]:
import numpy as np
import json
from collections import Counter
import math

In [2]:
def loadResultsFile(resultsFilepath):
    with open(resultsFilepath, 'r') as fIn:
        data = json.load(fIn)
    return data

In [3]:
def getKeyphrases(keyphrasesJsonPath):
    with open(keyphrasesJsonPath, 'r') as inF:
        keyphrasesDict = json.load(inF)
    return keyphrasesDict

In [4]:
def getStats(resultFilepath):#, outputFolder):
    #os.makedirs(outputFolder)
    
    allResults = loadResultsFile(resultFilepath)
    # get the keyphrases in order for each topic (as presented in the UI):
    keyphrasesByTopic = getKeyphrases('keyphrasesSuggestedQueriesNgramCount.json')
    
    allTopicsIterationCounts = []
    allTopicsInitialRatings = []
    allTopicsQueryRatings = []
    allTopicsQueryTypes = []
    allTopicsQueryBehaviors = []
    allTopicsInitialTimes = []
    allTopicsQueryTimes = []
    allTopicsInitialSummLens = []
    allTopicsSummExpansionLens = []
    allTopicsFullExploreTimes = []
    allTopicsFinalLengths= []
    allTopicsTotalTimes = []
    allTopicsQuestionnaireTimes = []
    
    for topicId in allResults:
        iterationCounts = []
        initialRatings = []
        queryRatings = [] # list of lists
        queryTypes = [] # list of lists
        queryBehavior = [] # list of lists of 'h' (highlight), 'f' (freetext), 'm' (moreinfo), or <keyphraseIndex>
        initialTimes = []
        queryTimes = [] # list of lists
        initialSummLens = []
        summExpansionLens = [] # list of lists
        fullExploreTimes = []
        finalLengths = []
        totalTimes = []
        questionnaireTimes = []
        for session in allResults[topicId]:
            iterationCounts.append(len(session["scores"]))
            initialRatings.append(session["scores"][0]["rating"])
            queryRatings.append([s["rating"] for s in session["scores"][1:]])
            queryTypes.append([s["query"][1] for s in session["scores"][1:]])
            
            queryBehaviorInSession = []
            for s in session["scores"][1:]:
                queryType = s["query"][1]
                if queryType == 'highlight':
                    queryBehaviorInSession.append('h')
                elif queryType == 'freetext':
                    queryBehaviorInSession.append('f')
                elif queryType == 'moreinfo':
                    queryBehaviorInSession.append('m')
                elif queryType == 'keyword' or queryType == 'repeat':
                    queryText = s["query"][0].strip()
                    if queryText in keyphrasesByTopic[topicId]:
                        queryBehaviorInSession.append(keyphrasesByTopic[topicId].index(queryText))
                    else:
                        queryBehaviorInSession.append(-1)
            queryBehavior.append(queryBehaviorInSession)
            
            lastTime = 0
            allTimes = []
            lastLen = session["scores"][0]["summary_len"]
            allLens = [lastLen]
            for iteration in session["scores"][1:]:
                curTime = iteration["query"][2]
                allTimes.append(curTime - lastTime)
                lastTime = curTime
                curLen = iteration["summary_len"]
                allLens.append(curLen - lastLen)
                lastLen = curLen
            initialTimes.append(allTimes[0])
            queryTimes.append(allTimes[1:])
            initialSummLens.append(allLens[0])
            summExpansionLens.append(allLens[1:])
            fullExploreTimes.append(sum(allTimes))
            finalLengths.append(session["scores"][-1]["summary_len"])
            totalTimes.append(session['totalTime'])
            questionnaireTimes.append(session['questionnaireTime'])
            
        allTopicsIterationCounts.extend(iterationCounts)
        allTopicsInitialRatings.extend(initialRatings)
        allTopicsQueryRatings.extend(queryRatings)
        allTopicsQueryTypes.extend(queryTypes)
        allTopicsQueryBehaviors.extend(queryBehavior)
        allTopicsInitialTimes.extend(initialTimes)
        allTopicsQueryTimes.extend(queryTimes)
        allTopicsInitialSummLens.extend(initialSummLens)
        allTopicsSummExpansionLens.extend(summExpansionLens)
        allTopicsFullExploreTimes.extend(fullExploreTimes)
        allTopicsFinalLengths.extend(finalLengths)
        allTopicsTotalTimes.extend(totalTimes)
        allTopicsQuestionnaireTimes.extend(questionnaireTimes)
            
        showStats(topicId, iterationCounts, initialRatings, 
                  queryRatings, queryTypes, queryBehavior, initialTimes,
                  queryTimes, initialSummLens, summExpansionLens,
                  fullExploreTimes, finalLengths, totalTimes, questionnaireTimes)
        
        
        
    print('\n\n')
    showStats('ALL', allTopicsIterationCounts, allTopicsInitialRatings, 
              allTopicsQueryRatings, allTopicsQueryTypes, allTopicsQueryBehaviors, allTopicsInitialTimes,
              allTopicsQueryTimes, allTopicsInitialSummLens, allTopicsSummExpansionLens,
              allTopicsFullExploreTimes, allTopicsFinalLengths, allTopicsTotalTimes, allTopicsQuestionnaireTimes)

In [5]:
def showStats(topicId, iterationCounts, initialRatings,
              queryRatings, queryTypes, queryBehaviors, initialTimes,
              queryTimes, initialSummLens, summExpansionLens, fullExploreTimes, finalLengths,
              totalTimes, questionnaireTimes):
    
    allQueryRatings = [r for queryRatingsList in queryRatings for r in queryRatingsList]
    allQueryTypes = [t for queryTypesList in queryTypes for t in queryTypesList]
    allQueryTimes = [t for queryTimesList in queryTimes for t in queryTimesList]
    allSummExpansionLens = [l for summExpansionLensList in summExpansionLens for l in summExpansionLensList]
    allQueryTypesCounter = Counter(allQueryTypes)
    queryTypeNames = sorted(allQueryTypesCounter.keys())
    
    allQueryTypesFirstThird = [t for queryTypesList in queryTypes for t in queryTypesList[:math.ceil((1.0/3)*len(queryTypesList))]]
    allQueryTypesFirstThirdCounter = Counter(allQueryTypesFirstThird)
    allQueryTypesSecondThird = [t for queryTypesList in queryTypes for t in queryTypesList[math.ceil((1.0/3)*len(queryTypesList)):math.ceil((2.0/3)*len(queryTypesList))]]
    allQueryTypesSecondThirdCounter = Counter(allQueryTypesSecondThird)
    allQueryTypesThirdThird = [t for queryTypesList in queryTypes for t in queryTypesList[math.ceil((2.0/3)*len(queryTypesList)):]]
    allQueryTypesThirdThirdCounter = Counter(allQueryTypesThirdThird)
    
    print("topicId: {}".format(topicId))
    print('\tNum sessions:\t\t\t{}'.format(len(initialRatings)))
    print("\tIterations/session:\t\t{:.2f} ({:.2f})".format(np.mean(iterationCounts), np.std(iterationCounts)))
    print("\tInitial rating:\t\t\t{:.2f} ({:.2f})".format(np.mean(initialRatings), np.std(initialRatings)))
    print("\tInitial time (sec):\t\t{:.2f} ({:.2f})".format(np.mean(initialTimes), np.std(initialTimes)))
    print("\tInitial summ len (words):\t{:.2f} ({:.2f})".format(np.mean(initialSummLens), np.std(initialSummLens)))
    print("\tQuery ratings:\t\t\t{:.2f} ({:.2f})".format(np.mean(allQueryRatings), np.std(allQueryRatings)))
    print("\tQuery times (sec):\t\t{:.2f} ({:.2f})".format(np.mean(allQueryTimes), np.std(allQueryTimes)))
    print("\tExpansion lens (words):\t\t{:.2f} ({:.2f})".format(np.mean(allSummExpansionLens), np.std(allSummExpansionLens)))
    print("\tFull explore time (sec):\t{:.2f} ({:.2f})".format(np.mean(fullExploreTimes), np.std(fullExploreTimes)))
    print("\tFull questionnaire time (sec):\t{:.2f} ({:.2f})".format(np.mean(questionnaireTimes), np.std(questionnaireTimes)))
    print("\tFull time (sec):\t\t{:.2f} ({:.2f})".format(np.mean(totalTimes), np.std(totalTimes)))
    print("\tFull length (words):\t\t{:.2f} ({:.2f})".format(np.mean(finalLengths), np.std(finalLengths)))
    numAbove250Words = len([l for l in finalLengths if l > 250])
    print("\tNum sessions >250 words:\t{} ({:.2f}%)".format(numAbove250Words, float(numAbove250Words*100) / len(finalLengths)))
    for maxTimeSec in [360, 300, 240]:
        fullExploreTimesUnderLimit = [t for t in fullExploreTimes if t < maxTimeSec]
        print("\tFull explore time <{} sec:\t{:.2f} ({:.2f}) - {} of {} sessions ({:.2f}%)".format(maxTimeSec,
            np.mean(fullExploreTimesUnderLimit), np.std(fullExploreTimesUnderLimit),
            len(fullExploreTimesUnderLimit), len(fullExploreTimes), float(len(fullExploreTimesUnderLimit)*100) / len(fullExploreTimes)))
    
    for groupAllTypes, groupCounter, label in [(allQueryTypesFirstThird, allQueryTypesFirstThirdCounter, '1st'), (allQueryTypesSecondThird, allQueryTypesSecondThirdCounter, '2nd'), (allQueryTypesThirdThird, allQueryTypesThirdThirdCounter, '3rd')]:
        print('\t----------')
        for queryType in queryTypeNames:
            if queryType in groupCounter:
                queryTypeCount = groupCounter[queryType]
                countProportion = float(queryTypeCount) / len(groupAllTypes)
                print('\tQuery {} {}\t\t{} ({:.1f}% {}/{})'.format(queryType, label, '-'*(int(countProportion*20)), countProportion*100, queryTypeCount, len(groupAllTypes)))
            else:
                print('\tQuery {} {}\t\t{} ({:.1f}% {}/{})'.format(queryType, label, '|', 0.0, 0, len(groupAllTypes)))
    
    print('\t----------')
    for queryType in queryTypeNames:
        queryTypeCount = allQueryTypesCounter[queryType]
        countProportion = float(queryTypeCount) / len(allQueryTypes)
        print('\tQuery {} All\t\t{} ({:.1f}% {}/{})'.format(queryType, '-'*(int(countProportion*60)), countProportion*100, queryTypeCount, len(allQueryTypes)))
        
    print('\t----------')
    for queryBehavior in queryBehaviors:
        print('\t{}'.format(queryBehavior))
    
    print()
    

In [7]:
INPUT_RESULTS_FILE = '../MechanicalTurk/RealSessions/results_SummarizerClustering.json'
getStats(INPUT_RESULTS_FILE)

topicId: D0601
	Num sessions:			4
	Iterations/session:		16.00 (4.30)
	Initial rating:			0.75 (0.17)
	Initial time (sec):		45.80 (42.52)
	Initial summ len (words):	86.00 (0.00)
	Query ratings:			0.65 (0.25)
	Query times (sec):		15.13 (11.81)
	Expansion lens (words):		40.70 (11.24)
	Full explore time (sec):	257.65 (104.27)
	Full questionnaire time (sec):	85.44 (25.12)
	Full time (sec):		360.69 (115.24)
	Full length (words):		696.50 (128.03)
	Num sessions >250 words:	4 (100.00%)
	Full explore time <360 sec:	204.88 (57.92) - 3 of 4 sessions (75.00%)
	Full explore time <300 sec:	204.88 (57.92) - 3 of 4 sessions (75.00%)
	Full explore time <240 sec:	166.02 (22.43) - 2 of 4 sessions (50.00%)
	----------
	Query freetext 1st		------- (38.1% 8/21)
	Query highlight 1st		----- (28.6% 6/21)
	Query keyword 1st		----- (28.6% 6/21)
	Query repeat 1st		 (4.8% 1/21)
	----------
	Query freetext 2nd		-------- (40.0% 8/20)
	Query highlight 2nd		------ (30.0% 6/20)
	Query keyword 2nd		--- (15.0% 3/20)
	Query

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


In [8]:
INPUT_RESULTS_FILE = '../MechanicalTurk/RealSessions/results_SummarizerTextRankPlusLexical.json'
getStats(INPUT_RESULTS_FILE)

topicId: D0601
	Num sessions:			4
	Iterations/session:		11.25 (2.49)
	Initial rating:			0.85 (0.09)
	Initial time (sec):		39.38 (13.88)
	Initial summ len (words):	86.00 (0.00)
	Query ratings:			0.63 (0.25)
	Query times (sec):		26.10 (39.17)
	Expansion lens (words):		51.32 (9.94)
	Full explore time (sec):	280.80 (137.76)
	Full questionnaire time (sec):	111.42 (61.20)
	Full time (sec):		437.67 (113.53)
	Full length (words):		612.00 (132.75)
	Num sessions >250 words:	4 (100.00%)
	Full explore time <360 sec:	202.77 (30.80) - 3 of 4 sessions (75.00%)
	Full explore time <300 sec:	202.77 (30.80) - 3 of 4 sessions (75.00%)
	Full explore time <240 sec:	181.17 (4.87) - 2 of 4 sessions (50.00%)
	----------
	Query freetext 1st		- (6.2% 1/16)
	Query highlight 1st		------- (37.5% 6/16)
	Query keyword 1st		---------- (50.0% 8/16)
	Query repeat 1st		- (6.2% 1/16)
	----------
	Query freetext 2nd		- (7.7% 1/13)
	Query highlight 2nd		---- (23.1% 3/13)
	Query keyword 2nd		------------ (61.5% 8/13)
	Query 