# Per session and per user analysis

Per-session and per-user analysis of game events: checkpoint times, deaths, craft events and per-user data vectors.

# Table of Contents

1.  [Preparation](#preparation)

2.  [Constants](#constants)

3.  [Functions](#functions)

# Preparation
<a id=preparation />

In [None]:
%run "../Functions/2. Game sessions.ipynb"

# Constants
<a id=constants />

In [None]:
# Columns kept when narrowing the event log down to per-session data.
perSessionRelevantColumns = ['sessionId', 'serverTime', 'section']

# 'reach' and 'death' events of df152, restricted to the columns above.
reachEvents = df152.loc[df152['type'] == 'reach', perSessionRelevantColumns]
deathEvents = df152.loc[df152['type'] == 'death', perSessionRelevantColumns]

# Row labels of the per-user frames:
# 'tutorial1.Checkpoint00' up to 'tutorial1.Checkpoint14'.
timedSectionsIndex = ['tutorial1.Checkpoint{:02d}'.format(number) for number in range(15)]

# Column layouts of the frames built by the functions below.
timedSectionsReachedColumns = ['serverTime', 'firstReached', 'firstCompletionDuration']
timedSectionsDeathsColumns = ['deathsCount']
eventSectionsCountColumns = ['section', 'count']
eventSectionsColumns = ['count']

# Functions
<a id=functions />

In [None]:
## Comparison between game and Google form performance

In [None]:
def getCheckpointsTimes( sessionId ):
    """Return, per tutorial section of a session, when it was first reached.

    The result is indexed by section and has three columns:
    'serverTime' (minimum server time of the 'reach' events of that section),
    'firstReached' (the same value converted with pd.to_datetime) and
    'firstCompletionDuration' (difference between consecutive rows of
    'firstReached', in the order produced by the groupby).
    """
    sessionReaches = reachEvents[reachEvents['sessionId'] == sessionId]
    # Only sections whose name starts with 'tutorial'; missing names are dropped.
    inTutorial = sessionReaches['section'].str.startswith('tutorial', na=False)
    tutorialReaches = sessionReaches[inTutorial]

    firstTimes = tutorialReaches.groupby("section").agg({ "serverTime": np.min })
    reachedAt = pd.to_datetime(firstTimes["serverTime"])
    firstTimes["firstReached"] = reachedAt
    firstTimes["firstCompletionDuration"] = firstTimes["firstReached"].diff()
    return firstTimes

In [None]:
def getDeaths( sessionId ):
    """Count the 'death' events of a session per tutorial section.

    Returns a frame with one row per section that has at least one death,
    with columns 'section' and 'deathsCount'.
    """
    sessionDeaths = deathEvents[deathEvents['sessionId'] == sessionId]
    # Only sections whose name starts with 'tutorial'; missing names are dropped.
    inTutorial = sessionDeaths['section'].str.startswith('tutorial', na=False)
    countsBySection = sessionDeaths[inTutorial].groupby("section").size()
    return countsBySection.reset_index(name='deathsCount')

In [None]:
def getCheckpointsTimesUser( userId ):
    """Return, per checkpoint, the earliest time a user reached it.

    getCheckpointsTimes is called on every session returned by
    getAllSessionsOfUser for this user, and the per-session results are merged
    by keeping, for each checkpoint, the earliest 'firstReached' value
    together with its 'serverTime'.

    The result is indexed by timedSectionsIndex with the columns of
    timedSectionsReachedColumns. Checkpoints never reached stay empty (NaN).
    'firstCompletionDuration' is not filled in by this function.

    A checkpoint returned by getCheckpointsTimes that is not part of
    timedSectionsIndex raises KeyError.
    """
    # List of associated sessions
    sessionsList = getAllSessionsOfUser( df152, userId, True )

    timedSections = pd.DataFrame(data=None, columns=timedSectionsReachedColumns,index=timedSectionsIndex)

    for sessionId in sessionsList['sessionId']:
        times = getCheckpointsTimes( sessionId )

        # merge: for each checkpoint reached in this session, keep the oldest time
        for checkpointName in times.index:
            known = timedSections.at[checkpointName, 'firstReached']
            candidate = times.at[checkpointName, 'firstReached']
            # pd.isnull handles both NaN and NaT; np.isnan raises on timestamps.
            if pd.isnull(known) or candidate < known:
                # Single-step .loc writes: chained df[col][row] = ... assignments
                # are not guaranteed to modify the frame.
                timedSections.loc[checkpointName, 'serverTime'] = times.at[checkpointName, 'serverTime']
                timedSections.loc[checkpointName, 'firstReached'] = candidate

    return timedSections

In [None]:
def getDeathsUser( userId ):
    """Return the number of deaths of a user per checkpoint.

    getDeaths is called on every session returned by getAllSessionsOfUser for
    this user, and the per-session counts are added up.

    The result is indexed by timedSectionsIndex and has the single column
    'deathsCount', initialised to 0. A section returned by getDeaths that is
    not part of timedSectionsIndex raises KeyError.
    """
    # List of associated sessions
    sessionsList = getAllSessionsOfUser( df152, userId, True )

    deathsSections = pd.DataFrame(0, columns=timedSectionsDeathsColumns,index=timedSectionsIndex)

    for sessionId in sessionsList['sessionId']:
        deaths = getDeaths( sessionId )

        # merge by adding; a single-step .loc update is used because chained
        # df[col][row] = ... assignments are not guaranteed to modify the frame.
        for checkpointName, deathsCount in zip(deaths['section'], deaths['deathsCount']):
            deathsSections.loc[checkpointName, 'deathsCount'] += deathsCount

    return deathsSections

## Craft events: equip, unequip, add, remove

#### event-column association

    equip device = 'add' + customData.device
    
    unequip device = 'remove' + customData.device
    
    add brick = 'add' + customData.biobrick
    
    remove brick = 'remove' + customData.biobrick

In [None]:
# Static data
# craftEventsColumns = pd.DataFrame(
#    index=list(range(4)),
#    data={
#        'eventCode' : pd.Categorical(["equip","unequip","add","remove"]),
#        'eventType' : pd.Categorical(["add","remove","add","remove"]),
#        'column' : pd.Categorical(["customData.device","customData.device","customData.biobrick","customData.biobrick"]),
#    }
#)
#craftEventsColumns

In [None]:
# Static data
# Craft event codes, used as row labels of craftEventsColumns.
craftEventCodes = ["equip", "unequip", "add", "remove"]

# For each craft event code: the value of the 'type' column to look for
# ('eventType') and the column that must be filled in ('column').
craftEventsColumns = pd.DataFrame(
    {
        'eventType': pd.Categorical(["add", "remove"] * 2),
        'column': pd.Categorical(
            ["customData.device"] * 2 + ["customData.biobrick"] * 2
        ),
    },
    index=craftEventCodes,
)

In [None]:
def getSectionsCraftEvents( eventCode, sessionId ):
    """Count the craft events of one kind in a session, per tutorial section.

    eventCode is one of craftEventCodes (the index of craftEventsColumns); it
    is not the same thing as the 'type' column of the event log. For a given
    code, craftEventsColumns provides the 'type' value to select and the
    column that must be non-null.

    Returns a frame with columns 'section' and 'count', one row per section
    with at least one matching event. For an unknown eventCode a message is
    printed and an empty frame with those columns is returned.
    """
    if eventCode not in craftEventCodes:
        # str() so that a non-string code is reported instead of raising TypeError.
        print("incorrect event code '" + str(eventCode) + "'")
        return pd.DataFrame(0, columns=eventSectionsCountColumns, index=range(0))

    eventType = craftEventsColumns['eventType'][eventCode]
    requiredColumn = craftEventsColumns['column'][eventCode]

    events = df152[df152['type']==eventType]
    events = events[events[requiredColumn].notnull()]
    events = events.loc[:,perSessionRelevantColumns]

    perSession = events[events['sessionId']==sessionId]
    # Only sections whose name starts with 'tutorial'; missing names are dropped.
    perSession = perSession[perSession['section'].str.startswith('tutorial', na=False)]
    return perSession.groupby("section").size().reset_index(name='count')

In [None]:
def getUserSectionsCraftEvents( eventCode, userId, sessionsList = [] ):
    """Count the craft events of one kind of a user, per checkpoint.

    eventCode is one of craftEventCodes (the index of craftEventsColumns); it
    is not the same thing as the 'type' column of the event log.

    sessionsList is the user's sessions, read through its 'sessionId' entry;
    when it is empty it is fetched with getAllSessionsOfUser.

    getSectionsCraftEvents is called on every session and the counts are
    added up. The result is indexed by timedSectionsIndex with the single
    column 'count', initialised to 0. For an unknown eventCode a message is
    printed and the all-zero frame is returned. A section that is not part of
    timedSectionsIndex raises KeyError.
    """
    userSectionsEvents = pd.DataFrame(0, columns=eventSectionsColumns,index=timedSectionsIndex)

    if eventCode not in craftEventCodes:
        # str() so that a non-string code is reported instead of raising TypeError.
        print("incorrect event code '" + str(eventCode) + "'")
        return userSectionsEvents

    # List of associated sessions
    if(len(sessionsList) == 0):
        sessionsList = getAllSessionsOfUser( df152, userId, True )

    for sessionId in sessionsList['sessionId']:
        sessionSectionsEvents = getSectionsCraftEvents( eventCode, sessionId )

        # merge by adding; a single-step .loc update is used because chained
        # df[col][row] = ... assignments are not guaranteed to modify the frame.
        for checkpointName, count in zip(sessionSectionsEvents['section'], sessionSectionsEvents['count']):
            userSectionsEvents.loc[checkpointName, 'count'] += count

    return userSectionsEvents

In [None]:
def getUserSectionsCraftEventsTotal( eventCode, userId, sessionsList = [] ):
    """Return the sum of all per-checkpoint counts of a craft event for a user.

    eventCode is one of craftEventCodes (the index of craftEventsColumns); it
    is not the same thing as the 'type' column of the event log.
    The arguments are forwarded unchanged to getUserSectionsCraftEvents.
    """
    perCheckpoint = getUserSectionsCraftEvents( eventCode, userId, sessionsList )
    total = perCheckpoint.values.sum()
    return total

## Generic functions

### Generic count

In [None]:
def getSectionsEvents( eventType, sessionId ):
    """Count the events of a given type in a session, per tutorial section.

    Returns a frame with columns 'section' and 'count', one row per section
    with at least one matching event.
    """
    ofType = df152[df152['type'] == eventType].loc[:, perSessionRelevantColumns]
    inSession = ofType[ofType['sessionId'] == sessionId]
    # Only sections whose name starts with 'tutorial'; missing names are dropped.
    inTutorial = inSession['section'].str.startswith('tutorial', na=False)
    countsBySection = inSession[inTutorial].groupby("section").size()
    return countsBySection.reset_index(name='count')

In [None]:
def getUserSectionsEvents( eventType, userId, sessionsList=[] ):
    """Count the events of a given type of a user, per checkpoint.

    sessionsList is the user's sessions, read through its 'sessionId' entry;
    when it is empty it is fetched with getAllSessionsOfUser.

    getSectionsEvents is called on every session and the counts are added up.
    The result is indexed by timedSectionsIndex with the single column
    'count', initialised to 0. A section that is not part of
    timedSectionsIndex raises KeyError.
    """
    # List of associated sessions
    if(len(sessionsList) == 0):
        sessionsList = getAllSessionsOfUser( df152, userId, True )

    userSectionsEvents = pd.DataFrame(0, columns=eventSectionsColumns,index=timedSectionsIndex)

    for sessionId in sessionsList['sessionId']:
        sessionSectionsEvents = getSectionsEvents( eventType, sessionId )

        # merge by adding; a single-step .loc update is used because chained
        # df[col][row] = ... assignments are not guaranteed to modify the frame.
        for checkpointName, count in zip(sessionSectionsEvents['section'], sessionSectionsEvents['count']):
            userSectionsEvents.loc[checkpointName, 'count'] += count

    return userSectionsEvents

In [None]:
def getUserSectionsEventsTotal( eventType, userId, sessionsList=[] ):
    """Return the sum of all per-checkpoint counts of an event type for a user.

    The arguments are forwarded unchanged to getUserSectionsEvents.
    """
    perCheckpoint = getUserSectionsEvents( eventType, userId, sessionsList )
    total = perCheckpoint.values.sum()
    return total

### No section events

In [None]:
def getUserEventsTotal( eventType, userId, sessionsList=[] ):
    """Count the events of a given type over all sessions of a user.

    Unlike the per-section counters, no filter on the 'section' column is
    applied. sessionsList is the user's sessions, read through its
    'sessionId' entry; when it is empty it is fetched with
    getAllSessionsOfUser. A session id listed several times is counted
    several times.
    """
    if(len(sessionsList) == 0):
        sessionsList = getAllSessionsOfUser( df152, userId, True )

    # The filter on the event type does not depend on the session:
    # compute it once instead of once per session.
    eventsOfType = df152[df152['type']==eventType]

    result = 0
    for sessionId in sessionsList['sessionId']:
        result = result + len(eventsOfType[eventsOfType['sessionId']==sessionId])
    return result

### Other

In [None]:
def getUserCheckpoints( userId ):
    """Return the unique tutorial sections reached by a user, as a Series.

    The 'reach' events are selected by session id, using the sessions
    returned by getAllSessionsOfUser whose 'userId' equals userId.
    """
    # List of associated sessions
    sessionsList = getAllSessionsOfUser( df152, userId, True )

    # isin() needs the session ids themselves: given a DataFrame it would
    # compare against the column labels and never match.
    # NOTE(review): assumes sessionsList has 'userId' and 'sessionId' columns,
    # as the original filter and the sibling functions suggest - confirm.
    userSessionIds = sessionsList[sessionsList['userId']==userId]['sessionId']

    # List all 'reach' events with those sessionIds.
    perUser = reachEvents[reachEvents['sessionId'].isin(userSessionIds)]
    # Only sections whose name starts with 'tutorial'; missing names are dropped.
    perUser = perUser[perUser['section'].str.startswith('tutorial', na=False)]
    return pd.Series(perUser['section'].unique())


def getDiscrepancyGameGForm( userId ):
    """Compare the checkpoints of the Google form with those reached in game.

    Returns a tuple (gameNotEnough, gformNotEnough) of Series:

    - gameNotEnough: sorted, unique values of getValidatedCheckpoints(userId)
      that are not in getUserCheckpoints(userId);
    - gformNotEnough: values of getNonValidatedCheckpoints(userId) that are
      greater than or equal to the greatest checkpoint reached in game, or
      all of them when no checkpoint was reached.
    """
    gformNonVal = getNonValidatedCheckpoints(userId)
    gformVal = getValidatedCheckpoints(userId)
    gameVal = getUserCheckpoints(userId)

    # user has answered questions whose answer they haven't seen in the game
    gameNotEnough = pd.Series(np.setdiff1d(gformVal.values, gameVal.values))

    # user has not answered questions whose answer they have seen in the game
    # '' compares lower than or equal to any string, so without any reached
    # checkpoint every non-validated checkpoint is kept.
    maxGameVal = ''
    if gameVal.values.size!=0:
        # The maximum has to be stored: it used to be computed and discarded.
        maxGameVal = gameVal.values.max()
    # NOTE(review): '>=' keeps checkpoints at or beyond the last one reached;
    # the comment above suggests '<=' may have been intended - confirm.
    gformNotEnough = pd.Series([nonVal for nonVal in gformNonVal.values if nonVal >= maxGameVal])

    return (gameNotEnough, gformNotEnough)

In [None]:
# Static data
# Event types that getUserDataVector counts with getUserEventsTotal,
# i.e. without the per-section filter.
noSectionEventCodes = [
    'start',
    'selectmenu',
    'switch',
    'restart',
    'gotourl',
    'gotomooc',
    'configure',
]

In [None]:
# Event names that get one row each in the per-user data vector.
simpleEvents = [
    'complete', 'configure', 'craft', 'death',
    'equip', 'unequip', 'add', 'remove',
    'gotomooc', 'gotourl', 'pickup', 'reach',
    'restart', 'selectmenu', 'start', 'switch',
]

# possible events: complete, configure, craft, death, equip, gotomooc, gotourl,
# pickup, reach, restart, selectmenu, start, switch, unequip

# Row labels of the per-user data vector, in order: 'sessionsCount',
# one score label per answer temporality, then every simple event.
userDataVectorIndex = ['sessionsCount']  # game

for temporality in answerTemporalities:
    userDataVectorIndex += [scoreLabel + temporality]

userDataVectorIndex = np.concatenate([userDataVectorIndex, simpleEvents])

In [None]:
#allEvents = df152['type'].unique()
#allEvents = np.concatenate( simpleEvents, allEvents ).unique()
#allUserDataVectorIndex = np.concatenate( userDataVectorIndex, allEvents ).unique()

In [None]:
def getUserDataVector( userId ):
    """Build the data vector of a user as a one-column DataFrame.

    The frame is indexed by userDataVectorIndex and its single column is named
    str(userId). It holds:

    - one score per column of getScore(userId): the last element of the
      scoreLabel entry for the 'before' column, the first element otherwise,
      NaN when that entry is empty;
    - 'sessionsCount', as returned by countSessions;
    - one count per name in simpleEvents, computed with
      getUserSectionsCraftEventsTotal for craft event codes, with
      getUserEventsTotal for the codes of noSectionEventCodes, and with
      getUserSectionsEventsTotal otherwise.

    Rows that receive no value are 0. Values whose label is not part of
    userDataVectorIndex are left out of the result.
    """
    sessionsList = getAllSessionsOfUser( df152, userId, True )

    columnName = str(userId)

    # Values are collected in a plain dict and the frame is built once at the
    # end: chained df[col][row] = ... assignments are not guaranteed to modify
    # the frame.
    values = dict.fromkeys(userDataVectorIndex, 0)

    score = getScore( userId )
    for _temporality in score.columns:
        _score = score.loc[scoreLabel,_temporality]
        if(len(_score)>0):
            if(_temporality == 'before'):
                _score = _score[len(_score)-1]
            else:
                _score = _score[0]
        else:
            _score = np.nan
        values[scoreLabel+_temporality] = _score

    values['sessionsCount'] = countSessions( df152, userId, False, sessionsList)

    for eventName in simpleEvents:
        if eventName in craftEventCodes:
            values[eventName] = getUserSectionsCraftEventsTotal(eventName, userId, sessionsList)
        elif eventName in noSectionEventCodes:
            values[eventName] = getUserEventsTotal(eventName, userId, sessionsList)
        else:
            values[eventName] = getUserSectionsEventsTotal(eventName, userId, sessionsList)

    # index= fixes the row order and drops labels outside userDataVectorIndex.
    return pd.DataFrame({columnName: pd.Series(values, index=userDataVectorIndex)})