# Game sessions

## Preparation

In [None]:
from random import randint
import re
%run "../Utilities/Preparation.ipynb"
%run "../Static data/Google form correct answers.ipynb"

## Functions

In [None]:
#RedMetrics
# A user id in RedMetrics format is a GUID wrapped in double quotes, e.g.
# localplayerguid = '"8d352896-a3f1-471c-8439-0f426df901c1"'
# GUID regex source: https://stackoverflow.com/questions/42047994/regex-how-to-find-a-guid-in-a-long-string

# bare GUID: lowercase hexadecimal groups of 8-4-4-4-12 characters
GUIDpattern = '[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}'
# same GUID enclosed in double quotes and anchored at both ends
RedMetricsGUIDpattern = '^"{0}"$'.format(GUIDpattern)

# Checks guid against RedMetricsGUIDpattern.
# Returns the regex match object (truthy) when guid matches, None otherwise.
def isRedMetricsGUIDFormat( guid ):
    _matcher = re.compile(RedMetricsGUIDpattern)
    return _matcher.search(guid)

# Returns a randomly chosen userId of df152 that is in RedMetrics GUID format.
# Raises ValueError when df152 holds no userId in that format; drawing ids
# until one matches would never terminate in that case.
def getRandomRedMetricsGUID():
    _uniqueUsers = df152['userId'].dropna().unique()
    # keep only the well-formed ids, so that a single draw is always enough
    _validUsers = [_guid for _guid in _uniqueUsers if isRedMetricsGUIDFormat(_guid)]
    if not _validUsers:
        raise ValueError("no userId in RedMetrics GUID format found in df152")
    _userIndex = randint(0, len(_validUsers)-1)
    return _validUsers[_userIndex]

In [None]:
# Returns the rows of df whose 'type' is 'start', restricted to the columns
# listed in renamedRelevantColumns, with the 'type' column removed.
# dropna: when truthy, rows with a missing value in any remaining column are discarded.
def getAllSessions( df, dropna ):
    _result = df.loc[:, renamedRelevantColumns]
    _result = _result[_result['type']=='start']
    # axis is passed by keyword: the positional form was deprecated in
    # pandas 1.3 and raises a TypeError from pandas 2.0 on
    _result = _result.drop('type', axis=1)
    if dropna:
        _result = _result.dropna(how='any')
    return _result

# Sessions of a single user: extracts the sessions of df with getAllSessions,
# then keeps only the rows whose 'userId' is userId.
# dropna is forwarded to getAllSessions.
def getAllSessionsOfUser( df, userId, dropna=True ):
    return getAllSessionsOfUserOptimized(getAllSessions( df, dropna ), userId)

# Filters an already extracted sessions dataframe.
# sessionsList is assumed to contain the dataframe of userIds and sessionsIds;
# only the rows whose 'userId' equals userId are returned.
def getAllSessionsOfUserOptimized (sessionsList, userId):
    _belongsToUser = sessionsList['userId']==userId
    return sessionsList.loc[_belongsToUser]

# Counts the distinct sessionIds of a user.
# df: events dataframe; only read when sessionsList is not provided
# sessionsList: optional precomputed dataframe of userIds and sessionIds;
#   when None or empty, it is rebuilt from df with getAllSessionsOfUser
# includewithoutusers=True will count sessions that do not have any userId attached
#   (when False, rows with any missing value are dropped before counting)
def countSessions( df, userId, includewithoutusers, sessionsList=None):
    # None replaces the former mutable [] default; an explicitly passed
    # empty list or dataframe still triggers the rebuild
    if(sessionsList is None or len(sessionsList) == 0):
        # List of associated sessions, taken from the dataframe given by the
        # caller rather than from the global df152
        sessionsList = getAllSessionsOfUser( df, userId, True )
    if(not includewithoutusers):
        sessionsList = sessionsList.dropna(how='any')
    return sessionsList['sessionId'].nunique()

# Number of rows of df that belong to userId.
# df is assumed to contain the dataframe of userIds and sessionsIds
def getSessionsCountOptimized( df, userId):
    _userSessions = getAllSessionsOfUserOptimized( df, userId )
    return len(_userSessions)

# Per-user session counts.
# Returns a dataframe with columns 'userId' and 'counts', sorted by decreasing counts.
# Sessions are extracted with getAllSessions, rows with missing values dropped.
def getUserSessionsCounts( df ):
    _sessions = getAllSessions( df, True )
    _counts = _sessions.groupby('userId').size()
    _counts = _counts.reset_index(name='counts')
    return _counts.sort_values(by='counts', ascending=False)

In [None]:
# get datetime of first significant event
# _userId is assumed to be in RedMetrics format
# Only the events of df152 that have a non-missing 'section' and that belong
# to one of the sessions of the user are considered.
# Returns the earliest 'userTime' of these events, converted to UTC;
# returns pd.Timestamp.max (UTC) when there is no such event.
def getFirstEventDate( _userId ):
    # sentinel returned when no timed event is found
    _noEventTime = pd.Timestamp.max.tz_localize('utc')

    _sessions = getAllSessionsOfUser(df152, _userId, True)

    # one pass over df152 for all the sessions of the user, instead of one
    # full scan of df152 per session
    _timedEvents = df152[df152['sessionId'].isin(_sessions['sessionId'])]
    _timedEvents = _timedEvents.dropna(subset=['section'])
    if(len(_timedEvents) == 0):
        return _noEventTime

    # converted into a separate series: writing the column back into the
    # filtered dataframe triggers pandas' SettingWithCopyWarning
    _userTimes = _timedEvents['userTime'].map(lambda t: pd.to_datetime(t, utc=True))
    _earliest = _userTimes.min()

    # min() is NaT when every userTime is missing: keep the sentinel then
    if(pd.isnull(_earliest)):
        return _noEventTime
    return _earliest