# User comparison tests

# Table of Contents

[Preparation](#preparation)

[User data vectors](#userdatavectors)

[User lists](#userlists)

[Sessions' checkpoints](#sessionscheckpoints)

[Assembly](#assembly)

[Time](#time)

# Preparation
<a id=preparation />

In [None]:
%run "../Functions/1. Google form analysis.ipynb"
%run "../Functions/4. User comparison.ipynb"

# Data vectors of users
<a id=userdatavectors />

In [None]:
#getAllResponders()

In [None]:
# Run the helper from the %run'd notebooks on gform.
# NOTE(review): presumably this fills gform['Temporality'], which a later cell inspects — confirm in the helper.
setAnswerTemporalities(gform)

# getAllUserVectorData

In [None]:
# Build the table of user data vectors.
# Exactly one of the alternatives below is meant to be active; the others are
# kept commented out for quick switching.

# small sample
#allData = getAllUserVectorData( getAllUsers( df152 )[:10] )

# complete set
#allData = getAllUserVectorData( getAllUsers( df152 ) )

# subjects who answered the gform
allData = getAllUserVectorData( getAllResponders() )

# 10 subjects who answered the gform
#allData = getAllUserVectorData( getAllResponders()[:10] )

In [None]:
# Number of columns of the table built in the previous cell.
len(allData.columns)

In [None]:
# Inputs for the inlined getAllUserVectorData body in the next cell.
userIds = getAllResponders()
# correctAnswers comes from the %run'd notebooks; it is passed on as _source below.
_source = correctAnswers

In [None]:
# Inlined body of the function below, kept at top level so that every
# intermediate value can be inspected from other cells.
# _source is used as correction source, if we want to include answers to these questions
#def getAllUserVectorData( userIds, _source = [] ):

# progress bar: one tick per user
f = FloatProgress(min=0, max=len(userIds))
display(f)

# Collect the per-user vectors first and concatenate them once at the end:
# calling pd.concat inside the loop copies the whole accumulated table again
# for every user.
# userId is deliberately a plain loop variable: the next cell reads it.
_userVectors = []
for userId in userIds:
    #print(str(userId))
    f.value += 1
    _userVectors.append(getUserDataVector(userId, _source = _source))

# result
if len(_userVectors) == 0:
    # no user at all: keep the empty-list placeholder
    allData = []
elif len(_userVectors) == 1:
    # a single user: the vector itself, exactly as getUserDataVector returned it
    allData = _userVectors[0]
else:
    # one column block per user, in iteration order
    allData = pd.concat(_userVectors, axis=1)

#print('done')
allData

In [None]:
# Last user id visited by the loop in the previous cell
# (the loop variable stays defined once the loop has finished).
userId

# Correlation Matrix

In [None]:
# Correlation heatmap of the user data vectors.
# This cell is the body of the commented-out signature below, with the
# parameters assigned by hand (note that _abs is True here, False in the
# signature).
methods = ['pearson', 'kendall', 'spearman']

_allUserVectorData = allData.T
_method = methods[0]
_title = 'RedMetrics Correlations'
_abs = True
_clustered = False
_figsize = (20, 20)

#def plotAllUserVectorDataCorrelationMatrix(
#    _allUserVectorData,
#    _method = methods[0],
#    _title='RedMetrics Correlations',
#    _abs=False,
#    _clustered=False,
#    _figsize = (20,20)
#):

# three ticks: correlation computed, optional abs applied, plot drawn
_progress = FloatProgress(min=0, max=3)
display(_progress)

# correlation matrix; a method name that is not listed falls back to the first one
_m = _method if _method in methods else methods[0]
_correlation = _allUserVectorData.astype(float).corr(_m)
_progress.value += 1
if _abs:
    _correlation = _correlation.abs()
_progress.value += 1

# plot: plain heatmap on its own figure, or seaborn's clustered variant
if not _clustered:
    _fig = plt.figure(figsize=_figsize)
    _ax = plt.subplot(111)
    _ax.set_title(_title)
    sns.heatmap(_correlation, ax=_ax, cmap=plt.cm.jet, square=True)
else:
    sns.clustermap(_correlation, cmap=plt.cm.jet, square=True, figsize=_figsize)
_progress.value += 1

In [None]:
# Distinct values found in the 'Temporality' column of gform.
gform['Temporality'].unique()

In [None]:
# Entry labelled 'scoreundefined' in allData, with missing values removed.
allData.loc['scoreundefined'].dropna()

In [None]:
# First 10 entries returned by getAllUsers for df152.
getAllUsers(df152)[:10]

In [None]:
# Number of entries returned by getAllUsers for df152.
len(getAllUsers(df152))

# List of users and their sessions
<a id=userlists />

In [None]:
# For every 'start' event of df152, keep the player GUID and the session id.
userSessionsRelevantColumns = ['customData.localplayerguid', 'sessionId']
userSessions = df152.loc[df152['type'] == 'start', userSessionsRelevantColumns]

In [None]:
# Rename the GUID column to 'userId'; index=str additionally turns every row label into a string.
userSessions = userSessions.rename(index=str, columns={'customData.localplayerguid': 'userId'})
userSessions.head()

In [None]:
#groupedUserSessions = userSessions.groupby('customData.localplayerguid')
#groupedUserSessions.head()
#groupedUserSessions.describe().head()

# List of sessions with their checkpoint achievements
<a id=sessionscheckpoints />

In [None]:
# 'reach' events of df152 whose section name starts with 'tutorial',
# reduced to section, session id and user time.
checkpointsRelevantColumns = ['sessionId', 'customData.localplayerguid', 'type', 'section', 'userTime']
checkpoints = df152.loc[:, checkpointsRelevantColumns]

# keep only the 'reach' events, and only the three columns used afterwards
checkpoints = checkpoints.loc[checkpoints['type'] == 'reach', ['section', 'sessionId', 'userTime']]
# na=False: rows without a section name are filtered out instead of propagating NaN into the mask
checkpoints = checkpoints.loc[checkpoints['section'].str.startswith('tutorial', na=False)]
#checkpoints = checkpoints.groupby("sessionId")
#checkpoints = checkpoints.max()
checkpoints.head()

# Assembly of user sessions and checkpoints
<a id=assembly />

In [None]:
# Outer join of sessions and checkpoints on the session id, so that rows
# present on only one side are kept.
#assembled = userSessions.combine_first(checkpoints)
assembled = userSessions.merge(checkpoints, on='sessionId', how='outer')
assembled.head()

In [None]:
# Remove the join key, which is not used by the following cells.
# axis is passed by keyword: the positional form drop('sessionId', 1) was
# deprecated and is rejected by pandas 2.0 and later.
userSections = assembled.drop('sessionId', axis=1)
userSections.head()

In [None]:
# Drop every row that has at least one missing value
# (the outer merge above leaves such rows for unmatched sessions/checkpoints).
userSections = userSections.dropna()
userSections.head()

In [None]:
# Column-wise maximum of userSections for each userId.
# Note: this rebinds the name `checkpoints`, previously the per-session table.
checkpoints = userSections.groupby("userId").max()
checkpoints.head()

# Time analysis
<a id=time />

In [None]:
# Per user: earliest and latest userTime, and the time elapsed between them.
#userTimedSections = userSections.groupby("userId").agg({ "userTime": np.min })
#userTimedSections = userSections.groupby("userId")
# The aggregations are named by string: with the callables np.min / np.max the
# resulting column labels depend on the NumPy version ('amin'/'amax' or
# 'min'/'max'), which made the lookups below fragile.
userTimes = userSections.groupby("userId").agg({ "userTime": ["min", "max"] })
userTimes["duration"] = pd.to_datetime(userTimes["userTime"]["max"]) - pd.to_datetime(userTimes["userTime"]["min"])
# truncate the durations to whole seconds
userTimes["duration"] = userTimes["duration"].map(lambda x: np.timedelta64(x, 's'))
# longest durations first
userTimes = userTimes.sort_values(by=['duration'], ascending=[False])
userTimes.head()

# TODO

# Plot the played time of each user against its rank (longest first).
userTimes.loc[:,'duration']
# Skip the first 4 rows; .copy() so that the new columns below are added to an
# independent table rather than to a slice of the previous one.
# NOTE(review): presumably these are the 4 longest durations, treated as outliers — confirm.
userTimes = userTimes[4:].copy()
# total_seconds() rather than .seconds: .seconds is only the seconds component
# of the timedelta and wraps around for durations of one day or more.
userTimes["duration_seconds"] = userTimes["duration"].map(lambda x: pd.Timedelta(x).total_seconds())
maxDuration = np.max(userTimes["duration_seconds"])
userTimes["duration_rank"] = userTimes["duration_seconds"].rank(ascending=False)
userTimes.plot(x="duration_rank", y="duration_seconds")
plt.xlabel("game session")
plt.ylabel("time played (s)")
plt.legend('')
# NOTE(review): 139 is hard-coded; presumably the number of plotted users — confirm.
plt.xlim(0, 139)
plt.ylim(0, maxDuration)

# For each section: the earliest userTime at which it was reached, and the
# difference with the previous row's first-reached time.
# The aggregation is named by string ("min"), the form pandas recommends over
# passing the np.min callable.
userTimedSections = userSections.groupby("section").agg({ "userTime": "min" })
userTimedSections

userTimedSections["firstReached"] = pd.to_datetime(userTimedSections["userTime"])
userTimedSections.head()

# drop() returns a new table: the result has to be assigned, otherwise the
# column is still there. axis is passed by keyword because the positional form
# is rejected by pandas 2.0 and later.
userTimedSections = userTimedSections.drop('userTime', axis=1)
userTimedSections.head()

# NOTE(review): diff() follows the row order of the groupby (section label
# order), which is presumably meant to match the progression order — confirm.
userTimedSections["firstCompletionDuration"] = userTimedSections["firstReached"].diff()
userTimedSections.head()

In [None]:
# Inlined body of the function whose signature is commented out below, with
# its parameters assigned by hand: builds the data vectors of the users
# selected by the before/after flags and keeps those with `sessionCount` sessions.
sessionCount = 1
_rmDF = df152
sample = gform
before = False
after = True
gfMode = False
rmMode = True

#def getAllUserVectorDataCustom(before, after, gfMode = False, rmMode = True, sessionCount = 1, _rmDF = df152)
userIds = []
# Default outcome when no user matches. Without it `result` stayed undefined
# (or kept the value of an earlier run), which the next cell then displayed.
result = []

# Select the survey rows according to the before/after flags.
# NOTE(review): the helper names are not uniform (getGFBefores vs getGFormAfters) —
# confirm that both exist in the %run'd notebooks.
if (before and after):
    userIds = getSurveysOfUsersWhoAnsweredBoth(sample, gfMode = gfMode, rmMode = rmMode)
elif before:
    if rmMode:
        userIds = getRMBefores(sample)
    else:
        userIds = getGFBefores(sample)
elif after:
    if rmMode:
        userIds = getRMAfters(sample)
    else:
        userIds = getGFormAfters(sample)

# len() rather than truthiness: userIds may be a table here, not only a list
if(len(userIds) > 0):
    userIds = userIds[localplayerguidkey]
    rmUIDs = userIds.map(unsafeGetRedMetricsGUIDFormat)
    allUserVectorData = getAllUserVectorData(rmUIDs, _rmDF = _rmDF)
    # transpose so that 'sessionsCount' can be used as a column filter, then transpose back
    allUserVectorData = allUserVectorData.T
    result = allUserVectorData[allUserVectorData['sessionsCount'] == sessionCount].T
else:
    print("no matching user")

In [None]:
# Display the value computed by the previous cell.
result

In [None]:
# Call the function form of the cell above; following its commented-out
# signature, the positional arguments are before=False and after=True.
getAllUserVectorDataCustom(False, True)

In [None]:
# Data vectors of the users returned by getSurveysOfUsersWhoAnsweredBoth,
# restricted to those with exactly one session.
userIdsBoth = getSurveysOfUsersWhoAnsweredBoth(gform, gfMode = True, rmMode = True)[localplayerguidkey]
rmUIDBoth = userIdsBoth.map(unsafeGetRedMetricsGUIDFormat)
allUserVectorData = getAllUserVectorData(rmUIDBoth)
# transpose so that 'sessionsCount' becomes a column that can be filtered on
allUserVectorData = allUserVectorData.T
# Last expression of the cell, so the notebook displays it. A `return` is not
# allowed here: outside a function it is a SyntaxError and the cell never runs.
allUserVectorData[allUserVectorData['sessionsCount'] == 1]