# User comparison

# Table of Contents

1.  [Preparation](#preparation)

2.  [Functions](#functions)

3.  [Tests](#tests)

# Preparation
<a id=preparation />

In [None]:
%run "../Functions/3. Per session and per user analysis.ipynb"

# Functions
<a id=functions />

In [None]:
def getAllUsers( dataframe ):
    """Return the distinct, valid user ids found in ``dataframe['userId']``.

    Missing values (NaN / None) and the placeholder strings ``'nan'`` and
    ``'null'`` are excluded. Ids are returned as a list, in order of first
    appearance in the column.
    """
    # dropna() is used instead of testing membership against np.nan: NaN never
    # compares equal to itself, so a membership test only works when the element
    # happens to be the very same np.nan object (not the case for float columns).
    uniqueIds = dataframe['userId'].dropna().unique()
    return [userId for userId in uniqueIds if userId not in ('nan', 'null')]

In [None]:
# _source is used as correction source, if we want to include answers to these questions
def getAllUserVectorData( userIds, _source = correctAnswers, _rmDF = rmdf152 ):
    """Compute the data vector of every user and join them column-wise.

    A progress bar is displayed and advanced once per user.

    Parameters:
        userIds: sized iterable of user ids, processed in order.
        _source: forwarded unchanged to ``getUserDataVector`` (correction source).
        _rmDF: forwarded unchanged to ``getUserDataVector``.

    Returns:
        - an empty list when ``userIds`` is empty;
        - the single user's vector, untouched, when there is exactly one user;
        - otherwise the ``pd.concat(..., axis=1)`` of all user vectors.
    """
    progress = FloatProgress(min=0, max=len(userIds))
    display(progress)

    # Collect first and concatenate once: calling pd.concat inside the loop
    # re-copies the accumulated frame at every iteration (quadratic cost).
    vectors = []
    for userId in userIds:
        progress.value += 1
        vectors.append(getUserDataVector(userId, _source = _source, _rmDF = _rmDF))

    if not vectors:
        return []
    if len(vectors) == 1:
        # keep the historical behaviour: a lone vector is returned as-is
        return vectors[0]
    return pd.concat(vectors, axis=1)

In [None]:
def getAllUserVectorDataCustom(before, after, gfMode = False, rmMode = True, sessionCount = 1, _rmDF = rmdf152):
    """Return the user data vectors of a survey-based selection of users.

    The selection is read from the global ``sample``:
        - ``before`` and ``after``: users returned by
          ``getSurveysOfUsersWhoAnsweredBoth`` (using ``gfMode`` / ``rmMode``);
        - only ``before``: ``getRMBefores`` if ``rmMode`` else ``getGFBefores``;
        - only ``after``: ``getRMAfters`` if ``rmMode`` else ``getGFormAfters``;
        - neither: nobody is selected.

    Only the users whose 'sessionsCount' entry equals ``sessionCount`` are kept.

    Returns the filtered data with one column per user, or an empty list
    (after printing "no matching user") when the selection is empty.
    """
    # presumably each helper returns a DataFrame of survey answers - TODO confirm
    if before and after:
        selection = getSurveysOfUsersWhoAnsweredBoth(sample, gfMode = gfMode, rmMode = rmMode)
    elif before:
        selection = getRMBefores(sample) if rmMode else getGFBefores(sample)
    elif after:
        selection = getRMAfters(sample) if rmMode else getGFormAfters(sample)
    else:
        selection = []

    if len(selection) == 0:
        print("no matching user")
        return []

    # one row per user after transposition, so users can be filtered by row
    perUser = getAllUserVectorData(selection[localplayerguidkey], _rmDF = _rmDF).T
    matching = perUser[perUser['sessionsCount'] == sessionCount]
    # back to the original orientation: one column per user
    return matching.T

In [None]:
# correlation methods accepted by plotAllUserVectorDataCorrelationMatrix
methods = ['pearson', 'kendall', 'spearman']
def plotAllUserVectorDataCorrelationMatrix(
    _allUserVectorData,
    _method = methods[0], 
    _title='RedMetrics Correlations', 
    _abs=False,
    _clustered=False, 
    _figsize = (20,20)
):
    """Plot the correlation matrix of the columns of ``_allUserVectorData``.

    Parameters:
        _allUserVectorData: DataFrame whose values can all be cast to float.
        _method: one of ``methods``; any other value falls back to ``methods[0]``.
        _title: title of the plot (applied in both plain and clustered mode).
        _abs: if True, plot the absolute values of the correlations.
        _clustered: if True, draw a seaborn clustermap instead of a heatmap.
        _figsize: figure size passed to matplotlib / seaborn.

    A 3-step progress bar is displayed while the function runs.
    Nothing is returned.
    """
    _progress = FloatProgress(min=0, max=3)
    display(_progress)

    # computation of correlation matrix
    _m = _method if _method in methods else methods[0]
    _correlation = _allUserVectorData.astype(float).corr(_m)
    _progress.value += 1

    if _abs:
        _correlation = _correlation.abs()
    _progress.value += 1

    # plot
    if _clustered:
        _grid = sns.clustermap(_correlation,cmap=plt.cm.jet,square=True,figsize=_figsize)
        # clustermap builds its own figure: put the title on the top-most axes,
        # otherwise _title would be silently ignored in clustered mode
        _grid.ax_col_dendrogram.set_title(_title)
    else:
        plt.figure(figsize=_figsize)
        _ax = plt.subplot(111)
        _ax.set_title(_title)
        sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True)
    _progress.value += 1