# Preparation

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

from ipywidgets import FloatProgress
from IPython.display import display

In [None]:
# Display the pandas version in use, so the results below can be tied to a library version.
pd.__version__

In [None]:
# Display the numpy version in use, so the results below can be tied to a library version.
np.__version__

### common variables

In [None]:
# Common stem of checkpoint identifiers
# (presumably the prefix of the tutorial checkpoint section names - TODO confirm).
checkpointsStem = "tutorial1.Checkpoint"

### columns

In [None]:
# In RedMetrics data, 'playerId' is actually a session ID.
# Permanent player IDs are stored as 'localplayerguid' in 'customdata' attached to 'start' events.

# Raw (pre-rename) column names.
minimalInitializationColumns = ['customData.localplayerguid']
initializationColumns = minimalInitializationColumns + [
    'playerId',
    'type',
    'serverTime',
    'customData.platform',
]

# Column names that use the normalized 'userId' / 'sessionId' naming.
# NOTE(review): 'relevantColumns' is assigned again further down, in the data loading cell.
relevantColumns = [
    'sessionId',
    'serverTime',
    'userId',
    'customData.platform',
]
playerFilteringColumns = [
    'sessionId',
    'userId',
    'customData.platform',
    'serverTime',
]
checkpointsRelevantColumns = [
    'sessionId',
    'userId',
    'type',
    'section',
]

### dataframes for the different game versions

In [None]:
def getNormalizedRedMetricsCSV( df ):
    """Return a normalized copy of a raw RedMetrics dataframe.

    The columns of the result are the union of the columns of `df` and of
    `minimalInitializationColumns`, sorted and without repeats (np.unique),
    and then renamed by `safeGetNormalizedRedMetricsCSV`.

    A column listed in `minimalInitializationColumns` that `df` does not have
    is created and filled with NaN, so that the renaming step always finds it.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw events, using RedMetrics column names.

    Returns
    -------
    pandas.DataFrame
        A new dataframe; `df` itself is not modified.
    """
    newColumns = np.unique(np.concatenate((minimalInitializationColumns, df.columns.values)))
    # reindex, unlike .loc, accepts labels that are absent from df: selecting a
    # missing label with .loc raises KeyError in pandas >= 1.0.
    return safeGetNormalizedRedMetricsCSV(df.reindex(columns=newColumns))

def safeGetNormalizedRedMetricsCSV( df ):
    """Return a copy of `df` that uses the normalized column names.

    'customData.localplayerguid' becomes 'userId' and 'playerId' becomes
    'sessionId'; other columns keep their names. Index labels are converted
    to strings. `df` itself is not modified.
    """
    columnRenaming = {
        'customData.localplayerguid' : 'userId',
        'playerId': 'sessionId',
    }
    return df.rename(index=str, columns=columnRenaming)

In [None]:
# Raw column names kept for every game version, and the same columns under their normalized names.
# NOTE(review): this rebinds 'relevantColumns', replacing the list assigned in the 'columns' cell.
relevantColumns = ['customData.localplayerguid', 'playerId', 'type']
renamedRelevantColumns = ['userId', 'sessionId', 'type']

# Old versions

# Raw exports, one CSV file per game version.
rdf100, rdf131, rdf132, rdf133, rdf140, rdf150, rdf151 = (
    pd.read_csv(csvPath)
    for csvPath in (
        "../../data/1.0.csv",
        "../../data/1.31.csv",
        "../../data/1.32.csv",
        "../../data/1.33.csv",
        "../../data/1.40.csv",
        "../../data/1.50.csv",
        "../../data/1.51.csv",
    )
)

# The same exports, restricted to the relevant columns.
part100, part131, part132, part133, part140, part150, part151 = (
    rawFrame.loc[:,relevantColumns]
    for rawFrame in (rdf100, rdf131, rdf132, rdf133, rdf140, rdf150, rdf151)
)

# Tests

rdftest = pd.read_csv("../../data/2017-10-11.test.csv")
dftest = getNormalizedRedMetricsCSV(rdftest)

# Version 1.52

In [None]:
# Stem of the data file name for version 1.52 (looks like the export date - TODO confirm).
dataFilesNamesStem = "2018-01-15"

rdf152 = pd.read_csv("../../data/" + dataFilesNamesStem + ".1.52.csv")

# Normalized full dataframe, and raw relevant-columns slice, for version 1.52.
df152 = getNormalizedRedMetricsCSV(rdf152)
part152 = rdf152.loc[:,relevantColumns]

# All versions

# NOTE(review): pd.concat keeps each part's own index labels by default, so
# labels can repeat across versions in 'rdf' and 'df'.
rdf = pd.concat(
    objs=[
        part100,
        part131, part132, part133,
        part140,
        part150, part151, part152,
    ]
)

df = getNormalizedRedMetricsCSV(rdf)

### user ids

#### TODO FIXME: automate this using switchversion events or the test logs (*dftest*)

In [None]:
# Known device IDs. Each value contains its own surrounding double quotes
# (presumably to match how the IDs appear in the data - TODO confirm).
arcadeCabinetID = '"deb089c0-9be3-4b75-9b27-28963c77b10c"'

devPCID1 = '"7412a447-8177-48e9-82c5-cb31032f76a9"'
devPCID2 = '"7bb82da4-293a-4597-ba62-cc432b5898a5"'
devPCID3 = '"2036f6f4-50a1-4f94-8d4e-bfca814bbda8"'
devPCID4 = '"30d15b5f-2cf6-46c9-b187-c16a62f4259f"'

devPCID5 = '"7825d421-d668-4481-898a-46b51efe40f0"'
devPCID6 = '"97cdfb48-40e8-498e-afde-98f7290679f9"'
devPCID7 = '"3161ae02-6abd-4834-8208-686c4cd5363c"'
# possibly a tablet: a4d4b030-9117-4331-ba48-90dc05a7e65a

# NOTE(review): devPCID6, devPCID7 and arcadeCabinetID are not part of this list - confirm this is intended.
excludedIDs = [
    'null',
    devPCID1,
    devPCID2,
    devPCID3,
    devPCID4,
    devPCID5,
]