# Mapper and Reducer

The mapper reads the keyboard log file and extracts the lines that belong to Team Fortress 2 key bindings.

In [172]:
%%writefile keyLogMapperV2.py
#!/opt/bitnami/python/bin/python
# -*-coding:utf-8 -*
"""Hadoop Streaming mapper for the keyboard log.

Reads "time,key,action" records from stdin and, for every record whose key is
one of the Team Fortress 2 default bindings, writes "key,time,action" to
stdout. Blank or malformed records are skipped instead of crashing the task.
"""
import sys
import string

# The 49 default key bindings for team fortress 2.
# Stored as a frozenset so each membership test is a hash lookup.
# NOTE: "," is listed for completeness but can never match, because records
# are split on commas; emitting it would also break the comma-separated
# "key,time,action" output format.
keyBindings = frozenset([
    "w", "a", "s", "d", "Key.space", "Key.ctrl_l", "'", "/", "Key.up", "Key.down",
    "v", "y", "u", "z", "x", "c", ",", ".", "m", "n", "Key.f2", "Key.f3", "l", "g",
    "h", "i", "f", "b", "-", "r", "q", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
    "t", "Key.tab", "Key.f5", "Key.f6", "Key.f7", "`", "j", "k",
])


def clean_key(token):
    """Return the key name with the quotes around it removed.

    A token wrapped in one matching pair of quotes ('w' or "'") loses only
    that outer pair, so the apostrophe key itself survives. Any other token
    has all apostrophes removed, as before.
    """
    if len(token) >= 3 and token[0] == token[-1] and token[0] in ("'", '"'):
        return token[1:-1]
    return token.replace("'", "")


def map_line(line):
    """Convert one log record into the mapper's output line.

    Returns "key,time,action" when the record's key is a known binding, or
    None when the record is blank, has fewer than three fields, names another
    key, or carries a timestamp that is not a number.
    """
    words = line.strip().split(",")
    if len(words) < 3:
        return None
    keyName = clean_key(words[1])
    if keyName not in keyBindings:  # Check whether this line belongs to the 49 key bindings
        return None
    try:
        time = float(words[0].replace("'", ""))
    except ValueError:
        return None
    action = words[2].replace("'", "")
    # repr() keeps the full timestamp precision on every Python version
    # (str() truncates floats to 12 significant digits on Python 2).
    return keyName + "," + repr(time) + "," + action


def main():
    """Stream stdin through map_line and print every record that is kept."""
    for line in sys.stdin:  # reads from stdin
        output = map_line(line)
        if output is not None:
            print(output)


if __name__ == "__main__":
    main()

Overwriting keyLogMapperV2.py


The reducer reads the mapper's output, which Hadoop Streaming has already grouped and sorted. It calculates how many times each key was pressed and the average press duration for each key.

In [337]:
%%writefile keyLogReducerV2.py
#!/opt/bitnami/python/bin/python
# -*-coding:utf-8 -*
"""Hadoop Streaming reducer for the keyboard log.

Reads "keyName,time<TAB>action" records from stdin, grouped by key name, and
prints one "keyName averageDuration timesPressed" line per key.
"""

import sys


def parse_record(line):
    """Split one "keyName,time<TAB>action" record into (keyName, time, action).

    Returns None for blank or malformed records (no tab, no comma in the key
    part, or a timestamp that is not a number) so the caller can skip them.
    """
    words = line.strip().split("\t")
    if len(words) < 2:
        return None
    key = words[0].split(",")
    if len(key) < 2:
        return None
    try:
        time = float(key[1])
    except ValueError:
        return None
    return key[0], time, words[1]


def summarize_key(events):
    """Pair each press with the release that follows it.

    events is a list of (time, action) tuples for a single key. They are
    ordered by time first (stable, so ties keep arrival order). Any action
    other than "pressed" is treated as a release.

    Returns (totalDuration, completedPresses). Repeated presses while the key
    is already down are ignored, as are releases with no press before them
    and a final press that is never released.
    """
    total = 0
    count = 0
    pressedAt = None  # time of the press still waiting for its release
    for time, action in sorted(events, key=lambda event: event[0]):
        if action == "pressed":
            if pressedAt is None:
                pressedAt = time
        elif pressedAt is not None:
            total += time - pressedAt
            count += 1
            pressedAt = None
    return total, count


def format_summary(keyName, events):
    """Return "keyName averageDuration timesPressed", or None if no press completed."""
    total, count = summarize_key(events)
    if count == 0:
        return None
    return keyName + " " + "{:.5f}".format(total / count) + " " + str(count)


def reduce_lines(lines):
    """Yield one summary line per run of consecutive records with the same key."""
    lastKey = None  # key whose records are currently being collected
    events = []
    for line in lines:
        record = parse_record(line)
        if record is None:
            continue
        keyName, time, action = record
        if keyName != lastKey:
            # A new key starts: flush the previous key before collecting.
            if lastKey is not None:
                output = format_summary(lastKey, events)
                if output is not None:
                    yield output
            lastKey = keyName
            events = []
        events.append((time, action))
    # Flush the last key; nothing to do when the input was empty.
    if lastKey is not None:
        output = format_summary(lastKey, events)
        if output is not None:
            yield output


def main():
    """Reduce the records on stdin and print each key's summary."""
    for output in reduce_lines(sys.stdin):
        print(output)


if __name__ == "__main__":
    main()

Overwriting keyLogReducerV2.py


# Preparing the input files

In [60]:
# Upload the local path `Hengjun` into HDFS under /inputs/Capstone/.
# NOTE(review): the streaming job further down reads /inputs/Capstone/Jonathan,
# not Hengjun — confirm that input was uploaded the same way.
!hadoop fs -put Hengjun /inputs/Capstone/

2021-11-09 23:31:21,946 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false


In [338]:
# Remove the results of any previous run before starting the job.
# -f keeps this cell from reporting an error on a first run, when the
# directory does not exist yet.
!hadoop fs -rm -r -f /outputs/Capstone/Jonathan

Deleted /outputs/Capstone/Jonathan


# Using hadoop streaming to run the MapReduce job

If you are interested in hadoop streaming, here's the official documentation: https://hadoop.apache.org/docs/r3.2.1/hadoop-streaming/HadoopStreaming.html

In [339]:
# Run the MapReduce job with Hadoop Streaming.
# The first two comma-separated fields of each mapper output line
# (key name, time) form the key. Keys are sorted by key name and then
# numerically by time, and a single reducer processes everything.
# The scripts are shipped with the generic -files option, which replaces the
# deprecated -file flag and must come before the streaming options.
!hadoop jar /opt/hadoop-3.2.1/share/hadoop/tools/lib/hadoop-streaming-3.2.1.jar \
    -D mapreduce.job.output.key.comparator.class=org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator \
    -D stream.map.output.field.separator=, \
    -D stream.num.map.output.key.fields=2 \
    -D mapreduce.map.output.key.field.separator=, \
    -D mapreduce.partition.keycomparator.options='-k1,1 -k2,2n' \
    -D mapreduce.job.reduces=1 \
    -files $PWD/keyLogMapperV2.py,$PWD/keyLogReducerV2.py \
    -mapper keyLogMapperV2.py \
    -reducer keyLogReducerV2.py \
    -input /inputs/Capstone/Jonathan \
    -output /outputs/Capstone/Jonathan

2021-11-10 04:44:18,427 WARN streaming.StreamJob: -file option is deprecated, please use generic option -files instead.
packageJobJar: [/training/Capstone/keyLogMapperV2.py, /training/Capstone/keyLogReducerV2.py, /tmp/hadoop-unjar5839946848324483523/] [] /tmp/streamjob6592926802713631732.jar tmpDir=null
2021-11-10 04:44:19,934 INFO client.RMProxy: Connecting to ResourceManager at resourcemanager/172.18.0.4:8032
2021-11-10 04:44:20,248 INFO client.AHSProxy: Connecting to Application History server at historyserver/172.18.0.3:10200
2021-11-10 04:44:20,322 INFO client.RMProxy: Connecting to ResourceManager at resourcemanager/172.18.0.4:8032
2021-11-10 04:44:20,323 INFO client.AHSProxy: Connecting to Application History server at historyserver/172.18.0.3:10200
2021-11-10 04:44:20,606 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/root/.staging/job_1636493424467_0086
2021-11-10 04:44:20,786 INFO sasl.SaslDataTransferClient: SASL encryption tr

# The outputs, in the format (Key, Avg_Time_Pressed, Times_Pressed)

In [340]:
# Print the job's result file (part-00000) straight from HDFS.
!hdfs dfs -cat /outputs/Capstone/Jonathan/part-00000

2021-11-10 04:44:47,247 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
1 0.15647 3	
2 0.14135 12	
3 0.10398 5	
4 0.12002 4	
5 0.11190 1	
7 0.10960 1	
Key.f2 0.14490 2	
Key.space 0.31643 21	
a 0.46021 408	
d 0.41633 347	
q 0.15900 1	
s 0.60834 111	
w 2.38953 254	


Copy the output files from the Hadoop Distributed File System to the local file system (a virtual machine in the cloud).

In [346]:
# Copy every file of the job output from HDFS into the local Outputs folder.
# mkdir -p makes sure the destination exists, and -f overwrites copies left
# by an earlier run so the cell can be re-run. The glob is quoted so that
# HDFS, not the local shell, expands it.
!mkdir -p /training/Capstone/Outputs
!hdfs dfs -copyToLocal -f '/outputs/Capstone/Jonathan/*' /training/Capstone/Outputs

2021-11-10 04:49:56,875 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false


In [347]:
# List the local Outputs folder to confirm the files were copied.
!ls /training/Capstone/Outputs/

_SUCCESS  part-00000
