___
**Mood:**
- A state of mind that is not as specific as emotion
- Biases which emotions are felt (DOI:10.1080/10803548.2003.11076589)
___
**Behavioral correlates with mood include**
- Voice modulation (May have something like this through speech telephony)
- Gestures
- Cognitive performance
- Cognitive strategy
- Motor behavior (Is this like errors when pressing keys?)
- (DOI:10.1080/10803548.2003.11076589)
___

**Keyboard strokes may predict future moods.**
- But it is not known how long the data needs to be collected for the prediction to be reliable
(https://doi.org/10.1016/j.asej.2021.101660)
___

**Data Available to us**
- Phone Usage
- Speech Telephony
- Keyboard Metrics
___
**Data that seems interesting**
- Total Error Distances
- Total Corrections
- Emoji Counts
- Total ScreenTime
___
**Data that can be used as labels**
- Self report survey scores
- Calculated IB Gaps
___

In [48]:
import pandas as pd
import numpy as np
import pickle
import datetime
import json
import re
import os, gzip, shutil

In [49]:
## data directory
## Root folder whose entries are the exported data folders that get looped over.
## Must end with "/" because paths are built from it by plain string concatenation.
directory = "/Users/farhan/DNL/BuddingScholar/Budding_Scholar_22-23/Data/"

In [50]:
## Good Subjects
## Path fragment selecting the single participant to analyse.
## The trailing "/" is required: JSON file names are appended to it directly.
participant = "1e7aef96-16cc-43f8-95d4-e3bc582eb6d3/2017C676-C22A-4318-903C-7544760252BB/"

In [51]:
## Concerned with keyboard metric for now
## Sub-folder (inside each exported data folder) that is read; no leading or
## trailing "/" because the separators are added where the path is assembled.
metric_folder = "sensorkit-keyboard-metrics/iPhone"

___
##### Functions to unzip files in a folder
- Function will unzip all files in a given directory
___

In [52]:
## Recursively unzip everything
import fnmatch
import gzip
import shutil

def gunzip(file_path, output_path):
    """Decompress the gzip archive at file_path into a new file at output_path.

    The archive is streamed in binary mode, so it is never loaded into memory
    in one piece. The archive itself is left untouched.
    """
    with gzip.open(file_path, "rb") as compressed:
        with open(output_path, "wb") as expanded:
            shutil.copyfileobj(compressed, expanded)

def recurse_and_gunzip(root):
    """Walk root and decompress every "*.gz" file next to its archive.

    Each archive is expanded with gunzip() into a file in the same folder whose
    name is the archive name minus the trailing ".gz". Archives are not deleted.

    Parameters
    ----------
    root : str
        Directory to walk; every sub-directory is visited via os.walk.
    """
    suffix = ".gz"
    for current_dir, _subdirs, files in os.walk(root):
        for f in fnmatch.filter(files, "*" + suffix):
            # Strip only the trailing suffix. str.replace() would also delete
            # ".gz" occurring earlier in the name, and would leave the name
            # unchanged (output == input) for a case-insensitive match.
            target_name = f[:-len(suffix)]
            if not target_name:
                # A file literally named ".gz" has no usable output name.
                continue
            gunzip(
                os.path.join(current_dir, f),
                os.path.join(current_dir, target_name),
            )

In [53]:

## Iterative decompression
def gz_extract(directory):
    """Decompress every ".gz" file directly inside directory, then delete it.

    Only the top level of directory is scanned (no recursion). Each archive is
    expanded into the same folder under its own name minus the trailing ".gz",
    its absolute path is printed, and the archive is removed once it has been
    written out.

    Parameters
    ----------
    directory : str
        Folder to scan; absolute, or relative to the current working directory.
    """
    extension = ".gz"
    for item in os.listdir(directory):  # loop through items in dir
        if not item.endswith(extension):  # only ".gz" files are handled
            continue

        # Build paths from `directory` rather than calling os.chdir(): changing
        # the working directory broke relative `directory` arguments (the
        # listdir ran after the chdir) and leaked a new cwd to the caller.
        gz_name = os.path.abspath(os.path.join(directory, item))
        file_name = gz_name[:-len(extension)]  # output path without ".gz"

        with gzip.open(gz_name, "rb") as f_in, open(file_name, "wb") as f_out:
            print(gz_name)
            shutil.copyfileobj(f_in, f_out)
        os.remove(gz_name)  # delete zipped file

##### Each JSON file contains two top-level entries:
    - device
    - samples
________________________________________________________________________________
##### The device dictionary contains
    - name
    - phone type
________________________________________________________________________________

##### The samples entry is a list of samples
##### Each sample inside this list has the following variable types of interest:
    - Corrections
    - Errors
________________________________________________________________________________
1. Corrections of interest:
    - total Retro Corrections
    - total Insert Key Corrections
    - total Near Key Corrections
    - total Hit Test Corrections
    - total Substitution Corrections
________________________________________________________________________________
2. Errors of interest:
    - shortWordCharKeyUpErrorDistance
    - shortWordCharKeyDownErrorDistance
    - spaceUpErrorDistance
________________________________________________________________________________
3. Other variables of interest:
    - total Typing Episodes
    - timestamp
________________________________________________________________________________

In [54]:
## Functions for single participants
## Get data and corresponding

## Need a loop here to loop over all files in the directory
### directory = "/Users/farhan/DNL/BuddingScholar/Budding_Scholar_22-23/Data_20220930-20221001/sensorkit-keyboard-metrics/iPhone/1b9b62f1-095b-4819-92a0-ea8e7abee884/C4168B14-53AD-4091-97B5-7A3E4EB4A738"
## recurse_and_gunzip(directory)

## Accumulators: one dict per keyboard sample, turned into DataFrames later
correctionsList = []
errorsList = []

## Correction counters copied as-is from every sample (order = column order)
_CORRECTION_KEYS = (
    "totalRetroCorrections",
    "totalInsertKeyCorrections",
    "totalNearKeyCorrections",
    "totalHitTestCorrections",
    "totalSubstitutionCorrections",
    "totalTranspositionCorrections",
    "totalSpaceCorrections",
    "totalAutoCorrections",
)

## Error-distance distributions; each one is reduced to its mean per sample
_ERROR_DISTANCE_KEYS = (
    "shortWordCharKeyUpErrorDistance",
    "shortWordCharKeyDownErrorDistance",
    "spaceUpErrorDistance",
)


def _mean_of_distribution(sample, key):
    """Return the mean of sample[key]["distributionSampleValues"] (NaN if empty)."""
    values = sample[key]["distributionSampleValues"]
    ## An empty distribution would otherwise raise ZeroDivisionError
    return sum(values) / len(values) if values else float("nan")


## Loop over all the exported data folders/directories
for folder in os.listdir(directory):
    path = directory + folder + "/" + metric_folder + "/" + participant

    ## Stray entries (e.g. .DS_Store) or exports without this participant
    ## have no such folder; report and move on instead of crashing
    if not os.path.isdir(path):
        print("Skipping (no participant folder):", path)
        continue

    gz_extract(path)

    ## Loop over all files in this path/directory
    for fname in os.listdir(path):
        if not fname.endswith("json"):
            continue

        ## Load the JSON file; the with-block closes the handle afterwards
        ## Need to use json.load and not json.loads
        with open(path + fname) as file:
            loaded_file = json.load(file)

        ## Get the samples list and the device name
        samples = loaded_file["samples"]
        name = loaded_file["device"]["name"]

        ## Iterate over all samples
        for entry in samples:
            timeStamp = entry["timestamp"]
            sample = entry["sample"]

            ## Correction variables
            ## Key is "timeStamp" (no trailing space) so that it matches the error rows
            correction_row = {"name": name, "timeStamp": timeStamp}
            correction_row.update((key, sample[key]) for key in _CORRECTION_KEYS)
            correctionsList.append(correction_row)

            ## Error variables
            ## These are distributions; take the mean of each one per sample
            error_row = {"name": name, "timeStamp": timeStamp}
            error_row.update(
                (key, _mean_of_distribution(sample, key)) for key in _ERROR_DISTANCE_KEYS
            )
            errorsList.append(error_row)

In [58]:
## Show every row when a DataFrame is displayed
pd.options.display.max_rows = None

## One row per keyboard sample
errorDF = pd.DataFrame(data=errorsList)
correctionsDF = pd.DataFrame(data=correctionsList)

## Last expression of the cell: display the error-distance table
errorDF

Unnamed: 0,name,timeStamp,shortWordCharKeyUpErrorDistance,shortWordCharKeyDownErrorDistance,spaceUpErrorDistance
0,KeilenPalacios,2022-09-29T18:18:11-0400,10.855384,10.952307,58.414735
1,KeilenPalacios,2022-09-29T18:28:36-0400,12.070169,12.746101,43.537646
2,KeilenPalacios,2022-09-29T19:53:09-0400,12.392187,12.124375,51.427825
3,KeilenPalacios,2022-09-29T20:17:53-0400,13.852,13.586666,43.659999
4,KeilenPalacios,2022-09-29T21:43:55-0400,13.854884,13.003256,48.058823
5,KeilenPalacios,2022-09-29T21:52:47-0400,13.075,13.33,44.001904
6,KeilenPalacios,2022-09-29T22:02:33-0400,14.631111,14.471428,45.46857
7,KeilenPalacios,2022-09-29T22:21:48-0400,11.58,11.416727,61.713598
8,KeilenPalacios,2022-09-29T22:36:48-0400,12.441348,12.525842,65.369728
9,KeilenPalacios,2022-09-27T15:17:42-0400,12.657959,12.406122,46.473749


In [56]:
# ## Want to extract the keyboard metrics in a good way
# file_path = "RK.8D1DBFAD.DJW Thesis_20220930-20221001/sensorkit-keyboard-metrics/iPhone/2f32cd19-e9c5-4aad-8999-6f4646169ab6/3400296D-7399-44F9-9E9D-2CA824598AE8/2022-09-28T163510-0400_2022-09-29T071630-0400.json.gz"
# a = gzip.open(file_path, 'rb')
# contents = json.loads(a.read())
# print(pd.DataFrame(contents))
## Join the App data with this DataFrame