In [2]:
import pandas as pd
import numpy as np
import pickle
import datetime
import json
import re
import os, gzip, shutil, fnmatch

import sklearn
import matplotlib

from sklearn import preprocessing
from matplotlib import pyplot as plt 

___
#### Dataframe of Extracted Data from Daniel
- Device
- Keyboard
- Survey
    - AffectDF extracted from Survey DF
___

In [5]:
# Directory that holds the extracted CSV files. Set the BUDDING_SCHOLAR_DATA_DIR
# environment variable to run this notebook on another machine; the default is the
# original local location, so existing behaviour is unchanged when it is not set.
DATA_DIR = os.environ.get(
    "BUDDING_SCHOLAR_DATA_DIR",
    "/Users/farhan/DNL/BuddingScholar/Budding_Scholar_22-23/ExtractedData",
)

deviceUsagePath = os.path.join(DATA_DIR, "run1_sk_deviceUsage.csv")
keyboardPath = os.path.join(DATA_DIR, "run1_sk_keyboard.csv")
surveyPath = os.path.join(DATA_DIR, "run1_survey_results.csv")

# Load the three extracts; reset_index(drop=True) leaves each frame with a plain 0..n-1 index.
device = pd.read_csv(deviceUsagePath).reset_index(drop=True)
keyboard = pd.read_csv(keyboardPath).reset_index(drop=True)
survey = pd.read_csv(surveyPath).reset_index(drop=True)

In [6]:
# Keep only the survey rows whose ResultIdentifier starts with 'affect', with the
# columns used downstream. na=False makes rows with a missing ResultIdentifier count
# as non-matching instead of producing a NaN-containing mask that .loc rejects.
is_affect = survey.ResultIdentifier.str.startswith('affect', na=False)
affect_df = survey.loc[is_affect, ['ParticipantIdentifier', 'ResultIdentifier', 'Answers', 'trial_date']]
# Drop the items whose identifier ends in '_am' and renumber the rows.
affect_df = affect_df[~affect_df.ResultIdentifier.str.endswith('_am')].reset_index(drop=True)

# Index the keyboard frame by trial_date. Guarded so that re-running this cell does
# not raise KeyError once 'trial_date' has already been moved out of the columns.
if 'trial_date' in keyboard.columns:
    keyboard.set_index('trial_date', inplace=True)

affect_df.head(2)

Unnamed: 0,ParticipantIdentifier,ResultIdentifier,Answers,trial_date
0,90592e06-bcf6-4150-85b0-c5daf7e7569c,affect_neg_frustrated,2,2022-10-24
1,90592e06-bcf6-4150-85b0-c5daf7e7569c,affect_pos_relaxedCalm,3,2022-10-24


___
Calculate the delete ratio (total deletes ÷ total words, per participant per day) from the keyboard DataFrame
___

In [7]:
## Getting delete ratio
# Daily totals per participant: words typed and deletes made.
group_keys = ['ParticipantIdentifier', 'trial_date']
word_count = keyboard.groupby(group_keys)['keyboard_total_words'].sum().reset_index(name='word_count')
delete_count = keyboard.groupby(group_keys)['keyboard_total_deletes'].sum().reset_index(name='delete_count')

# del_ratio = deletes / words. Days with zero words would otherwise divide by zero and
# yield inf; masking the denominator turns those days into NaN (missing) instead.
nonzero_words = word_count['word_count'].where(word_count['word_count'] != 0)
word_count['del_ratio'] = delete_count['delete_count'] / nonzero_words
word_count.head(5)

Unnamed: 0,ParticipantIdentifier,trial_date,word_count,del_ratio
0,01801252-3a7e-4f5f-8b6d-49e8da3902f3,2022-09-27,94,0.12766
1,01801252-3a7e-4f5f-8b6d-49e8da3902f3,2022-09-29,139,0.546763
2,01801252-3a7e-4f5f-8b6d-49e8da3902f3,2022-09-30,66,0.30303
3,01801252-3a7e-4f5f-8b6d-49e8da3902f3,2022-10-01,253,0.233202
4,01801252-3a7e-4f5f-8b6d-49e8da3902f3,2022-10-02,100,0.39


___
Merge the word-count DataFrame (`word_count`) with the affect DataFrame (`affect_df`)
___

In [11]:
# Left-join the affect answers onto the daily keyboard totals, so every
# (participant, day) row of word_count is kept even when no answer matches.
cor_df = word_count.merge(
    affect_df,
    on=['ParticipantIdentifier', 'trial_date'],
    how='left',
)
cor_df['Answers'] = pd.to_numeric(cor_df['Answers'])

# Narrow to the "stressed" item for one participant.
is_stressed = cor_df['ResultIdentifier'] == "affect_neg_stressed"
is_target = cor_df['ParticipantIdentifier'] == '0501ba67-3406-4779-aff1-878a0e9f7885'
data = cor_df.loc[is_stressed & is_target]
data.head(5)

Unnamed: 0,ParticipantIdentifier,trial_date,word_count,del_ratio,ResultIdentifier,Answers
28,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-16,53,2.207547,affect_neg_stressed,2.0
48,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-17,16,0.875,affect_neg_stressed,2.0
68,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-21,24,0.5,affect_neg_stressed,1.0
88,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-22,110,0.890909,affect_neg_stressed,3.0
108,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-23,24,0.25,affect_neg_stressed,3.0


___
Get the affect DataFrame for a single participant
- Then keep only the `stressed` rows
___

In [13]:
# ParticipantIdentifier value used further down to filter the affect answers.
participant = "90592e06-bcf6-4150-85b0-c5daf7e7569c"

In [19]:
import pandas as pd
# Read the survey export through the notebook-level `surveyPath` so the file location
# is defined in a single place rather than as a second hard-coded copy.
df = pd.read_csv(surveyPath)

# Affect items only, this time keeping the `time` column as well. na=False treats rows
# with a missing ResultIdentifier as non-matching instead of producing a NaN mask.
is_affect = df.ResultIdentifier.str.startswith('affect', na=False)
affect_df = df.loc[is_affect, ['ParticipantIdentifier', 'ResultIdentifier', 'Answers', 'trial_date', 'time']]
affect_df = affect_df[~affect_df.ResultIdentifier.str.endswith('_am')].reset_index(drop=True)

# Keep the items ending in "stressed" for the selected participant.
is_stressed = affect_df.ResultIdentifier.str.endswith("stressed")
is_participant = affect_df.ParticipantIdentifier == participant
affect_df = affect_df.loc[is_stressed & is_participant]

# Order chronologically. `time` breaks ties between several answers on the same
# trial_date, which sorting on trial_date alone left in an unspecified order.
# NOTE(review): assumes `time` is a zero-padded HH:MM:SS string, as in the displayed rows — confirm.
affect_df = affect_df.sort_values(by=["trial_date", "time"])
affect_df.head(3)

Unnamed: 0,ParticipantIdentifier,ResultIdentifier,Answers,trial_date,time
20784,90592e06-bcf6-4150-85b0-c5daf7e7569c,affect_neg_stressed,4,2022-09-29,22:12:20
42005,90592e06-bcf6-4150-85b0-c5daf7e7569c,affect_neg_stressed,4,2022-09-30,20:10:10
41017,90592e06-bcf6-4150-85b0-c5daf7e7569c,affect_neg_stressed,5,2022-10-01,22:01:25


___
Get word count of a particular participant from Daniel's Extracted data
___

In [20]:
# Total words typed per participant per day, read through the notebook-level
# `keyboardPath` instead of a duplicated hard-coded path. Grouping on the
# 'trial_date' column directly gives the same result as first moving it into the
# index, so the in-place set_index step is not needed.
df = (
    pd.read_csv(keyboardPath)
    .groupby(['ParticipantIdentifier', 'trial_date'])['keyboard_total_words']
    .sum()
    .reset_index(name='word_count')
)

In [23]:
# Restrict the daily word counts to the participant being checked, then preview them.
is_participant = df["ParticipantIdentifier"] == "90592e06-bcf6-4150-85b0-c5daf7e7569c"
df = df[is_participant]
df.head(20)

Unnamed: 0,ParticipantIdentifier,trial_date,word_count
1357,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-09-26,34
1358,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-09-27,18
1359,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-09-28,112
1360,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-09-29,56
1361,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-09-30,388
1362,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-10-01,231
1363,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-10-02,67
1364,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-10-03,178
1365,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-10-04,128
1366,90592e06-bcf6-4150-85b0-c5daf7e7569c,2022-10-05,309


___
Using Daniel's extracted data:
- Participant 90592e06-bcf6-4150-85b0-c5daf7e7569c	
- Date 2022-10-14	
- Word count 219

Manual count from the raw MyDataHelps data
- Participant 90592e06-bcf6-4150-85b0-c5daf7e7569c	
- Date 2022-10-14	
- Word count 517

Using word dataframe created by running sentiments.ipynb
- Participant 90592e06-bcf6-4150-85b0-c5daf7e7569c	
- Date 2022-10-14	
- Word count 502
___