In [3]:
%load_ext autoreload
%autoreload 1
import pandas as pd
import helpers.cleaning as data_cleaning_utils
%aimport helpers.cleaning
import helpers.metrics as metrics
%aimport helpers.metrics
import helpers.db as dbutils
%aimport helpers.db
from progressbar import progressbar

# Directory handed to the dbutils download/load helpers for per-user event data.
EVENTS_DIR = 'data/events'

# Firestore client passed to the download helper below.
db = dbutils.init_firestore_client()

# Download user and events data
users_df = data_cleaning_utils.load_qualtrics_csv('data/qualtrics.csv')
# Iterate over the index (whose values are used as user ids) instead of
# `iterrows()`: the row contents were never used, and an index has a length,
# so the progress bar can show a total rather than an unknown-length counter.
for user_id in progressbar(users_df.index):
    dbutils.download_events_data_for_user(user_id, db, EVENTS_DIR)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


/ |#                                                  | 0 Elapsed Time: 0:00:00
| |#                                                | 260 Elapsed Time: 0:00:00




In [4]:
def create_summary_for_user(user_id: str, users_df: pd.DataFrame):
    """Print and display a summary for one user and return their task table.

    Loads the user's events from ``EVENTS_DIR``, builds the events and task
    dataframes through the cleaning helpers, prints completion flags and the
    attention-check length, and displays the user's row of ``users_df``
    followed by the task table.

    Args:
        user_id: Identifier used to load the events and to select the
            matching row(s) of ``users_df`` by index.
        users_df: Dataframe indexed by user id. Columns whose name contains
            ``'ssvs'`` are displayed separately from the remaining columns.

    Returns:
        The task dataframe with the ``'attention_check'`` row removed. When
        suggestions were shown to the user, this is the result of
        ``metrics.compute_metrics_for_tasks``.

    Raises:
        AssertionError: If the user was not shown suggestions but the event
            log still contains events whose name starts with ``'suggestion'``.
        IndexError: If the event log has no ``'study_started'`` event.
        KeyError: If the task table has no ``'attention_check'`` row.
    """
    # All of the below is for a single user
    events = dbutils.load_events_for_user(user_id, EVENTS_DIR)
    events_df = data_cleaning_utils.create_events_df(events)
    tasks_df = data_cleaning_utils.create_task_df_for_user(events_df)

    event_names = events_df['eventName']
    seen_event_names = set(event_names.unique())

    full_study_completed = {'study_started', 'study_finished'} <= seen_event_names
    qualtrics_completed = user_id in users_df.index
    # The first 'study_started' event carries the per-user configuration.
    study_started_events = events_df[event_names == 'study_started']
    show_suggestion = study_started_events.iloc[0]['eventDetails']['user']['showSuggestions']
    attention_check_length = tasks_df.loc['attention_check', 'charLength']

    if not show_suggestion:
        # Check that when suggestions are not shown, there are actually no
        # suggestion events (sanity check). This must be `any`, not `all`:
        # `not all(...)` passes as soon as one non-suggestion event exists.
        has_suggestion_events = event_names.str.startswith('suggestion', na=False).any()
        assert not has_suggestion_events, (
            f"{user_id}: suggestions were disabled but suggestion events were logged"
        )

    print(
        f"{user_id}\n"
        f"    Study completed: {full_study_completed}\n"
        f"    Qualtrics completed: {qualtrics_completed}\n"
        f"    Attention check length: {attention_check_length}\n"
    )

    # Remove attention check task
    tasks_df = tasks_df.drop('attention_check')

    if show_suggestion:
        print(f"Show suggestions: {show_suggestion}")

        # Create suggestions dataframe
        suggestions_df = data_cleaning_utils.create_suggestions_df_for_user(events_df, tasks_df)
        tasks_df = metrics.compute_metrics_for_tasks(tasks_df, suggestions_df)

    print("User:")
    ssvs_cols = [col for col in users_df.columns if 'ssvs' in col]
    user_rows = users_df[users_df.index == user_id]
    # Show the 'ssvs' columns in their own table, apart from the other columns.
    display(user_rows.drop(columns=ssvs_cols))
    display(user_rows[ssvs_cols])

    print("tasks_df:")
    display(tasks_df)

    return tasks_df

In [5]:
# Summarise a single user; the returned `tasks_df` is read again by the next cell.
# NOTE(review): this looks like a real participant identifier — confirm it is
# acceptable to keep it (and the displayed survey row) in a shared notebook.
user_id = "p-65f84a674dc27b8856a6086a"
tasks_df = create_summary_for_user(user_id, users_df)

p-65f84a674dc27b8856a6086a
    Study completed: True
    Qualtrics completed: True
    Attention check length: 0

Show suggestions: True
Removing erroneous suggestions: 0/708
User:


Unnamed: 0_level_0,Start Date,qualtrics_duration,birth,age,gender,country,years_in_country,city,education,occupation,languages
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
p-65f84a674dc27b8856a6086a,2024-08-05 13:50:23,147,US,34,Male,US,34,San Jose,Post-graduation,Transportation,English


Unnamed: 0_level_0,ssvs_power,ssvs_achievement,ssvs_hedonism,ssvs_stimulation,ssvs_self-direction,ssvs_universalism,ssvs_benevolence,ssvs_tradition,ssvs_conformity,ssvs_security
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
p-65f84a674dc27b8856a6086a,5.0,5.0,5.0,3.0,4.0,4.0,4.0,5.0,5.0,5.0


tasks_df:


Unnamed: 0_level_0,time_started,prompt,minWords,time_completed,finalHtml,finalHtml_stripped,duration_s,charLength,ai_reliance,suggestion_edit_rate,percentage_edited_suggestions,shown,accepted,ignored,rejected
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
tutorial,1722886483592,Write an essay on the importance of education.,-1,1722887001283,Education is the key to success in today's com...,Education is the key to success in today's com...,517.691,627,0.076555,0.0,0.0,595,1,593,1
food,1722887003547,What is your favorite food and why?,50,1722887149029,"My favorite food is sushi because it is fresh,...","My favorite food is sushi because it is fresh,...",145.482,306,0.598039,0.006173,0.166667,79,6,73,0
public_figure,1722887150197,Who is your favorite celebrity or public figur...,50,1722887243990,Celebrity that I admire the most is the talent...,Celebrity that I admire the most is the talent...,93.793,313,0.86262,0.0,0.0,15,8,7,0
festival,1722887254958,Which is your favorite festival/holiday and ho...,50,1722887330191,Holiday season is always magical for me. festi...,Holiday season is always magical for me. festi...,75.233,322,0.838509,0.0,0.0,10,7,3,0
leave,1722887331916,Write an email to your boss asking them for a ...,50,1722887416650,My boss I hope this email finds you well. I wa...,My boss I hope this email finds you well. I wa...,84.734,269,0.858736,0.0,0.0,9,7,2,0


In [6]:
# Print the 'finalHtml_stripped' value of every task, separated by a '---' line.
print("\n---\n".join(tasks_df.loc[:, 'finalHtml_stripped']))

My favorite food is meatloaf. I know it's like peasant food, but it is somewhat comforting. I make it much like I make my meatballs but add additional ingredients. Sometimes it's an hard-boiled egg in the middle, sometimes onions and peppers (often jalapenos), and sometimes I wrap it in bacon. Add mashed potatoes and peas and I am a happy camper.
---
My favorite public figure would have to be the current President of the United States, Joe Biden. He brought us out of chaos with his calm and measured approach. He exemplifies integrity and honor, unlike his predecessor. I feel our country is once again respected instead of the laughingstock we had become. 
---
My favorite holiday is Christmas. When my son was little, we had a time-honored tradition of opening one present on Christmas Eve, before Santa dropped off the rest of the gifts. And of course, after the excitement of Christmas morning, we had a traditional meal of turkey and all the fixings usually with all the friends we consider