In [1]:

#library
import pandas as pd
import os

### Read files

In [2]:

# Absolute path of this notebook (resolved against the current working
# directory) and the directory that contains it.
path = os.path.realpath("PostSurveyAnalysis.ipynb")
dir = os.path.dirname(path)

# The data folders are addressed by replacing every occurrence of 'scripts'
# in the notebook directory with the name of the target folder.
input_data_dir = dir.replace('scripts', 'input_data')
output_dir = dir.replace('scripts', 'output')

# Qualtrics survey exports (English and German). header=1 takes the column
# labels from the second row of each file.
surveys_en_long = pd.read_csv(
    input_data_dir + "/qualtrics_data/230328_FoodCoach_English_long.csv",
    header=1,
)
surveys_de_long = pd.read_csv(
    input_data_dir + "/qualtrics_data/230328_FoodCoach_German_long.csv",
    header=1,
)

# Table of valid FC2022 users together with their survey ids.
valid_FC2022_users_with_survey_ids = pd.read_csv(
    output_dir + "/230328_valid_FC2022_users_with_survey_ids.csv"
)



In [3]:
def clean_surveys(df, valid_survey_ids, min_progress=50):
    """
    Clean one survey export and keep the columns needed for further analysis.

    Steps, in order:
      1. Shorten every column label to the text before its first " - " and
         fix the 'heigt_and_weight_*' spelling.
      2. Drop rows that have no external reference.
      3. Drop rows whose progress is below ``min_progress``.
      4. Keep only the analysis columns (see ``keep_columns`` below).
      5. Keep only rows whose 'External Data Reference' is one of
         ``valid_survey_ids``.

    The input frame is never modified; a new DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw survey table with string column labels.
    valid_survey_ids : list-like
        Numeric survey ids that should be retained.
    min_progress : int or float, default 50
        Minimum value of the 'progress' / 'Progress' column (inclusive) for a
        row to be kept.

    Returns
    -------
    pandas.DataFrame
        The filtered rows restricted to the analysis columns.

    Raises
    ------
    KeyError
        If one of the analysis columns is missing after renaming.
    ValueError
        If a progress or reference value cannot be parsed as a number.
    """
    # Non-inplace renames: the caller's DataFrame keeps its original labels.
    df = df.rename(columns=lambda label: label.split(" - ")[0])
    # Keys that are not present are ignored by rename, so no existence check
    # is needed for the misspelled labels.
    df = df.rename(columns={
        'heigt_and_weight_1': "height_and_weight_1",
        'heigt_and_weight_2': "height_and_weight_2",
    })

    # Drop rows without a reference. 'External Data Reference' is included
    # because it is the column matched against valid_survey_ids below, and
    # isin() treats NaN as equal to NaN: without this step a missing id in
    # valid_survey_ids would let rows with a missing reference through.
    for ref_col in ('externalDataReference', 'ExternalReference',
                    'External Data Reference'):
        if ref_col in df.columns:
            df = df[df[ref_col].notna()]

    # Use dropna() with axis=1 to drop columns where there are no values
    # df = df.dropna(axis=1, how="all")

    # Only keep surveys that are at least `min_progress` complete.
    for progress_col in ('progress', 'Progress'):
        if progress_col in df.columns:
            df = df[pd.to_numeric(df[progress_col]) >= min_progress]

    # Only keep certain columns; 'interface_satisfaction' exists in en but
    # not de, so it is deliberately left out.
    keep_columns = [
        'Recorded Date', 'External Data Reference',
        'height_and_weight_1', 'height_and_weight_2',
        'ease_of_use_use', 'ease_of_use_learn', 'ease_of_use_navigation',
        'ease_of_use_function', 'ease_of_use_recover',
        'interface_like', 'interface_info', 'interface_progress',
        'interface_comfortable', 'interface_time', 'interface_reuse',
        'usefulness_nutrition', 'usefulness_access', 'usefulness_management',
        'usefulness_function', 'usefulness_cache', 'usefulness_service',
    ]
    df = df.loc[:, keep_columns]

    # Only keep surveys that belong to valid FC2022 users.
    reference_ids = pd.to_numeric(df["External Data Reference"])
    df = df[reference_ids.isin(valid_survey_ids)]

    return df


In [4]:
# Clean the English and German exports against the list of valid post-survey
# ids, then stack the two results (German rows first) into one table.
valid_FC2022_survey_ids = valid_FC2022_users_with_survey_ids["post_survey_id"]

cleaned_surveys_en_long = clean_surveys(surveys_en_long, valid_FC2022_survey_ids)
cleaned_surveys_de_long = clean_surveys(surveys_de_long, valid_FC2022_survey_ids)

experiment_surveys = pd.concat(
    [cleaned_surveys_de_long, cleaned_surveys_en_long],
    axis=0,
)

In [11]:
# Display the averages.
# numeric_only=True restricts the reduction to numeric columns; without it,
# pandas >= 2.0 raises a TypeError on text columns such as 'Recorded Date'
# instead of silently skipping them.
# NOTE(review): 'External Data Reference' is used as a survey id elsewhere in
# this notebook, so its average is presumably not meaningful - confirm.
experiment_surveys.mean(numeric_only=True).round(1)

External Data Reference    2092.6
height_and_weight_1         179.1
ease_of_use_use               4.8
ease_of_use_learn             4.8
ease_of_use_navigation        5.0
ease_of_use_function          4.6
ease_of_use_recover           4.4
interface_like                4.3
interface_info                4.4
interface_progress            3.4
interface_comfortable         4.2
interface_time                4.8
interface_reuse               3.4
usefulness_nutrition          3.6
usefulness_access             3.2
usefulness_management         2.9
usefulness_function           2.9
usefulness_cache              4.3
usefulness_service            3.2
dtype: float64

In [12]:
# Display the standard deviations.
# numeric_only=True restricts the reduction to numeric columns; without it,
# pandas >= 2.0 raises a TypeError on text columns such as 'Recorded Date'
# instead of silently skipping them.
experiment_surveys.std(numeric_only=True).round(1)

External Data Reference    73.0
height_and_weight_1        11.3
ease_of_use_use             1.2
ease_of_use_learn           1.5
ease_of_use_navigation      1.5
ease_of_use_function        1.3
ease_of_use_recover         1.4
interface_like              1.7
interface_info              1.2
interface_progress          1.9
interface_comfortable       1.5
interface_time              1.8
interface_reuse             1.8
usefulness_nutrition        2.2
usefulness_access           1.8
usefulness_management       1.5
usefulness_function         1.7
usefulness_cache            1.2
usefulness_service          1.6
dtype: float64

In [9]:
# Sanity check: (number of rows, number of columns) of the combined survey table.
experiment_surveys.shape

(9, 21)