In [98]:
import pandas as pd

# Load the survey exports for the community sample and the SurveySwap sample.
# Forward slashes keep the relative paths portable: backslash separators only
# resolve on Windows, and "\S" is an invalid string escape sequence.
community_df = pd.read_csv("data/Story+Scale_February+24,+2022_05.59.csv")
surveyswap_df = pd.read_csv(
    "data/Story+Scale+-+SurveySwap_February+24,+2022_06.03.csv")

In [101]:
# The variable holding the question order also shows the story ID,
# so rename it accordingly in both samples.
story_id_rename = {"Stories-Feb17,2022_DO": "story_id"}
community_df = community_df.rename(columns=story_id_rename)
surveyswap_df = surveyswap_df.rename(columns=story_id_rename)

# Extract the prompt abbreviation, i.e. the part between the underscores of
# the story ID. expand=False returns a Series instead of a one-column frame.
surveyswap_df["prompt_label"] = surveyswap_df["story_id"].str.extract(
    r"_(.*)_", expand=False)
community_df["prompt_label"] = community_df["story_id"].str.extract(
    r"_(.*)_", expand=False)

# Rename prompt abbreviation to full name.
# The result is assigned back: calling replace(..., inplace=True) on a column
# selection is a chained in-place update, which is not guaranteed to reach
# the parent frame (and does not under pandas Copy-on-Write).
prompt_rename_dict = {"HF": "High Fantasy", "HOR": "Horror",
                      "HR": "Historical Romance", "HSF": "Hard Sci-Fi"}
surveyswap_df["prompt_label"] = surveyswap_df["prompt_label"].replace(
    prompt_rename_dict)
community_df["prompt_label"] = community_df["prompt_label"].replace(
    prompt_rename_dict)

In [102]:
# The first row holds the description of every variable, hence also of the
# items. Keep the descriptions found at column positions 18 to 93 in their
# own Series, then remove the rows labelled 0 and 1 from both main dfs.
items_descr = surveyswap_df.iloc[0, 18:94]
surveyswap_df.drop(labels=[0, 1], axis="index", inplace=True)
community_df.drop(labels=[0, 1], axis="index", inplace=True)

In [103]:
# Strip the shared question preamble so only the item wording itself remains.
# The preamble is matched literally (regex=False); the two adjacent string
# literals below are concatenated into one search string.
items_descr = items_descr.str.replace(
    "For the following questions, please think of the story you just read.\n"
    "Indicate how much you agree or disagree with each of the following statements about the story. - ",
    "",
    regex=False,
)

In [104]:
# SurveySwap survey contained quality checks
# Label if participants passed those
def evaluate_qual_check_1(row):
    """Tell whether a respondent passed quality check 1.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Provides "Qual_Check_1" (the answer that was picked) and
        "prompt_label" (the full prompt name).

    Returns
    -------
    bool
        True if the picked answer belongs to the row's prompt, False otherwise
        (including answers that are not one of the four known options).
    """
    # Each known answer is only correct for exactly one prompt.
    prompt_for_answer = {
        '...someone working in a tavern.': "Historical Romance",
        '...a noise coming from a mirror.': "Horror",
        '...with a message from the president.': "Hard Sci-Fi",
        '...the description of a small village.': "High Fantasy",
    }
    answer = row["Qual_Check_1"]
    if answer not in prompt_for_answer:
        return False
    return row["prompt_label"] == prompt_for_answer[answer]

# Check quality check 1 - participants correctly identified beginning of story
surveyswap_df["pass_qual_1"] = surveyswap_df.apply(
    evaluate_qual_check_1, axis=1)
# Check quality check 2 - participants were asked to give a specific answer.
# A vectorized comparison replaces the row-wise lambda; missing answers
# compare unequal and therefore count as failed.
surveyswap_df["pass_qual_2"] = surveyswap_df["story_scale_74"] == "Somewhat disagree"

# quality check 2 marks bad respondent either way
# if quality check 1 is failed, but 2 is passed needs closer look
mask_inspect_qual = ~surveyswap_df["pass_qual_1"] & surveyswap_df["pass_qual_2"]
print("Do any cases need further investigation?\n" + str(mask_inspect_qual.value_counts()))

# okay no closer inspection needed
# create dataframe with only good respondents (both checks passed)
mask_passed = surveyswap_df["pass_qual_1"] & surveyswap_df["pass_qual_2"]
surveyswap_passed_df = surveyswap_df[mask_passed]

Do any cases need further investigation?
False    41
dtype: int64


In [105]:
# Summarise how many SurveySwap respondents passed both quality checks.
good_resp = len(surveyswap_passed_df)
bad_respondents = len(surveyswap_df) - good_resp
# Share of filtered-out respondents in percent (variable name kept as-is).
bar_resp_per = (bad_respondents/len(surveyswap_df))*100
print("Good respondents from SurveySwap:\t\t {}".format(good_resp))
print("Bad respondents from SurveySwap (filtered out):\t {}".format(bad_respondents))
# The percentage was computed but never passed to the message, so a literal
# "{}" was printed; substitute it, rounded to one decimal place.
print("That is {:.1f}% bad respondents".format(bar_resp_per))

Good respondents from SurveySwap:		 27
Bad respondents from SurveySwap (filtered out):	 14
That is {}% bad respondents - yay SurveySwap has really 


In [106]:
# Stack the community sample and the quality-filtered SurveySwap sample
# row-wise. Each frame keeps its own row labels, so labels can repeat.
combined_df = pd.concat(objs=[community_df, surveyswap_passed_df], axis=0)

In [107]:
# Peek at the UserLanguage column of the combined sample
combined_df.loc[:, "UserLanguage"]

2     EN
3     EN
4     EN
5     EN
6     EN
      ..
34    EN
37    EN
40    EN
41    EN
42    EN
Name: UserLanguage, Length: 97, dtype: object

In [108]:
# Columns that identify respondents or are not needed any further
cols_to_delete = [
    'Status',
    'IPAddress',
    'Progress',
    'Finished',
    'RecipientLastName',
    'RecipientFirstName',
    'RecipientEmail',
    'ExternalReference',
    'LocationLatitude',
    'LocationLongitude',
    'DistributionChannel',
    'UserLanguage',
    '1',
]

# Remove them from the combined frame in place
combined_df.drop(labels=cols_to_delete, axis="columns", inplace=True)

In [109]:
# Rename remaining columns to a more sensible and easier to use naming-scheme

# Mapping for the bookkeeping columns
rename_cols_dict_1 = {
    'StartDate': "start",
    'EndDate': "end",
    'Duration (in seconds)': "duration_in_sec",
    'RecordedDate': "recorded",
    'ResponseId': "response_id",
    "story_scale_DO": "story_scale_order",
    "Qual_Check_1": "qual_check_1",
    "Qual_Check_1_DO": "qual_check_1_order",
}

# Consecutive runs of "story_scale_<n>" columns are renamed per sub-scale:
# (new name prefix, first item number, last item number), both ends inclusive.
# Within each run the new names are numbered starting from 1 again.
subscale_runs = [
    ("story_cohesion_", 1, 12),
    ("story_cons_char_", 13, 20),
    ("story_creativity_", 21, 32),
    ("story_quality_", 33, 40),
    ("story_repitition_", 41, 52),
    ("story_style_", 53, 64),
    ("story_pacing_", 65, 73),
]

# Mapping for the scale items, built in ascending item order
rename_cols_dict_2 = {
    "story_scale_" + str(item_no): prefix + str(position)
    for prefix, first_item, last_item in subscale_runs
    for position, item_no in enumerate(range(first_item, last_item + 1), start=1)
}

# Item 74 is the second quality check rather than a scale item
rename_cols_dict_2["story_scale_74"] = "qual_check_2"

# Relabel the item descriptions so they match the new item names
items_descr.rename(index=rename_cols_dict_2, inplace=True)

# Relabel the columns of combined_df. The two mappings have disjoint keys and
# no renamed target is itself a key, so one merged pass gives the same result
# as applying them one after the other.
combined_df.rename(
    columns={**rename_cols_dict_1, **rename_cols_dict_2}, inplace=True)
combined_df

Unnamed: 0,start,end,duration_in_sec,recorded,response_id,story_cohesion_1,story_cohesion_2,story_cohesion_3,story_cohesion_4,story_cohesion_5,...,story_pacing_8,story_pacing_9,story_scale_order,story_id,prompt_label,qual_check_1,qual_check_1_order,qual_check_2,pass_qual_1,pass_qual_2
2,2022-02-17 07:41:22,2022-02-17 07:58:46,1044,2022-02-17 07:58:47,R_3LZPTbqxmeWvp7w,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,...,Somewhat disagree,Somewhat agree,I had a hard time making sense of what was goi...,GEN_HF_6,High Fantasy,,,,,
3,2022-02-17 07:52:11,2022-02-17 08:01:19,548,2022-02-17 08:01:20,R_3CCMkj9T7UgOtgP,Somewhat disagree,Somewhat agree,Somewhat disagree,Strongly disagree,Somewhat disagree,...,Somewhat agree,Somewhat disagree,I had a hard time making sense of what was goi...,ALL_HR_3,Historical Romance,,,,,
4,2022-02-17 07:55:22,2022-02-17 08:03:16,473,2022-02-17 08:03:16,R_3PBKFhmDXlAQNO6,Strongly agree,Strongly agree,Strongly disagree,Strongly disagree,Neither agree nor disagree,...,Somewhat disagree,Somewhat disagree,I had a hard time making sense of what was goi...,ALL_HOR_2,Horror,,,,,
5,2022-02-17 07:48:01,2022-02-17 08:06:41,1119,2022-02-17 08:06:42,R_upGgPQKBWAUY7tL,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat agree,...,Somewhat agree,Somewhat disagree,I had a hard time making sense of what was goi...,LOW_HR_7,Historical Romance,,,,,
6,2022-02-17 08:01:22,2022-02-17 08:12:33,671,2022-02-17 08:12:34,R_2A0WFAxmTP8WJRW,Strongly disagree,Strongly disagree,Strongly agree,Neither agree nor disagree,Somewhat disagree,...,Somewhat agree,Somewhat agree,I had a hard time making sense of what was goi...,ALL_HOR_3,Horror,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,2022-02-21 13:58:14,2022-02-21 14:08:17,602,2022-02-23 01:48:56,R_3hgzTMcH1Mpo4MF,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Somewhat agree,Somewhat agree,...,Neither agree nor disagree,Somewhat disagree,The plot development in the story was predicta...,LOW_HF_4,High Fantasy,...the description of a small village.,...a noise coming from a mirror.|...someone wo...,Somewhat disagree,True,True
37,2022-02-23 14:37:30,2022-02-23 14:46:11,520,2022-02-23 14:47:32,R_1GUNkOAV0MWTSme,Strongly disagree,Somewhat agree,Strongly agree,Somewhat agree,Strongly agree,...,Somewhat disagree,Strongly disagree,Characters said or did the same thing many tim...,LOW_HOR_8,Horror,...a noise coming from a mirror.,...two armies clashing in a battle.|...someone...,Somewhat disagree,True,True
40,2022-02-23 23:43:27,2022-02-23 23:57:56,869,2022-02-24 04:11:29,R_23a6yJUO5DGDsaM,Strongly agree,Strongly agree,Neither agree nor disagree,Neither agree nor disagree,Neither agree nor disagree,...,Somewhat agree,Neither agree nor disagree,My understanding of the characters in the stor...,MOR_HSF_2,Hard Sci-Fi,...with a message from the president.,...with a message from the president.|...the d...,Somewhat disagree,True,True
41,2022-02-23 21:30:12,2022-02-23 21:41:27,674,2022-02-24 04:11:29,R_1K6y4S6OdmMmgJf,Somewhat disagree,Somewhat disagree,Somewhat agree,Somewhat disagree,Somewhat agree,...,Somewhat agree,Somewhat disagree,The author's choice of words was elegant|Many ...,ALL_HF_2,High Fantasy,...the description of a small village.,...two armies clashing in a battle.|...with a ...,Somewhat disagree,True,True
