In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

This script will add a column for all classmates of the students. Classmates are placed in random order as a comma separated string. 

In [2]:
participant_data_path = "./example_data"
participant_input_filename = "test_participantList.csv"

random_state=np.random.RandomState(1234)

In [3]:
column_dict = {
    "Personal Data": "attribute_2 <PersonalData>",
    "ClassId": "attribute_3 <ClassID>",
    "Stamnr": "token",
    "Name":"firstname"
}
classmates_column_name = "attribute_1 <Classmates>"
pupils_column_name = "attribute_1 <Pupils>"

In [4]:
participant_data_path = Path(participant_data_path)
participant_input_file = participant_data_path / participant_input_filename
school_df = pd.read_csv(participant_input_file)
school_df = school_df.loc[school_df["Consent"] != "No"].drop(["School", "Class", "Gender", "Age", "Consent"], axis=1)

## Students file preparation

In [5]:
students_df = school_df.rename(columns=column_dict).drop("Mentor", axis=1)

In [6]:
for i, student in students_df.iterrows():
    students_df.at[i,classmates_column_name] = students_df \
        .drop(i) \
        .loc[students_df["attribute_3 <ClassID>"] == student["attribute_3 <ClassID>"]] \
        .firstname \
        .sample(frac=1, random_state=random_state).str.cat(sep=', ')

In [7]:
students_df["language"] = "nl"
students_df["attribute_2 <PersonalData>"] = students_df["attribute_2 <PersonalData>"].fillna("true").replace("No", "false")

In [8]:
students_df

Unnamed: 0,Grade,attribute_3 <ClassID>,Nr,firstname,token,LongId,attribute_2 <PersonalData>,attribute_1 <Classmates>,language
0,1,1,1,Harold,123451,999990001,True,Fatima,nl
2,1,1,3,Fatima,123453,999990003,True,Harold,nl
3,2,2,1,Charissa,123454,999990004,False,"Dwayne, Abdul, Derek",nl
4,2,2,2,Dwayne,123455,999990005,False,"Abdul, Derek, Charissa",nl
5,2,2,3,Abdul,123456,999990006,True,"Derek, Dwayne, Charissa",nl
6,2,2,4,Derek,123457,999990007,True,"Charissa, Abdul, Dwayne",nl


In [9]:
students_output_file = participant_input_file.with_stem(participant_input_file.stem + "_LimeSurveyStudents")
print(students_output_file)
students_df.to_csv(students_output_file)

example_data/test_participantList_LimeSurveyStudents.csv


## Mentors file preparation

In [10]:
mentor_list = [j for k in [i.split('/') for i in school_df["Mentor"].unique()] for j in k]
mentors_df = pd.DataFrame({"firstname":mentor_list, "token":""})
for i, mentor in mentors_df.iterrows():
    mentors_df.at[i,pupils_column_name] = students_df[school_df["Mentor"].str.contains(mentor["firstname"])] \
        .firstname \
        .sample(frac=1, random_state=random_state).str.cat(sep=', ')
    token = f"{random_state.randint(100, 10**3 - 1):03}"
    while token in mentors_df["token"].values:
        token = f"{random_state.randint(100, 10**3 - 1):03}"
    mentors_df.at[i,"token"] = token

In [11]:
mentors_df["language"] = "nl"
mentors_df

Unnamed: 0,firstname,token,attribute_1 <Pupils>,language
0,Mister Something,254,"Harold, Fatima",nl
1,Mrs Else,783,"Fatima, Harold",nl
2,Mister This,894,"Dwayne, Derek, Charissa, Abdul",nl
3,Mrs That,980,"Dwayne, Charissa, Derek, Abdul",nl


In [12]:
mentors_output_file = participant_input_file.with_stem(participant_input_file.stem + "_LimeSurveyMentors")
print(mentors_output_file)
mentors_df.to_csv(mentors_output_file)

example_data/test_participantList_LimeSurveyMentors.csv


In [13]:
mentor_codes = mentors_df[['firstname','token']].rename(columns={"token":"Access Code"})
mentor_codes

Unnamed: 0,firstname,Access Code
0,Mister Something,254
1,Mrs Else,783
2,Mister This,894
3,Mrs That,980


In [14]:
mentors_token_file = participant_input_file.with_stem(participant_input_file.stem + "_LimeSurveyMentorCodes")
print(mentors_output_file)
mentor_codes.to_csv(mentors_token_file, index=False)

example_data/test_participantList_LimeSurveyMentors.csv
