In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Alternative renderer, currently disabled:
#pio.renderers.default = "iframe_connected"
# Set plotly's default renderer to "vscode" (presumably because the notebook
# is run inside VS Code — switch to the line above for other front ends).
pio.renderers.default = "vscode"


In [2]:
from pathlib import Path
import sys

# Go one level up from the working directory (notebooks/ -> parenthood_europe/)
# so that the project's `scripts` and `libs` packages can be imported.
project_root = Path().resolve().parents[0]

# Put the project root first on the import path and libs/ last. Entries that
# are already present are skipped, so re-running this cell does not pile up
# duplicates in sys.path.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))
libs_dir = str(project_root / "libs")  # pathlib join instead of a hard-coded "/"
if libs_dir not in sys.path:
    sys.path.append(libs_dir)


In [3]:
# Debug aid: list every ancestor directory of the current working directory,
# nearest ancestor first.
list(Path().resolve().parents)


[PosixPath('/Users/Paula_1/CodingProjects/parenthood/parenthood_europe'),
 PosixPath('/Users/Paula_1/CodingProjects/parenthood'),
 PosixPath('/Users/Paula_1/CodingProjects'),
 PosixPath('/Users/Paula_1'),
 PosixPath('/Users'),
 PosixPath('/')]

In [4]:
# Debug aid: display the libs/ directory path derived from project_root.
(str(project_root)+"/libs")


'/Users/Paula_1/CodingProjects/parenthood/parenthood_europe/libs'

In [3]:
from scripts.parse_survey_data import load_survey_data
from libs.questions.numeric import NumericQuestion
from libs.questions.single_choice import SingleChoiceQuestion
from libs.questions.multiple_choice import MultipleChoiceQuestion
from libs.questions.matrix import MatrixQuestion

In [6]:
# Path is already imported in the path-setup cell; this repeat is redundant but harmless.
from pathlib import Path
# Print the absolute, resolved current working directory.
print("Current working directory:", Path().resolve())


Current working directory: /Users/Paula_1/CodingProjects/parenthood/parenthood_europe/notebooks


In [15]:
# Load the numeric-question test workbook; load_survey_data returns two objects,
# bound here to `df` and `df_raw`.
df, df_raw = load_survey_data(file_path = "../data/parenthood_test_QNumeric.xlsx")
# Row label 1 of `df` holds the question wording (see the recorded output),
# so this prints the text of question DE1.
print(df.loc[1, "DE1"])


In what year were you born?


In [5]:
# Load the numeric-question test workbook.
df, df_raw = load_survey_data(file_path="../data/parenthood_test_QNumeric.xlsx")

# Plot the distribution of DE1 ("In what year were you born?").
q1 = NumericQuestion("DE1", df, df_raw)
fig1 = q1.distribution(display=False)
# The other plotting cells in this notebook guard against distribution()
# returning None; do the same here instead of failing on `None.show()`.
if fig1 is not None:
    fig1.show()

In [5]:
# DE10 has no column of its own: list the sub-columns whose names start with "DE10".
print([col for col in df.columns if col.startswith("DE10")])


['DE10_1', 'DE10_3', 'DE10_4', 'DE10_5', 'DE10_6', 'DE10_7', 'DE10_9']


In [6]:
# Plot the distribution of every numeric question, using the `df` / `df_raw`
# loaded from the numeric test workbook.
numeric_question_ids = [
    "DE1",
    "DE10",
    "DE11",
    "DE22",
]
for qid in numeric_question_ids:
    q = NumericQuestion(qid, df, df_raw)
    fig = q.distribution(display=False)
    if fig is None:
        # No figure was returned for this question; nothing to display.
        continue
    fig.show()


In [5]:
# Load the single-choice test workbook.
df, df_raw = load_survey_data(
    file_path="../data/parenthood_test_QSingle.xlsx",
)

single_choice_question_ids = [
    "DE2", "DE4", "DE5", "DE6", "DE8", "DE12", "DE17", "DE18",
    "DE19", "DE20", "DE21", "PL3", "PL5", "PL8", "PL10", "CS2",
]

# First build every figure, then display them, so all questions are
# processed before any output is rendered.
figs = []
for qid in single_choice_question_ids:
    q = SingleChoiceQuestion(qid, df, df_raw)
    fig = q.distribution(display=False)
    figs.append(fig)

for fig in figs:
    if fig is None:
        # No figure was returned for this question; nothing to display.
        continue
    fig.show()

In [11]:
# Inspect the dtype and distinct values of DE2. In the recorded output the
# column is `object` because it still contains the column code and the
# question text alongside the numeric answer codes.
print("Column DE2 type:", df["DE2"].dtype)
print("Unique values in DE2:", df["DE2"].unique())


Column DE2 type: object
Unique values in DE2: ['DE2' 'What is your gender identity?' 1 2 nan 3 4]


In [None]:
# Spot-check one DE2 cell and the Python type it is stored as.
value = df["DE2"].iloc[3]  # 3 means the 4th row (0-based indexing)
print("Value at row 3, col 'DE2':", value)
print("Type of that value:", type(value))


Value at row 3, col 'DE2': 2
Type of that value: <class 'int'>


In [10]:
# Load the multiple-choice test workbook, replacing the previous `df` / `df_raw`.
df, df_raw = load_survey_data(file_path = "../data/parenthood_test_QMultiple.xlsx")
# Row label 1 of `df` holds the question wording (see the recorded output),
# so this prints the text of question DE24.
print(df.loc[1, "DE24"])

Could you please share the primary reason for not having children? - Selected Choice


In [14]:
# Spot-check one DE3 cell and the Python type it is stored as
# (the recorded output shows a str here, unlike the int seen for DE2).
value = df["DE3"].iloc[3]  # positional index 3, i.e. the 4th row
print("Value at row 3, col 'DE3':", value)
print("Type of that value:", type(value))


Value at row 3, col 'DE3': 1
Type of that value: <class 'str'>


In [6]:
# Multiple-choice questions: plot the answer distribution of each one.
# TODO: how to plot multiple choice questions?
df, df_raw = load_survey_data(
    file_path="../data/parenthood_test_QMultiple.xlsx",
)

multiple_choice_question_ids = ["DE3", "DE7", "DE9", "DE24"]

for qid in multiple_choice_question_ids:
    q = MultipleChoiceQuestion(qid, df, df_raw)
    fig = q.distribution(display=False)
    if fig is None:
        # No figure was returned for this question; nothing to display.
        continue
    fig.show()


In [4]:
# Load the matrix-question test workbook (the following cell loads the same
# file again before plotting).
df, df_raw = load_survey_data(file_path="../data/parenthood_test_QMatrix.xlsx")

In [4]:
# Matrix questions
# The gender/identity of each parent is only known because it is anchored by
# DE14. We assume respondents keep the same order in later questions, i.e.
# their answers refer to parent 1 first and parent 2 second.

df, df_raw = load_survey_data(file_path="../data/parenthood_test_QMatrix.xlsx")

# One dict shared by every MatrixQuestion below.
# NOTE(review): presumably filled while processing DE14 and read by the later
# questions — confirm against MatrixQuestion.
gender_lookup = {}

# Named for what it holds, so it no longer overwrites the multiple-choice id
# list defined in the multiple-choice cell.
matrix_question_ids = ["DE14", "DE15", "DE16", "DE23"]
# Listed in the original commented-out tail and not plotted yet:
# "PL1", "PL2", "PL4", "PL6", "PL7", "PL9", "CS1", "CS3", "CC1", "CC2", "GB4", "GB5"
# TODO: DE23 was marked with a question mark — confirm it belongs in this list.

for qid in matrix_question_ids:
    q = MatrixQuestion(qid, df, df_raw, gender_lookup=gender_lookup)
    fig = q.distribution(display=False)
    if fig is not None:
        fig.show()


In [7]:
# Spot-check one DE14_1 cell and the Python type it is stored as.
value = df["DE14_1"].iloc[3]  # 3 means the 4th row (0-based indexing)
print("Value at row 3, col 'DE14_1':", value)
print("Type of that value:", type(value))

Value at row 3, col 'DE14_1': 2
Type of that value: <class 'int'>


In [None]:
class Respondent:
    """One survey respondent: a single row of the raw data plus question metadata.

    Attributes:
        id: Row label used to look the respondent up in ``df_raw``.
        row: The respondent's data, i.e. ``df_raw.loc[respondent_id]``.
        metadata: Mapping of question id to question metadata. This class only
            reads ``metadata["DE14"]["value_map"]``.
    """

    def __init__(self, respondent_id, df_raw, metadata):
        """Store the respondent's id, data row and the question metadata.

        Args:
            respondent_id: Row label of the respondent in ``df_raw``.
            df_raw: Table indexed by respondent, with one column per
                (sub-)question such as ``DE14_1``.
            metadata: Question metadata, see the class docstring.

        Raises:
            KeyError: If ``respondent_id`` is not a row label of ``df_raw``.
        """
        self.id = respondent_id
        self.row = df_raw.loc[respondent_id]
        self.metadata = metadata

    def get_parent_gender(self, parent_number):
        """Return the gender label recorded for one parent (question DE14).

        Args:
            parent_number: Suffix of the DE14 sub-column, e.g. ``1`` or ``"1"``
                for column ``DE14_1``.

        Returns:
            The label that ``metadata["DE14"]["value_map"]`` assigns to the
            answer code. ``None`` is returned when the column does not exist,
            the answer is missing, the code cannot be converted to ``int``,
            the code has no entry in the map, or the DE14 metadata is
            unavailable.
        """
        # Reuse the generic lookup; a missing column comes back as None,
        # which pd.isna() treats as missing below.
        code = self.get_answer("DE14", parent_number)
        try:
            if pd.isna(code):
                return None
            # Codes may be stored as int, float or str; the map is keyed by int.
            return self.metadata["DE14"]["value_map"].get(int(code))
        except (KeyError, ValueError, TypeError):
            return None

    def get_answer(self, question_id, parent_number):
        """Return the raw answer stored in column ``{question_id}_{parent_number}``.

        Args:
            question_id: Question identifier, e.g. ``"DE15"``.
            parent_number: Sub-column suffix, e.g. ``1`` or ``"1"``.

        Returns:
            The stored value exactly as it appears in the row (a missing
            answer is returned as stored, e.g. NaN), or ``None`` when the
            column does not exist.
        """
        col = f"{question_id}_{parent_number}"
        try:
            return self.row[col]
        except KeyError:
            return None


In [None]:
# Example usage of Respondent for the respondent with row label 42.
# NOTE(review): `metadata` is not defined in any cell visible in this notebook,
# and this cell shows no execution count — define or load it before running.
respondent = Respondent(42, df_raw, metadata)

# The trailing comments are the expected results, not recorded outputs.
print(respondent.get_parent_gender("1"))  # → "Woman"
print(respondent.get_answer("DE15", "1"))  # → e.g., 2 (education level)


In [8]:
# Beliefs about gender (in)equality (all numeric): IN1 (window in percent), IN2 (in percent), IN3 (in years)
# Descriptive and injunctive social norms on academic productivity: AP1 (matrix-style: number of scholarly works), AP2 (single choice), AP3 (single choice)
# Network satisfaction: GB1a (single choice), GB1b (single choice), GB1c (single choice), GB2a (single choice), GB2b (single choice), GB3 (matrix)
# Leaving academia: LA1, LA2, LA3 (single choice), LA4a, LA4b (text input window)
# Final questions: C1 (single choice), C2 (text input window)