In [1]:
import pandas as pd
import os

def load_coding_table():
    """
    Load the CODING sheet from the codebook.xlsx file into a pandas DataFrame.
    
    Returns:
        pandas.DataFrame: The coding table from the CODING sheet
    """
    # Define the path to the Excel file
    excel_path = os.path.join("..", "Data", "codebook.xlsx")
    
    try:
        # Read the CODING sheet from the Excel file
        coding_table = pd.read_excel(excel_path, sheet_name="CODING")
        
        print(f"Successfully loaded CODING table with {len(coding_table)} rows and {len(coding_table.columns)} columns")
        print(f"Columns: {list(coding_table.columns)}")
        
        return coding_table
    
    except FileNotFoundError:
        print(f"Error: File not found at {excel_path}")
        return None
    except Exception as e:
        print(f"Error reading Excel file: {str(e)}")
        return None

# Alternative function if you want to specify a custom path
def load_coding_table_from_path(file_path):
    """
    Load the CODING sheet from a specified Excel file path.
    
    Args:
        file_path (str): Path to the Excel file
        
    Returns:
        pandas.DataFrame: The coding table from the CODING sheet
    """
    try:
        # Read the CODING sheet from the Excel file
        coding_table = pd.read_excel(file_path, sheet_name="CODING")
        
        print(f"Successfully loaded CODING table with {len(coding_table)} rows and {len(coding_table.columns)} columns")
        print(f"Columns: {list(coding_table.columns)}")
        
        return coding_table
    
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except Exception as e:
        print(f"Error reading Excel file: {str(e)}")
        return None


coding_data = load_coding_table()

Successfully loaded CODING table with 8847 rows and 59 columns
Columns: ['ResponseId', 'Q37_feedback_on_work', 'Q38_1_feedback_helpful_AI', 'Q38_2_feedback_helpful_teacher', 'Q39_1_feedback_trust_AI', 'Q39_2_feedback_trust_teacher', 'Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?', 'Good Quote', 'Actor', 'Comparator', 'Characteristic', 'Actor.1', 'Comparator.1', 'Characteristic.1', 'Actor.2', 'Comparator.2', 'Characteristic.2', 'Actor.3', 'Comparator.3', 'Characteristic.3', 'Actor.4', 'Comparator.4', 'Characteristic.4', 'Actor.5', 'Comparator.5', 'Characteristic.5', 'Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?', 'Good Quote.1', 'Actor.6', 'Comparator.6', 'Characteristic.6', 'Actor.7', 'Comparator.7', 'Characteristic.7', 'Actor.8', 'Comparator.8', 'Characteristic.8', 'Actor.9', 'Comparator.9', 'Characteristic.9', 'Describe any other differences between the feedback you received fro

Let's do a list of the valids caractheristics and reasons

In [2]:
characteristics = [
    "Ease",
    "Speed", 
    "Volume",
    "Before submission",
    "After submission",
    "Less effort",
    "Understanding",
    "Reflection",
    "Progress",
    "Specificity",
    "In-depth",
    "Understandable",
    "Relevance",
    "Contextualised",
    "Utility",
    "Reliable",
    "Objective",
    "Positivity",
    "Negativity",
    "Positive",
    "Negative",
    "No impact",
    "Personal",
    "Risky",
    "Expert",
    "Importance"
]

reasons = [
    "Unaware",
    "Effortful", 
    "Less effort",
    "Specificity",
    "In-depth",
    "Contextualised",
    "Utility",
    "Trustworthy",
    "Positivity",
    "Personal",
    "Expert",
    "Preference",
    "Need",
    "Unsustainable",
    "Privacy",
]


Now, let's see if all the characteristics in the codebook match with this in the lists

In [3]:
# Get all columns that start with "Characteristic"
characteristic_columns = [col for col in coding_data.columns if col.startswith('Characteristic')]

# Extract all unique values from these columns into a set
characteristic_values = set()

for col in characteristic_columns:
    # Get non-null values from the column
    values = coding_data[col].dropna()
    # Add each value to the set
    characteristic_values.update(values)

# Convert to sorted list for easier viewing
characteristic_list = sorted(list(characteristic_values))

print(f"Found {len(characteristic_values)} unique characteristic values:")
print(characteristic_list)

# If you want just the set:
print(f"\nSet of characteristic values: {characteristic_values}")


Found 35 unique characteristic values:
[' neutral', ' off-loading', 'a submission', 'b submission', 'contextualised', 'ease', 'expert', 'h KHIG', 'h kHIG', 'h reflect', 'h understand', 'h understand ', 'importance', 'in-depth', 'm vulnerable', 'm vulnerable ', 'negative', 'negativity', 'neutral', 'objective', 'off-loading', 'positive', 'positivity', 'relational', 'relational ', 'relevance', 'reliable', 'reliable ', 'specificity', 'specificity ', 'speed', 'understandable', 'utility', 'volume', 'volume ']

Set of characteristic values: {'volume ', 'a submission', 'importance', ' off-loading', 'specificity ', 'h kHIG', 'contextualised', 'off-loading', 'positive', 'negativity', 'ease', 'expert', 'm vulnerable ', 'h understand', 'reliable ', 'h reflect', ' neutral', 'in-depth', 'neutral', 'negative', 'objective', 'volume', 'reliable', 'h KHIG', 'relevance', 'positivity', 'specificity', 'b submission', 'h understand ', 'speed', 'relational', 'utility', 'm vulnerable', 'understandable', 'rela

In [4]:
len(characteristics), len(characteristic_values)

(26, 35)

There is unequal number of variables. Let's see if they share variables or not

In [5]:
# Find variables in characteristic_values that are not in characteristics
not_in_characteristics = characteristic_values - set(characteristics)
print(f"Variables in characteristic_values but not in characteristics ({len(not_in_characteristics)}):")
print(sorted(list(not_in_characteristics)))


Variables in characteristic_values but not in characteristics (35):
[' neutral', ' off-loading', 'a submission', 'b submission', 'contextualised', 'ease', 'expert', 'h KHIG', 'h kHIG', 'h reflect', 'h understand', 'h understand ', 'importance', 'in-depth', 'm vulnerable', 'm vulnerable ', 'negative', 'negativity', 'neutral', 'objective', 'off-loading', 'positive', 'positivity', 'relational', 'relational ', 'relevance', 'reliable', 'reliable ', 'specificity', 'specificity ', 'speed', 'understandable', 'utility', 'volume', 'volume ']


In [6]:
len(not_in_characteristics)

35

In [7]:
print(characteristics)
print(characteristic_values)

['Ease', 'Speed', 'Volume', 'Before submission', 'After submission', 'Less effort', 'Understanding', 'Reflection', 'Progress', 'Specificity', 'In-depth', 'Understandable', 'Relevance', 'Contextualised', 'Utility', 'Reliable', 'Objective', 'Positivity', 'Negativity', 'Positive', 'Negative', 'No impact', 'Personal', 'Risky', 'Expert', 'Importance']
{'volume ', 'a submission', 'importance', ' off-loading', 'specificity ', 'h kHIG', 'contextualised', 'off-loading', 'positive', 'negativity', 'ease', 'expert', 'm vulnerable ', 'h understand', 'reliable ', 'h reflect', ' neutral', 'in-depth', 'neutral', 'negative', 'objective', 'volume', 'reliable', 'h KHIG', 'relevance', 'positivity', 'specificity', 'b submission', 'h understand ', 'speed', 'relational', 'utility', 'm vulnerable', 'understandable', 'relational '}


In [8]:
# Manually create a dict matching each value from the second list (set) to the closest match in the first list

characteristic_value_to_characteristic = {
    'reliable': 'Reliable',
    'reliable ': 'Reliable',
    'objective': 'Objective',
    'contextualised': 'Contextualised',
    'understandable': 'Understandable',
    'h reflect': 'Reflection',
    'm vulnerable': 'Risky',
    'm vulnerable ': 'Risky',
    'neutral': 'No impact',
    ' neutral': 'No impact',
    'negative': 'Negative',
    'negativity': 'Negativity',
    'expert': 'Expert',
    'off-loading': 'Less effort',
    ' off-loading': 'Less effort',
    'in-depth': 'In-depth',
    'volume': 'Volume',
    'volume ': 'Volume',
    'relevance': 'Relevance',
    'specificity': 'Specificity',
    'specificity ': 'Specificity',
    'h KHIG': 'Progress',
    'h kHIG': 'Progress',
    'b submission': 'Before submission',
    'a submission': 'After submission',
    'speed': 'Speed',
    'relational': 'Personal', # Relational is the theme or subtheme
    'relational ': 'Personal',
    'h understand': 'Understanding',
    'h understand ': 'Understanding', 
    'ease': 'Ease',
    'utility': 'Utility',
    'positivity': 'Positivity',
    'positive': 'Positive',
    'importance': 'Importance',
}

In [9]:
clean_coding_data = coding_data.copy()

# Get columns that start with 'Characteristic'
characteristic_cols = [col for col in clean_coding_data.columns if col.startswith('Characteristic')]

# Replace values in those columns using the mapping dictionary
for col in characteristic_cols:
    clean_coding_data[col] = clean_coding_data[col].map(characteristic_value_to_characteristic)


In [10]:
# Get all unique characteristics after mapping
all_characteristics = set()
for col in characteristic_cols:
    all_characteristics.update(clean_coding_data[col].dropna().unique())
print(len(all_characteristics))

# Our list of correct characteristics
correct_characteristics = set(characteristic_value_to_characteristic.values())
print(len(correct_characteristics))

# Check if they match
print("All characteristics found in data:", sorted(all_characteristics))
print("\nAll characteristics from mapping:", sorted(correct_characteristics))
print("\nAre they identical?", all_characteristics == correct_characteristics)


26
26
All characteristics found in data: ['After submission', 'Before submission', 'Contextualised', 'Ease', 'Expert', 'Importance', 'In-depth', 'Less effort', 'Negative', 'Negativity', 'No impact', 'Objective', 'Personal', 'Positive', 'Positivity', 'Progress', 'Reflection', 'Relevance', 'Reliable', 'Risky', 'Specificity', 'Speed', 'Understandable', 'Understanding', 'Utility', 'Volume']

All characteristics from mapping: ['After submission', 'Before submission', 'Contextualised', 'Ease', 'Expert', 'Importance', 'In-depth', 'Less effort', 'Negative', 'Negativity', 'No impact', 'Objective', 'Personal', 'Positive', 'Positivity', 'Progress', 'Reflection', 'Relevance', 'Reliable', 'Risky', 'Specificity', 'Speed', 'Understandable', 'Understanding', 'Utility', 'Volume']

Are they identical? True


In [11]:
# Count occurrences of each characteristic across all characteristic columns
characteristic_counts = {}

for col in characteristic_cols:
    # Count values in this column
    counts = clean_coding_data[col].value_counts()
    
    # Add these counts to our running totals
    for char, count in counts.items():
        if char in characteristic_counts:
            characteristic_counts[char] += count
        else:
            characteristic_counts[char] = count

# Convert to Series for nicer display and sort by frequency
char_freq = pd.Series(characteristic_counts).sort_values(ascending=False)

print("Frequency of each characteristic:")
print(char_freq)


Frequency of each characteristic:
Reliable             1143
Specificity          1118
Ease                  875
Contextualised        669
Speed                 614
Utility               590
Personal              471
Understanding         313
Risky                 306
Volume                304
Understandable        297
In-depth              254
Positive              247
Objective             198
Positivity            188
Importance            124
Relevance             123
Expert                119
No impact             115
Negative              109
Reflection             86
Negativity             80
Less effort            59
After submission       45
Before submission      31
Progress               20
dtype: int64


Let's do the same normalisation with the reasons

In [12]:
# Let's extract and compare reasons

# Get reason columns
reason_cols = [col for col in clean_coding_data.columns if col.startswith('Reason')]

# Get all unique reasons from the data
all_reasons = set()
for col in reason_cols:
    all_reasons.update(clean_coding_data[col].dropna().unique())
print("Number of unique reasons in data:", len(all_reasons))

# Compare with our predefined list of reasons
print("Number of reasons in predefined list:", len(reasons))

# Compare the sets
print("\nAll reasons found in data:", sorted(all_reasons))
print("\nAll reasons from predefined list:", sorted(reasons))
print("\nAre they identical?", all_reasons == reasons)

Number of unique reasons in data: 32
Number of reasons in predefined list: 15

All reasons found in data: ['Context', 'Expert', 'In-depth', 'Integrity', 'NIL', 'Nil', 'Positivity', 'Preference', 'Privacy', 'Relational', 'Trust', 'Unaware', 'Unsustainable', 'Utility', 'context', 'effort', 'expert', 'in-depth', 'integrity', 'need', 'nil', 'off-loading', 'positivity', 'preference', 'privacy', 'relational', 'specificity', 'trust', 'trust ', 'unaware', 'unsustainable', 'utility']

All reasons from predefined list: ['Contextualised', 'Effortful', 'Expert', 'In-depth', 'Less effort', 'Need', 'Personal', 'Positivity', 'Preference', 'Privacy', 'Specificity', 'Trustworthy', 'Unaware', 'Unsustainable', 'Utility']

Are they identical? False


In [13]:
reason_mapping = {
    'Context': 'Contextualised',
    'context': 'Contextualised',
    'Expert': 'Expert',
    'expert': 'Expert',
    'In-depth': 'In-depth',
    'in-depth': 'In-depth',
    'Integrity': 'Integrity',
    'integrity': 'Integrity',
    'NIL': 'Unaware',
    'Nil': 'Unaware',
    'nil': 'Unaware',
    'Positivity': 'Positivity',
    'positivity': 'Positivity',
    'Preference': 'Preference',
    'preference': 'Preference',
    'Privacy': 'Privacy',
    'privacy': 'Privacy',
    'Relational': 'Personal',
    'relational': 'Personal',
    'Trust': 'Trustworthy',
    'trust': 'Trustworthy',
    'trust ': 'Trustworthy',
    'Unaware': 'Unaware',
    'unaware': 'Unaware',
    'Unsustainable': 'Unsustainable',
    'unsustainable': 'Unsustainable',
    'Utility': 'Utility',
    'utility': 'Utility',
    'effort': 'Effortful',
    'off-loading': 'Less effort',
    'need': 'Need',
    'specificity': 'Specificity'
}

In [14]:
# Replace values in Reason columns using the mapping dictionary
reason_columns = [col for col in clean_coding_data.columns if col.startswith('Reason')]

for col in reason_columns:
    clean_coding_data[col] = clean_coding_data[col].map(reason_mapping)



In [15]:
# Get all unique reasons after mapping
all_reasons = set()
for col in reason_columns:
    all_reasons.update(clean_coding_data[col].dropna().unique())
print(len(all_reasons))

# Our list of correct reasons
correct_reasons = set(reason_mapping.values())
print(len(correct_reasons))

# Check if they match
print("All reasons found in data:", sorted(all_reasons))
print("\nAll reasons from mapping:", sorted(correct_reasons))
print("\nAre they identical?", all_reasons == correct_reasons)


16
16
All reasons found in data: ['Contextualised', 'Effortful', 'Expert', 'In-depth', 'Integrity', 'Less effort', 'Need', 'Personal', 'Positivity', 'Preference', 'Privacy', 'Specificity', 'Trustworthy', 'Unaware', 'Unsustainable', 'Utility']

All reasons from mapping: ['Contextualised', 'Effortful', 'Expert', 'In-depth', 'Integrity', 'Less effort', 'Need', 'Personal', 'Positivity', 'Preference', 'Privacy', 'Specificity', 'Trustworthy', 'Unaware', 'Unsustainable', 'Utility']

Are they identical? True


In [16]:
# Count occurrences of each reason across all reason columns
reason_counts = pd.Series()
for col in reason_columns:
    reason_counts = reason_counts.add(clean_coding_data[col].value_counts(), fill_value=0)

# Sort in descending order and display
print("Frequency of each reason:")
print(reason_counts.sort_values(ascending=False))


Frequency of each reason:
Unaware           1043.0
Trustworthy        976.0
Integrity          310.0
Contextualised     225.0
Need               214.0
Personal           204.0
Expert             155.0
Preference         145.0
Privacy             93.0
Utility             40.0
In-depth            32.0
Effortful           24.0
Less effort         18.0
Unsustainable        5.0
Specificity          4.0
Positivity           3.0
dtype: object


## Transformation

In [17]:
{
    "ResponseId": "R_4ksNFrS9UmgYiAd",
    "Q37_feedback_on_work": 1,
    "Q38_1_feedback_helpful_AI": 4,
    "Q38_2_feedback_helpful_teacher": 5,
    "Q39_1_feedback_trust_AI": 3,
    "Q39_2_feedback_trust_teacher": 5,
    "Question 40": {
        "Question": "Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?",
        "Response": "GENAI does not have the nuance, it is broad and it is also a people pleaser",
        "GoodQuote": "",
        "Codes": [
            {
                "Actor": "A", 
                "Comparator": "less", 
                "Characteristic": "In-depth"
             },
            {
                "Actor": "A", 
                "Comparator": "more",
                "Characteristic": "Posivity"
             }
        ]
    },
    "Question 41": {
        "Question": "Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?",
        "Response": "GGetting feedback from GENAI makes me feel nothing. I know it is AI working off Machine learning based on what it has learnt before. Furthermore, it is programmed it a certain way.",
        "GoodQuote": "",
        "Codes": [
            {
                "Actor": "A", 
                "Comparator": "less", 
                "Characteristic": "No impact"
             }
        ]
    },
    "Question 42": {
        "Question": "Describe any other differences between the feedback you received from GenAI and your teacher.",
        "Response": "Genai is easier to get but teacher more reliable",
        "GoodQuote": "",
        "Codes": [
            {
                "Actor": "A", 
                "Comparator": "more", 
                "Characteristic": "Ease"
             },
            {
                "Actor": "T", 
                "Comparator": "more", 
                "Characteristic": "Reliable"
             }
        ]
    },
    "Question 43": {
        "Question": "Why didn’t you use GenAI for feedback on your work?",
        "Response": "I don't trust it, I would rather use the universities resources such as student helpers.",
        "GoodQuote": "",
        "Reasons": [
            "Unaware",
            "Effortful",
        ]
    }
}

{'ResponseId': 'R_4ksNFrS9UmgYiAd',
 'Q37_feedback_on_work': 1,
 'Q38_1_feedback_helpful_AI': 4,
 'Q38_2_feedback_helpful_teacher': 5,
 'Q39_1_feedback_trust_AI': 3,
 'Q39_2_feedback_trust_teacher': 5,
 'Question 40': {'Question': 'Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?',
  'Response': 'GENAI does not have the nuance, it is broad and it is also a people pleaser',
  'GoodQuote': '',
  'Codes': [{'Actor': 'A', 'Comparator': 'less', 'Characteristic': 'In-depth'},
   {'Actor': 'A', 'Comparator': 'more', 'Characteristic': 'Posivity'}]},
 'Question 41': {'Question': 'Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?',
  'Response': 'GGetting feedback from GENAI makes me feel nothing. I know it is AI working off Machine learning based on what it has learnt before. Furthermore, it is programmed it a certain way.',
  'GoodQuote': '',
  'Codes': [{'Actor': 'A',
    'Comparator': 'less',
 

In [18]:
clean_coding_data.columns


Index(['ResponseId', 'Q37_feedback_on_work', 'Q38_1_feedback_helpful_AI',
       'Q38_2_feedback_helpful_teacher', 'Q39_1_feedback_trust_AI',
       'Q39_2_feedback_trust_teacher',
       'Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?',
       'Good Quote', 'Actor', 'Comparator', 'Characteristic', 'Actor.1',
       'Comparator.1', 'Characteristic.1', 'Actor.2', 'Comparator.2',
       'Characteristic.2', 'Actor.3', 'Comparator.3', 'Characteristic.3',
       'Actor.4', 'Comparator.4', 'Characteristic.4', 'Actor.5',
       'Comparator.5', 'Characteristic.5',
       'Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?',
       'Good Quote.1', 'Actor.6', 'Comparator.6', 'Characteristic.6',
       'Actor.7', 'Comparator.7', 'Characteristic.7', 'Actor.8',
       'Comparator.8', 'Characteristic.8', 'Actor.9', 'Comparator.9',
       'Characteristic.9',
       'Describe any other differences betwe

In [19]:
import json
from pathlib import Path
import pandas as pd

# ---------- helpers ----------
def is_filled(x):
    if pd.isna(x): return False
    if isinstance(x, str): return x.strip() != ""
    return True

def get_stripped(x):
    if not is_filled(x): return None
    return str(x).strip()

def add_if_numeric(out: dict, row: pd.Series, col: str):
    """If row[col] exists and is numeric-like, store as int; else store stripped string."""
    val = row.get(col)
    if not is_filled(val):
        return
    try:
        out[col] = int(float(val))
    except Exception:
        s = str(val).strip()
        if s != "":
            out[col] = s

def collect_codes(row, ranges):
    codes = []
    for a_col, c_col, ch_col in ranges:
        a = get_stripped(row.get(a_col))
        c = get_stripped(row.get(c_col))
        ch = get_stripped(row.get(ch_col))
        if any([a, c, ch]):
            code = {}
            if a:  code["Actor"] = a
            if c:  code["Comparator"] = c
            if ch: code["Characteristic"] = ch
            codes.append(code)
    return codes

# ---------- main ----------
def export_survey_rows_to_json(df: pd.DataFrame, out_dir="survey_results"):
    out_path = Path(out_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # Q groups / columns
    q40_q = "Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?"
    q40_quote = "Good Quote"
    q40_ranges = [("Actor", "Comparator", "Characteristic")] + [
        (f"Actor.{i}", f"Comparator.{i}", f"Characteristic.{i}") for i in range(1, 6)
    ]

    q41_q = "Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?"
    q41_quote = "Good Quote.1"
    q41_ranges = [(f"Actor.{i}", f"Comparator.{i}", f"Characteristic.{i}") for i in range(6, 10)]

    q42_q = "Describe any other differences between the feedback you received from GenAI and your teacher."
    q42_quote = "Good Quote.2"
    # Note: "Characteristic 11" (with a space) per your schema
    q42_ranges = [(f"Actor.{i}", f"Comparator.{i}", f"Characteristic.{i}") for i in range(10, 14)]

    q43_q = "Why didn’t you use GenAI for feedback on your work?"
    q43_quote = "Good quote"
    q43_reasons = ["Reason 1", "Reason 2", "Reason 3"]

    numeric_cols = [
        "Q37_feedback_on_work",
        "Q38_1_feedback_helpful_AI",
        "Q38_2_feedback_helpful_teacher",
        "Q39_1_feedback_trust_AI",
        "Q39_2_feedback_trust_teacher",
    ]

    written_files = []

    for _, row in df.iterrows():
        rid = get_stripped(row.get("ResponseId"))
        if not rid:
            continue

        out = {"ResponseId": rid}

        # Add numeric / scalar fields
        for col in numeric_cols:
            add_if_numeric(out, row, col)

        # Q40
        q40_ans = get_stripped(row.get(q40_q))
        q40_gq = get_stripped(row.get(q40_quote))
        q40_codes = collect_codes(row, q40_ranges)
        if any([q40_ans, q40_gq, q40_codes]):
            out["Question 40"] = {
                "Question": q40_q,
                "Response": q40_ans or "",
                "GoodQuote": q40_gq or "",
            }
            if q40_codes:
                out["Question 40"]["Codes"] = q40_codes

        # Q41
        q41_ans = get_stripped(row.get(q41_q))
        q41_gq = get_stripped(row.get(q41_quote))
        q41_codes = collect_codes(row, q41_ranges)
        if any([q41_ans, q41_gq, q41_codes]):
            out["Question 41"] = {
                "Question": q41_q,
                "Response": q41_ans or "",
                "GoodQuote": q41_gq or "",
            }
            if q41_codes:
                out["Question 41"]["Codes"] = q41_codes

        # Q42
        q42_ans = get_stripped(row.get(q42_q))
        q42_gq = get_stripped(row.get(q42_quote))
        q42_codes = collect_codes(row, q42_ranges)
        if any([q42_ans, q42_gq, q42_codes]):
            out["Question 42"] = {
                "Question": q42_q,
                "Response": q42_ans or "",
                "GoodQuote": q42_gq or "",
            }
            if q42_codes:
                out["Question 42"]["Codes"] = q42_codes

        # Q43
        q43_ans = get_stripped(row.get(q43_q))
        q43_gq = get_stripped(row.get(q43_quote))
        reasons = [get_stripped(row.get(c)) for c in q43_reasons]
        reasons = [r for r in reasons if r]
        if any([q43_ans, q43_gq, reasons]):
            out["Question 43"] = {
                "Question": q43_q,
                "Response": q43_ans or "",
                "GoodQuote": q43_gq or "",
            }
            if reasons:
                out["Question 43"]["Reasons"] = reasons

        # Only write if at least one Question 40–43 exists
        if any(k.startswith("Question ") for k in out.keys()):
            fp = out_path / f"{rid}.json"
            with open(fp, "w", encoding="utf-8") as f:
                json.dump(out, f, ensure_ascii=False, indent=2)
            written_files.append(str(fp))

    return written_files


In [20]:
files = export_survey_rows_to_json(clean_coding_data, out_dir="survey_results")
print(f"Wrote {len(files)} files")

Wrote 5454 files


In [21]:
clean_coding_data[clean_coding_data["ResponseId"]=="R_4q2uHXW2PPCK345"].values


array([['R_4q2uHXW2PPCK345', 1.0, 4.0, 5.0, 3.0, 5.0,
        'Learning from AI is quick when you have questions but the teacher (supervisor) is better at explaining related to your context.',
        nan, 'A', 'more', 'Speed', 'T', 'more', 'Contextualised', nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        'Maybe just more of a human connection with the teacher.', nan,
        'T', 'more', 'Personal', nan, nan, nan, nan, nan, nan, nan, nan,
        nan,
        'feedback from AI especially when asking for code for an analysis is very hit and miss a teacher would have a better understanding on what you are trying to accomplish',
        nan, 'T', 'more', 'Contextualised', 'A', 'less', 'Reliable', nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]], dtype=object)

In [22]:
clean_coding_data.columns

Index(['ResponseId', 'Q37_feedback_on_work', 'Q38_1_feedback_helpful_AI',
       'Q38_2_feedback_helpful_teacher', 'Q39_1_feedback_trust_AI',
       'Q39_2_feedback_trust_teacher',
       'Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?',
       'Good Quote', 'Actor', 'Comparator', 'Characteristic', 'Actor.1',
       'Comparator.1', 'Characteristic.1', 'Actor.2', 'Comparator.2',
       'Characteristic.2', 'Actor.3', 'Comparator.3', 'Characteristic.3',
       'Actor.4', 'Comparator.4', 'Characteristic.4', 'Actor.5',
       'Comparator.5', 'Characteristic.5',
       'Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?',
       'Good Quote.1', 'Actor.6', 'Comparator.6', 'Characteristic.6',
       'Actor.7', 'Comparator.7', 'Characteristic.7', 'Actor.8',
       'Comparator.8', 'Characteristic.8', 'Actor.9', 'Comparator.9',
       'Characteristic.9',
       'Describe any other differences betwe