In [1]:
import pandas as pd
import os

def load_coding_table():
    """
    Read the "CODING" sheet of the project codebook into a DataFrame.

    The workbook location is fixed at ../Data/codebook.xlsx (a relative path).
    On success a two-line summary is printed: row/column counts and the list
    of column names. Failures are reported by printing a message; nothing is
    raised to the caller.

    Returns:
        pandas.DataFrame or None: The CODING sheet, or None when the workbook
        is missing or could not be read.
    """
    workbook = os.path.join("..", "Data", "codebook.xlsx")

    try:
        sheet = pd.read_excel(workbook, sheet_name="CODING")

        n_rows, n_cols = sheet.shape
        print(f"Successfully loaded CODING table with {n_rows} rows and {n_cols} columns")
        print(f"Columns: {list(sheet.columns)}")

        return sheet

    except FileNotFoundError:
        # Missing workbook gets its own message that names the path tried.
        print(f"Error: File not found at {workbook}")
        return None
    except Exception as err:
        # Anything else (bad sheet name, unreadable file, ...) is reported generically.
        print(f"Error reading Excel file: {err}")
        return None

# Alternative function if you want to specify a custom path
def load_coding_table_from_path(file_path):
    """
    Read the "CODING" sheet from the Excel workbook at `file_path`.

    On success a two-line summary is printed: row/column counts and the list
    of column names. Failures are reported by printing a message; nothing is
    raised to the caller.

    Args:
        file_path (str): Location of the Excel workbook.

    Returns:
        pandas.DataFrame or None: The CODING sheet, or None when the workbook
        is missing or could not be read.
    """
    try:
        sheet = pd.read_excel(file_path, sheet_name="CODING")

        n_rows, n_cols = sheet.shape
        print(f"Successfully loaded CODING table with {n_rows} rows and {n_cols} columns")
        print(f"Columns: {list(sheet.columns)}")

        return sheet

    except FileNotFoundError:
        # Missing workbook gets its own message that names the path tried.
        print(f"Error: File not found at {file_path}")
        return None
    except Exception as err:
        # Anything else (bad sheet name, unreadable file, ...) is reported generically.
        print(f"Error reading Excel file: {err}")
        return None


# Load the codebook once; later cells read from `coding_data`.
coding_data = load_coding_table()

# load_coding_table() reports failures by printing a message and returning
# None. Stop here with a clear error instead of letting a later cell fail on
# `coding_data.columns` with an unrelated-looking AttributeError.
if coding_data is None:
    raise RuntimeError(
        "The CODING sheet could not be loaded; see the message printed above "
        "and check that ../Data/codebook.xlsx exists relative to the notebook."
    )

Successfully loaded CODING table with 8847 rows and 59 columns
Columns: ['ResponseId', 'Q37_feedback_on_work', 'Q38_1_feedback_helpful_AI', 'Q38_2_feedback_helpful_teacher', 'Q39_1_feedback_trust_AI', 'Q39_2_feedback_trust_teacher', 'Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?', 'Good Quote', 'Actor', 'Comparator', 'Characteristic', 'Actor.1', 'Comparator.1', 'Characteristic.1', 'Actor.2', 'Comparator.2', 'Characteristic.2', 'Actor.3', 'Comparator.3', 'Characteristic.3', 'Actor.4', 'Comparator.4', 'Characteristic.4', 'Actor.5', 'Comparator.5', 'Characteristic.5', 'Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?', 'Good Quote.1', 'Actor.6', 'Comparator.6', 'Characteristic.6', 'Actor.7', 'Comparator.7', 'Characteristic.7', 'Actor.8', 'Comparator.8', 'Characteristic.8', 'Actor.9', 'Comparator.9', 'Characteristic.9', 'Describe any other differences between the feedback you received fro

Let's list the valid characteristics and reasons.

In [10]:
# Canonical characteristic labels; values found in the codebook are compared
# against this list.
characteristics = [
    "Ease", "Speed", "Volume",
    "Before submission", "After submission",
    "Less effort", "Understanding", "Reflection", "Progress",
    "Specificity", "In-depth", "Understandable", "Relevance",
    "Contextualised", "Utility", "Reliable", "Objective",
    "Positivity", "Negativity", "Positive", "Negative", "No impact",
    "Personal", "Risky", "Expert", "Importance",
]

# Canonical reason labels.
reasons = [
    "Unaware", "Effortful", "Less effort",
    "Specificity", "In-depth", "Contextualised", "Utility",
    "Trustworthy", "Positivity", "Personal", "Expert",
    "Preference", "Need", "Unsustainable", "Privacy",
]


Now, let's check whether all the characteristics in the codebook match those in the lists.

In [3]:
# Every column whose name begins with "Characteristic"
# (e.g. 'Characteristic', 'Characteristic.1', ...).
characteristic_columns = [
    name for name in coding_data.columns if name.startswith('Characteristic')
]

# Pool the non-null entries of all those columns into one set of distinct values.
characteristic_values = set().union(
    *(coding_data[name].dropna() for name in characteristic_columns)
)

# Sorted copy, easier to scan than the unordered set.
characteristic_list = sorted(characteristic_values)

print(f"Found {len(characteristic_values)} unique characteristic values:")
print(characteristic_list)

# The raw set itself, for reference.
print(f"\nSet of characteristic values: {characteristic_values}")


Found 35 unique characteristic values:
[' neutral', ' off-loading', 'a submission', 'b submission', 'contextualised', 'ease', 'expert', 'h KHIG', 'h kHIG', 'h reflect', 'h understand', 'h understand ', 'importance', 'in-depth', 'm vulnerable', 'm vulnerable ', 'negative', 'negativity', 'neutral', 'objective', 'off-loading', 'positive', 'positivity', 'relational', 'relational ', 'relevance', 'reliable', 'reliable ', 'specificity', 'specificity ', 'speed', 'understandable', 'utility', 'volume', 'volume ']

Set of characteristic values: {'reliable', 'objective', 'contextualised', 'understandable', 'h reflect', 'm vulnerable', 'neutral', 'negative', 'expert', ' off-loading', 'm vulnerable ', 'in-depth', 'volume', 'volume ', 'relevance', 'specificity', 'h KHIG', 'b submission', 'speed', 'relational', 'h understand', 'ease', 'specificity ', 'h understand ', 'off-loading', 'relational ', 'utility', 'h kHIG', 'reliable ', 'negativity', 'a submission', 'positivity', ' neutral', 'importance', 'p

In [12]:
# Number of canonical labels vs. number of distinct raw values collected
# from the codebook.
n_canonical_labels = len(characteristics)
n_raw_values = len(characteristic_values)
n_canonical_labels, n_raw_values

(26, 35)

There is an unequal number of variables. Let's see whether they share any variables.

In [13]:
# Exact-match difference: raw codebook values that have no identical entry in
# `characteristics`. A difference in letter case or a stray space is enough
# for a value to be listed here.
not_in_characteristics = characteristic_values - set(characteristics)
print(f"Variables in characteristic_values but not in characteristics ({len(not_in_characteristics)}):")
print(sorted(list(not_in_characteristics)))

# The exact comparison above cannot tell "same label, different spelling"
# apart from "no counterpart at all". Compare again on a normalised form
# (surrounding whitespace removed, case-folded) so that only the values which
# genuinely need a manual mapping remain.
canonical_normalised = {str(label).strip().casefold() for label in characteristics}
unmatched_after_normalisation = {
    value
    for value in characteristic_values
    if str(value).strip().casefold() not in canonical_normalised
}
print(f"\nStill unmatched after ignoring case and surrounding whitespace ({len(unmatched_after_normalisation)}):")
print(sorted(unmatched_after_normalisation, key=str))


Variables in characteristic_values but not in characteristics (35):
[' neutral', ' off-loading', 'a submission', 'b submission', 'contextualised', 'ease', 'expert', 'h KHIG', 'h kHIG', 'h reflect', 'h understand', 'h understand ', 'importance', 'in-depth', 'm vulnerable', 'm vulnerable ', 'negative', 'negativity', 'neutral', 'objective', 'off-loading', 'positive', 'positivity', 'relational', 'relational ', 'relevance', 'reliable', 'reliable ', 'specificity', 'specificity ', 'speed', 'understandable', 'utility', 'volume', 'volume ']


In [14]:
# How many raw values have no exact counterpart in `characteristics`.
n_without_exact_match = len(not_in_characteristics)
n_without_exact_match

35

In [15]:
# Show the canonical list and the raw value set one above the other so the
# spelling differences can be compared by eye.
print(characteristics, characteristic_values, sep="\n")

['Ease', 'Speed', 'Volume', 'Before submission', 'After submission', 'Less effort', 'Understanding', 'Reflection', 'Progress', 'Specificity', 'In-depth', 'Understandable', 'Relevance', 'Contextualised', 'Utility', 'Reliable', 'Objective', 'Positivity', 'Negativity', 'Positive', 'Negative', 'No impact', 'Personal', 'Risky', 'Expert', 'Importance']
{'reliable', 'objective', 'contextualised', 'understandable', 'h reflect', 'm vulnerable', 'neutral', 'negative', 'expert', ' off-loading', 'm vulnerable ', 'in-depth', 'volume', 'volume ', 'relevance', 'specificity', 'h KHIG', 'b submission', 'speed', 'relational', 'h understand', 'ease', 'specificity ', 'h understand ', 'off-loading', 'relational ', 'utility', 'h kHIG', 'reliable ', 'negativity', 'a submission', 'positivity', ' neutral', 'importance', 'positive'}


In [None]:
# Hand-written lookup from each raw value found in the Characteristic* columns
# to the canonical label in `characteristics` it is treated as.
# Keys are matched exactly, so variants that differ only by letter case or by
# leading/trailing spaces each get their own entry.
# NOTE(review): "Risky" is the only label in `characteristics` that no key
# maps to, while "m vulnerable" and "relational" both map to "Personal" —
# confirm this is intended.

characteristic_value_to_characteristic = {
    "reliable": "Reliable",
    "reliable ": "Reliable",            # trailing space
    "objective": "Objective",
    "contextualised": "Contextualised",
    "understandable": "Understandable",
    "h reflect": "Reflection",
    "m vulnerable": "Personal",
    "m vulnerable ": "Personal",        # trailing space
    "neutral": "No impact",
    " neutral": "No impact",            # leading space
    "negative": "Negative",
    "negativity": "Negativity",
    "expert": "Expert",
    "off-loading": "Less effort",
    " off-loading": "Less effort",      # leading space
    "in-depth": "In-depth",
    "volume": "Volume",
    "volume ": "Volume",                # trailing space
    "relevance": "Relevance",
    "specificity": "Specificity",
    "specificity ": "Specificity",      # trailing space
    "h KHIG": "Progress",
    "h kHIG": "Progress",               # lower-case "k" variant
    "b submission": "Before submission",
    "a submission": "After submission",
    "speed": "Speed",
    "relational": "Personal",
    "relational ": "Personal",          # trailing space
    "h understand": "Understanding",
    "h understand ": "Understanding",   # trailing space
    "ease": "Ease",
    "utility": "Utility",
    "positivity": "Positivity",
    "positive": "Positive",
    "importance": "Importance",
}