In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Read the CSV into a DataFrame; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer='../checked-data.csv')

In [3]:

# Row-wise sum over the five question columns, stored as a new `total_score` column
# (presumably the q* columns are booleans, so the sum counts True answers — confirm against the CSV)
question_columns = ['q1', 'q2', 'q3', 'q4', 'q5']
df['total_score'] = df.loc[:, question_columns].sum(axis='columns')

# Preview the first rows to check the new column
score_preview = df.loc[:, ['image', 'total_score']].head()
print(score_preview)

                      image  total_score
0     er_posttest_lab_1.png            4
1  er_posttest_lab_1004.png            2
2  er_posttest_lab_1015.png            4
3  er_posttest_lab_1017.png            3
4  er_posttest_lab_1036.png            5


In [17]:
# Step 1: Partition rows by whether the image name contains "lab" (case-insensitive).
# Rows with a missing image name do not match, so they end up in the classroom group.
is_lab_image = df['image'].str.contains('lab', case=False, na=False)
lab_data = df.loc[is_lab_image].copy()
classroom_data = df.loc[~is_lab_image].copy()

In [None]:
def calculate_delta(group):
    """Compute posttest-minus-pretest score deltas for paired images in ``group``.

    Image names are parsed as
    ``<answer_type>_pretest_<group_type>_<roll>.png``; each pretest image is
    paired with the posttest image that has the same answer type, group type
    and roll.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain an ``image`` column (file names) and a ``total_score``
        column.

    Returns
    -------
    list of tuple
        ``(pretest_image, post_score - pre_score)`` for every pretest row that
        has a matching posttest row, in pretest row order.

    Notes
    -----
    Pretest rows whose name does not split into exactly four ``_``-separated
    parts, or that have no posttest counterpart, are reported with ``print``
    and left out of the result.
    """
    image_names = group['image']
    pretest_data = group[image_names.str.contains('pretest', case=False, na=False)]
    posttest_data = group[image_names.str.contains('posttest', case=False, na=False)]

    # Index posttest scores by file name once instead of re-filtering the
    # frame for every pretest row. setdefault keeps the first occurrence, so a
    # duplicated posttest name resolves to its first row.
    posttest_scores = {}
    for post_image, post_score in zip(posttest_data['image'].tolist(),
                                      posttest_data['total_score'].tolist()):
        posttest_scores.setdefault(post_image, post_score)

    deltas = []
    for pre_image, pre_score in zip(pretest_data['image'].tolist(),
                                    pretest_data['total_score'].tolist()):
        # Strip only a trailing ".png"; removing it anywhere in the name would
        # corrupt the reconstructed posttest key.
        stem = pre_image[:-len('.png')] if pre_image.endswith('.png') else pre_image
        filename_parts = stem.split('_')
        if len(filename_parts) != 4:
            print(f"Skipping malformed filename: {pre_image}")
            continue

        # The second part is the test phase, which is swapped for "posttest".
        answer_type, _, group_type, roll = filename_parts
        match_key = f"{answer_type}_posttest_{group_type}_{roll}.png"

        if match_key in posttest_scores:
            deltas.append((pre_image, posttest_scores[match_key] - pre_score))
        else:
            print(f"No match found for: {pre_image} → {match_key}")

    return deltas


In [23]:
# Step 2: Pretest-to-posttest deltas for the lab group.
# calculate_delta prints a line for every malformed or unmatched pretest image.
lab_deltas = calculate_delta(group=lab_data)

No match found for: er_pretest_lab_833.png → er_posttest_lab_833.png


In [24]:

# Step 3: Pretest-to-posttest deltas for the non-lab (classroom) group.
# calculate_delta prints a line for every malformed or unmatched pretest image.
classroom_deltas = calculate_delta(group=classroom_data)

No match found for: normalization_pretest_watrin_831.png → normalization_posttest_watrin_831.png


In [None]:
def _print_score_stats(phase_label, scores):
    """Print mean, median, standard deviation, min and max of ``scores`` under a phase header."""
    print(f"\n--- {phase_label} Summary ---")
    print(f"Mean: {scores.mean():.2f}")
    print(f"Median: {scores.median():.2f}")
    print(f"Standard Deviation: {scores.std():.2f}")
    print(f"Min: {scores.min()}")
    print(f"Max: {scores.max()}")


def summarize_scores(group, group_name):
    """Print descriptive statistics of ``total_score`` for pretest and posttest rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain an ``image`` column (file names) and a ``total_score``
        column.
    group_name : str
        Label shown, upper-cased, in the section header.

    Returns
    -------
    None
        The summary is written to stdout.

    Notes
    -----
    Rows are assigned to a phase by a case-insensitive substring match on
    ``image``. Rows with a missing image name match neither phase
    (``na=False``); without that, a NaN in the mask makes the boolean
    indexing raise.
    """
    image_names = group['image']
    is_pretest = image_names.str.contains('pretest', case=False, na=False)
    is_posttest = image_names.str.contains('posttest', case=False, na=False)

    print(f"\n===== {group_name.upper()} GROUP SUMMARY =====")
    _print_score_stats("Pretest", group.loc[is_pretest, 'total_score'])
    _print_score_stats("Posttest", group.loc[is_posttest, 'total_score'])

def summarize_deltas(deltas, group_name):
    """Print descriptive statistics and sign counts for a list of score deltas.

    Parameters
    ----------
    deltas : list of tuple
        ``(image, delta)`` pairs, as returned by ``calculate_delta``; only the
        second element of each pair is used.
    group_name : str
        Label shown, upper-cased, in the section header.

    Returns
    -------
    None
        The summary is written to stdout.

    Notes
    -----
    The standard deviation is the sample estimate (``ddof=1``), so it is
    comparable with the pandas ``.std()`` values printed by
    ``summarize_scores``. NumPy's default (``ddof=0``) would report the
    population value instead.
    """
    delta_values = np.asarray([pair[1] for pair in deltas])

    print(f"\n===== Delta Summary for {group_name.upper()} Group =====")

    # np.min / np.max raise on an empty array, so report the empty case explicitly.
    if delta_values.size == 0:
        print("No matched pretest/posttest pairs to summarize.")
        return

    # A sample standard deviation needs at least two observations.
    if delta_values.size > 1:
        spread = np.std(delta_values, ddof=1)
    else:
        spread = float('nan')

    print(f"Mean Delta: {np.mean(delta_values):.2f}")
    print(f"Median Delta: {np.median(delta_values):.2f}")
    print(f"Standard Deviation: {spread:.2f}")
    print(f"Min Delta: {np.min(delta_values)}")
    print(f"Max Delta: {np.max(delta_values)}")

    print("\nDelta Counts:")
    print(f"  Positive change: {int(np.count_nonzero(delta_values > 0))}")
    print(f"  Negative change: {int(np.count_nonzero(delta_values < 0))}")
    print(f"  No change: {int(np.count_nonzero(delta_values == 0))}")


In [32]:
# Print the pretest/posttest score summary for each cohort, lab first
for cohort_frame, cohort_label in ((lab_data, "Lab"), (classroom_data, "Watrin")):
    summarize_scores(cohort_frame, cohort_label)


===== LAB GROUP SUMMARY =====

--- Pretest Summary ---
Mean: 2.07
Median: 2.00
Standard Deviation: 1.05
Min: 0
Max: 4

--- Posttest Summary ---
Mean: 2.95
Median: 3.00
Standard Deviation: 1.32
Min: 0
Max: 5

===== WATRIN GROUP SUMMARY =====

--- Pretest Summary ---
Mean: 2.11
Median: 2.00
Standard Deviation: 1.14
Min: 0
Max: 5

--- Posttest Summary ---
Mean: 3.04
Median: 3.00
Standard Deviation: 1.19
Min: 0
Max: 5


In [34]:
# Print the delta summary for each cohort, lab first
for cohort_deltas, cohort_label in ((lab_deltas, "Lab"), (classroom_deltas, "Watrin")):
    summarize_deltas(cohort_deltas, cohort_label)


--- Delta Summary for LAB Group ---
Mean Delta: 0.89
Median Delta: 1.00
Standard Deviation: 1.70
Min Delta: -3
Max Delta: 5

Delta Counts:
  Positive change: 64
  Negative change: 23
  No change: 19

--- Delta Summary for WATRIN Group ---
Mean Delta: 0.92
Median Delta: 1.00
Standard Deviation: 1.55
Min Delta: -2
Max Delta: 5

Delta Counts:
  Positive change: 62
  Negative change: 19
  No change: 23
