In [6]:
import math
from scipy import stats

def cohens_d_independent(m1, s1, n1, m2, s2, n2):
    # 1. pooled SD
    s1_sq = s1**2
    s2_sq = s2**2
    pooled_var = ((n1-1)*s1_sq + (n2-1)*s2_sq) / (n1+n2-2)
    s_pooled = math.sqrt(pooled_var)
    
    # 2. Cohen's d
    diff = m1 - m2
    d = diff / s_pooled
    
    return d

# Example
m1, s1, n1 = 80, 6, 15    # Group 1: mean, std, n
m2, s2, n2 = 75, 6, 15    # Group 2: mean, std, n

d = cohens_d_independent(m1, s1, n1, m2, s2, n2)

In [7]:
from scipy.stats import norm

alpha = 0.05   # type I error rate (1-alpha confidence level)
beta = 0.2     # type II error rate (1 - power)
norm.ppf(1 - alpha / 2)  # two-tailed z critical value for 95% confidence interval
norm.ppf(1 - beta)  # z critical value for 80% power

# Sample size calculation for one-sample z-test for mean
def calculate_sample_size(d, alpha, beta):
    z_alpha = norm.ppf(1 - alpha / 2)  # two-tailed
    z_beta = norm.ppf(1 - beta)
    n = 2 * (z_alpha + z_beta) ** 2 / d ** 2
    return int(n) + 1  # round up to next whole number

calculate_sample_size(d, alpha, beta)  # Example usage

23

# Experiment design

Generates a complete experimental design for assigning videos and beliefs to subjects. It creates all possible permutations of video orders (`all_orders`) and assigns random beliefs (`all_beliefs`) to each video for every subject. The design matrix (`design`) combines video order, belief, order index, and subject ID into a structured format. Finally, it prints the design in a tabular format, showing the video, belief, order, and subject for each assignment.

In [4]:
import numpy as np
from itertools import permutations

video_values = [
    '4171487-uhd_3840_2160_30fps.mp4', 
    '5768645-uhd_3840_2160_25fps.mp4', 
    '11946387_3840_2160_30fps.mp4', 
    '11946387_3840_2160_30fps.mp4'
]

belief_values = ['AI Generated', 'Human Generated'] # index 0='AI', 1='Human'


# -----------------------------
# Derive orders (one per subject)
# -----------------------------
videos = np.arange(1, len(video_values) + 1)  # [1,2,3,4]
all_orders = np.array(list(permutations(videos)))  # 24 permutations for 4 videos
n_subjects = all_orders.shape[0]

# Ensure we can split subjects evenly between the two label strata
if n_subjects % 2 != 0:
    raise ValueError("Number of subjects must be even to split label strata 50/50.")

# -----------------------------
# Define the two label strata (by video ID, not by order)
# Stratum A: 1,2 = Human ; 3,4 = AI
# Stratum B: 1,2 = AI    ; 3,4 = Human
# (encode: 0='AI', 1='Human')
# -----------------------------
label_map_A = np.array([1, 1, 0, 0])  # index 0->video1, etc.
label_map_B = 1 - label_map_A         # flips AI/Human for each video

# -----------------------------
# Build design
# -----------------------------
subjects = np.repeat(np.arange(1, n_subjects + 1), videos.size)        # 1..24 repeated 4 times
orders   = np.tile(np.arange(1, videos.size + 1), n_subjects)          # 1..4 for each subject
video_seq = all_orders.flatten()

# Pick which subjects use which stratum (first half A, second half B)
which_stratum = np.array([0]*(n_subjects//2) + [1]*(n_subjects//2))    # 0=A, 1=B

# Compute beliefs per row by applying the subject's stratum map to the video ID
belief_codes = []
for s_idx in range(n_subjects):
    v_ids = all_orders[s_idx]                  # length-4 array of video IDs in that subject's order
    lmap = label_map_A if which_stratum[s_idx] == 0 else label_map_B
    belief_codes.append(lmap[v_ids - 1])       # map by video ID (1-based -> 0-based index)
belief_codes = np.concatenate(belief_codes)

# Stack
design = np.column_stack((video_seq, belief_codes, orders, subjects))

# -----------------------------
# Pretty print
# -----------------------------
print("Video\tBelief\t\t\tOrder\tSubject")
for i, row in enumerate(design):
    video_id, belief_code, ord_idx, subj = row.astype(int)
    belief_str = belief_values[belief_code]
    print(f"{video_id}\t{belief_str:<16}\t{ord_idx}\t{subj}")
    if (i + 1) % videos.size == 0:
        print()

# -----------------------------
# Sanity checks (optional): balance per video across subjects
# -----------------------------
if True:
    # For each video, count how many times it's labeled AI/Human across all subjects
    counts = {vid: {"AI": 0, "Human": 0} for vid in videos}
    for (vid, bcode) in design[:, :2].astype(int):
        if bcode == 0:
            counts[vid]["AI"] += 1
        else:
            counts[vid]["Human"] += 1
    print("Label counts per video across subjects:")
    for vid in videos:
        print(f"Video {vid}: AI={counts[vid]['AI']}, Human={counts[vid]['Human']}")

Video	Belief			Order	Subject
1	Human Generated 	1	1
2	Human Generated 	2	1
3	AI Generated    	3	1
4	AI Generated    	4	1

1	Human Generated 	1	2
2	Human Generated 	2	2
4	AI Generated    	3	2
3	AI Generated    	4	2

1	Human Generated 	1	3
3	AI Generated    	2	3
2	Human Generated 	3	3
4	AI Generated    	4	3

1	Human Generated 	1	4
3	AI Generated    	2	4
4	AI Generated    	3	4
2	Human Generated 	4	4

1	Human Generated 	1	5
4	AI Generated    	2	5
2	Human Generated 	3	5
3	AI Generated    	4	5

1	Human Generated 	1	6
4	AI Generated    	2	6
3	AI Generated    	3	6
2	Human Generated 	4	6

2	Human Generated 	1	7
1	Human Generated 	2	7
3	AI Generated    	3	7
4	AI Generated    	4	7

2	Human Generated 	1	8
1	Human Generated 	2	8
4	AI Generated    	3	8
3	AI Generated    	4	8

2	Human Generated 	1	9
3	AI Generated    	2	9
1	Human Generated 	3	9
4	AI Generated    	4	9

2	Human Generated 	1	10
3	AI Generated    	2	10
4	AI Generated    	3	10
1	Human Generated 	4	10

2	Human Generated 	1	11
4	AI Generate

In [5]:
print("Total number of subjects:", len(np.unique(design[:, 3])))


Total number of subjects: 24


In [9]:
design.shape, design.sum(axis=0)  # Sum of each column

((96, 4), array([ 240,   58,  240, 1200]))

Outputs the design in a CSV file that can be imported in PsychoPy

In [10]:
import csv
import numpy as np
out_path = "./subjects_design.csv"
header = ["video1","label1","video2","label2","video3","label3","video4","label4"]
subjects = np.unique(design[:, 3])
with open(out_path, "w", newline="") as f:
    w = csv.writer(f)
    w.writerow(header)
    for s in subjects:
        rows = design[design[:, 3] == s]
        rows = rows[rows[:, 2].argsort()]  # sort by Order
        row = []
        for r in rows:
            row.append(video_values[r[0] - 1])  # video
            row.append(belief_values[r[1]])     # label/belief
        w.writerow(row)
print(f"Saved CSV to: {out_path}")

Saved CSV to: ./subjects_design.csv
