In [8]:
import pandas as pd
import random
# Simulate the trial-level dataset: every participant completes every block,
# and each trial presents one randomly chosen food.

# Define blocks and trial structure
blocks = ['health', 'taste', 'choice']
trials_per_block = 75
participants = range(1, 41)

# Seed Python's RNG so the simulated dataset is identical on every
# Restart & Run All (the numpy-based cells in this notebook also use 42).
random.seed(42)

# Generate 50 unique foods
base_foods = [
    'apple', 'banana', 'burger', 'carrot', 'donut', 'eggs', 'fries', 'grapes', 'ice cream', 'kale',
    'pizza', 'yogurt', 'spinach', 'steak', 'candy', 'popcorn', 'mango', 'nuts', 'cheese', 'chicken',
    'broccoli', 'chocolate', 'granola', 'lettuce', 'pasta', 'salmon', 'tofu', 'soda', 'rice', 'beans',
    'cucumber', 'peach', 'bacon', 'cereal', 'toast', 'avocado', 'beef', 'peanut butter', 'cake', 'milk',
    'watermelon', 'pear', 'turkey', 'onion rings', 'oatmeal', 'cranberries', 'syrup', 'waffles', 'cookie', 'shrimp'
]
assert len(base_foods) == 50
# Guard the "unique" claim as well: a duplicated name would silently share
# one entry in food_properties.
assert len(set(base_foods)) == 50


# Assign fat and sugar levels randomly.
# Each food gets ONE fixed fat/sugar label that is reused on every trial:
# fat is 'high' with weight 0.4, sugar is 'high' with weight 0.5.
food_properties = {}
for food in base_foods:
    fat = random.choices(['high', 'low'], weights=[0.4, 0.6])[0]
    sugar = random.choices(['high', 'low'], weights=[0.5, 0.5])[0]
    food_properties[food] = {'fat': fat, 'sugar': sugar}

# Generate trials: one record per participant x block x trial number.
all_trials = []

for participant in participants:
    for block in blocks:
        for trial_num in range(1, trials_per_block + 1):
            # Foods are sampled with replacement, so a food can repeat
            # within a block.
            food = random.choice(base_foods)
            rt_missing = random.random() < 0.02  # 2% chance of missing RT
            reaction_time = None if rt_missing else round(random.uniform(0.5, 4.0), 2)
            # A trial without a reaction time has no rating either.
            rating = None if reaction_time is None else random.randint(1, 10)

            fat = food_properties[food]['fat']
            sugar = food_properties[food]['sugar']

            trial = {
                'ID': participant,
                'block': block,
                'trial_number': trial_num,
                'food': food,
                'reaction_time': reaction_time,
                'rating': rating,
                'fat': fat,
                'sugar': sugar
            }
            all_trials.append(trial)

# Create DataFrame (built once from the list of records)
df = pd.DataFrame(all_trials)

In [9]:
# Demo frame: simulated participant-level demographics, lunch intake and EDE scores
import pandas as pd
import numpy as np

# Build a simulated participant-level table (one row per ID) with
# demographics, lunch intake, EDE score and illness duration.

# Set seed for reproducibility
np.random.seed(42)

# Number of individuals
n = 40

# Generate IDs
ids = list(range(1, n + 1))

# Generate Age (18 to 65); randint's upper bound is exclusive
ages = np.random.randint(18, 66, size=n)

# Patient status: 50% patients
patient_status = np.random.choice(['Yes', 'No'], size=n, p=[0.5, 0.5])

# Medication: 60% of patients are on medication, 20% of non-patients.
# Short-circuit evaluation means exactly one random draw per participant.
medication = [
    'Yes' if (pat == 'Yes' and np.random.rand() < 0.6) or (pat == 'No' and np.random.rand() < 0.2) else 'No'
    for pat in patient_status
]

# BMI: lower for patients (mean 17.5) vs non-patients (mean 23.5)
bmi = [
    round(np.random.normal(17.5, 1.2), 1) if pat == 'Yes' else round(np.random.normal(23.5, 2.0), 1)
    for pat in patient_status
]


def _simulate_intake(patient_mean, patient_sd, control_mean, control_sd):
    """Draw one whole-number intake value per entry of ``patient_status``.

    Patients are drawn from N(patient_mean, patient_sd) and non-patients
    from N(control_mean, control_sd). Each draw is truncated to an int and
    clamped at 0, because the normal tail can otherwise produce a negative
    amount.

    Returns:
        list[int]: one non-negative value per participant, in ID order.
    """
    return [
        max(0, int(
            np.random.normal(patient_mean, patient_sd) if pat == 'Yes'
            else np.random.normal(control_mean, control_sd)
        ))
        for pat in patient_status
    ]


# Lunch intake values (one full pass over the participants per nutrient,
# in this order, so the random stream is consumed nutrient by nutrient)
fat = _simulate_intake(12, 4, 25, 5)
calories = _simulate_intake(320, 80, 650, 100)
protein = _simulate_intake(14, 4, 30, 6)
sugar = _simulate_intake(18, 5, 25, 6)

# EDE Global Score: higher for patients (mean 4.5), lower for non-patients (mean 1.0)
ede_score = [
    round(np.random.normal(4.5, 0.8), 1) if pat == 'Yes' else round(np.random.normal(1.0, 0.6), 1)
    for pat in patient_status
]

# Duration of illness for patients, missing (NaN) for non-patients.
# NaN keeps the column numeric (float64) instead of object; to_csv writes
# NaN as an empty field by default, so the saved CSV still shows a blank.
duration = [
    round(np.random.uniform(1, 10), 1) if pat == 'Yes' else np.nan
    for pat in patient_status
]

# Assemble the DataFrame
data = pd.DataFrame({
    'ID': ids,
    'Age': ages,
    'Patient': patient_status,
    'Medication': medication,
    'BMI': bmi,
    'Fat (g)': fat,
    'Calories': calories,
    'Protein (g)': protein,
    'Sugar (g)': sugar,
    'EDE Score': ede_score,
    'Duration of Illness': duration
})

print(data)


    ID  Age Patient Medication   BMI  Fat (g)  Calories  Protein (g)  \
0    1   56     Yes        Yes  17.9        6       365            4   
1    2   46     Yes        Yes  18.6        9       306           12   
2    3   32      No        Yes  22.3       24       635           36   
3    4   60      No         No  22.9       32       456           27   
4    5   25     Yes         No  15.7       11       332           16   
5    6   38     Yes         No  17.2       12       318            9   
6    7   56     Yes         No  18.7       14       213            8   
7    8   36     Yes         No  17.2       14       317           10   
8    9   40      No         No  22.0       25       784           23   
9   10   28      No         No  22.8       19       576           22   
10  11   28      No         No  24.9       28       554           32   
11  12   41     Yes        Yes  17.2       13       408           21   
12  13   53     Yes        Yes  18.5        7       370         

In [10]:
# Save the trial-level frame `df` as FCT_df.csv; the DataFrame index is
# written as the first column.
# NOTE(review): absolute, machine-specific path — this cell fails where the
# folder does not exist.
df.to_csv(
    path_or_buf="/Users/emilylloyd/Documents/Coding Course/FCT_df.csv",
    index=True,
)

In [11]:
# Save the participant-level frame `data` as demo.csv; the DataFrame index
# is written as the first column.
# NOTE(review): absolute, machine-specific path — this cell fails where the
# folder does not exist.
data.to_csv(
    path_or_buf="/Users/emilylloyd/Documents/Coding Course/demo.csv",
    index=True,
)

In [None]:
# Generate the DD data (simulated trial-level delayed-choice dataset)

In [12]:
import numpy as np
import pandas as pd

# Simulate participants (df_demo) and their binary trial-level choices
# (df_trials). The order of random draws is part of the result: two normal
# draws per participant, then one binomial draw per trial.
np.random.seed(42)

n_participants = 100
n_trials = 50
conditions = ['control', 'future']
groups = ['control', 'SUD']

# Create participant-level data: the first half of the IDs are 'SUD',
# the rest 'control'.
participants = []
for i in range(n_participants):
    in_sud_group = i < n_participants / 2
    # Draw age first, then impulsivity; SUD participants get a +0.5 shift.
    sampled_age = np.random.normal(30, 5)
    sampled_impulsivity = np.random.normal(0, 1) + (0.5 if in_sud_group else 0)
    participants.append({
        'participant_id': i + 1,
        'group': 'SUD' if in_sud_group else 'control',
        'age': sampled_age,
        'impulsivity': sampled_impulsivity,
    })

df_demo = pd.DataFrame(participants)

# Simulate trial-level choices
trials = []

for person in df_demo.itertuples(index=False):
    is_sud = person.group == 'SUD'

    # Impulsivity effect: stronger in SUD group
    impulsivity_effect = (-0.1 if is_sud else -0.05) * person.impulsivity

    for condition in conditions:
        base_prob = 0.5

        # Condition effect: future condition increases delayed choice
        if condition == 'future':
            base_prob += 0.2

        # SUD effect at baseline (control condition)
        if is_sud and condition == 'control':
            base_prob -= 0.2

        # Keep the probability inside [0.05, 0.95] so no participant is
        # fully deterministic. It depends only on participant and condition,
        # so it is computed once here rather than once per trial.
        prob = np.clip(base_prob + impulsivity_effect, 0.05, 0.95)

        for trial_number in range(1, n_trials + 1):
            # One Bernoulli draw per trial: 1 = chose the delayed option.
            choice = np.random.binomial(1, prob)

            trials.append({
                'participant_id': person.participant_id,
                'group': person.group,
                'age': person.age,
                'impulsivity': person.impulsivity,
                'condition': condition,
                'trial': trial_number,
                'chose_delayed': choice
            })

df_trials = pd.DataFrame(trials)
# Save the trial-level choices as DD_df.csv; the DataFrame index is written
# as the first column.
# NOTE(review): absolute, machine-specific path — this line fails where the
# folder does not exist.
df_trials.to_csv(
    path_or_buf="/Users/emilylloyd/Documents/Coding Course/DD_df.csv",
    index=True,
)

In [13]:
# Display the simulated trial-level frame as the cell's output
# (one row per participant x condition x trial).
df_trials

Unnamed: 0,participant_id,group,age,impulsivity,condition,trial,chose_delayed
0,1,SUD,32.483571,0.361736,control,1,1
1,1,SUD,32.483571,0.361736,control,2,1
2,1,SUD,32.483571,0.361736,control,3,0
3,1,SUD,32.483571,0.361736,control,4,0
4,1,SUD,32.483571,0.361736,control,5,0
...,...,...,...,...,...,...,...
9995,100,control,30.291044,-1.142970,future,46,1
9996,100,control,30.291044,-1.142970,future,47,1
9997,100,control,30.291044,-1.142970,future,48,1
9998,100,control,30.291044,-1.142970,future,49,1
