In [1]:
import numpy as np
import pandas as pd

# Define user profiles
user_profiles = [
    {"id": 1, "age": 25, "gender": "M", "fitness_level": "high", "response_rate": 0.9},
    {"id": 2, "age": 40, "gender": "F", "fitness_level": "medium", "response_rate": 0.7},
    {"id": 3, "age": 60, "gender": "M", "fitness_level": "low", "response_rate": 0.5},
    # Add more profiles as needed
]


In [2]:
# Define nudges
nudges = ["gentle", "moderate", "intense"]

# Define simulation duration
days = 30

# Generate synthetic data
data = []

for day in range(days):
    for user in user_profiles:
        nudge = np.random.choice(nudges)
        response = np.random.rand() < user["response_rate"]
        steps = np.random.randint(1000, 5000) if response else np.random.randint(0, 1000)
        
        data.append({
            "user_id": user["id"],
            "day": day + 1,
            "nudge": nudge,
            "response": response,
            "steps": steps
        })

# Convert to DataFrame
df = pd.DataFrame(data)
print(df.head())


   user_id  day     nudge  response  steps
0        1    1  moderate     False    410
1        2    1   intense     False    169
2        3    1  moderate      True   1464
3        1    2    gentle      True   4697
4        2    2  moderate      True   4482


In [3]:
def simulate_user_response(user_profile, nudge):
    base_steps = 2000
    multiplier = {
        "gentle": 1.0,
        "moderate": 1.5,
        "intense": 2.0
    }
    
    steps = base_steps * multiplier[nudge]
    noise = np.random.normal(0, 200)  # Add some randomness
    response = np.random.rand() < user_profile["response_rate"]
    
    return max(0, steps + noise) if response else np.random.randint(0, 1000)

# Generate synthetic data with the behavior model
data = []

for day in range(days):
    for user in user_profiles:
        nudge = np.random.choice(nudges)
        steps = simulate_user_response(user, nudge)
        
        data.append({
            "user_id": user["id"],
            "day": day + 1,
            "nudge": nudge,
            "steps": steps
        })

# Convert to DataFrame
df = pd.DataFrame(data)
print(df.head())


   user_id  day     nudge        steps
0        1    1    gentle  1933.001134
1        2    1    gentle  2371.579560
2        3    1    gentle  2271.879243
3        1    2  moderate  3041.999281
4        2    2   intense  4328.176531


In [4]:
# Save to CSV
df.to_csv("synthetic_user_data.csv", index=False)

# Load and analyze
df = pd.read_csv("synthetic_user_data.csv")
print(df.describe())


         user_id        day        steps
count  90.000000  90.000000    90.000000
mean    2.000000  15.500000  2296.582453
std     0.821071   8.703932  1489.355212
min     1.000000   1.000000     3.000000
25%     1.000000   8.000000   679.500000
50%     2.000000  15.500000  2185.356278
75%     3.000000  23.000000  3881.040154
max     3.000000  30.000000  4473.732386
