<a href="https://colab.research.google.com/github/Nai98877/germandata/blob/main/Untitled8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Per-study summary figures that drive the synthetic dataset (sample sizes sum to 800).
# How the generation code in this file reads the fields:
#   - "Error_Rate", "Recovery_Attempts", "Recovery_Success" and the values inside
#     "Feedback_Clarity" are divided by 100, i.e. they are treated as percentages.
#   - "Common_Errors" and "Error_Context" are lists that one entry is sampled from.
#   - The remaining descriptive fields are copied onto each participant row as-is.
# NOTE(review): "Avg_Age" is not read by the generation code visible in this file,
# and the unit of "Time_to_Recover" is not stated anywhere here - confirm both.
study_data = [
    {
        "Study": "Mobile Usability Challenges",
        "Authors": "Qingchuan Li et al",
        "Year": 2020,
        "Sample_Size": 90,
        "Avg_Age": 66,
        "Error_Rate": 60,
        "Common_Errors": [
            "Navigation Error",
            "Icon Misinterpretation",
            "Difficulty Recovering",
        ],
        "Error_Context": ["Menu"],
        "Feedback_Type": "Visual",
        "Feedback_Clarity": {"Unclear": 60, "Clear": 40},
        "Recovery_Attempts": 75,
        "Recovery_Success": 20,
        "Time_to_Recover": 20,
        "Frustration_Level": "High",
        "Cognitive_Load": "High",
        "Assistive_Tech_Used": "No",
        "Task_Pace": "Slow",
        "Help_Sought": "No",
        "Final_Outcome": "Task Completed",
    },
    {
        "Study": "Influence of Age in Usability Testing",
        "Authors": "Sonderegger et al",
        "Year": 2016,
        "Sample_Size": 30,
        "Avg_Age": 65,
        "Error_Rate": 90,
        "Common_Errors": ["Sign Up/Sign In Issues"],
        "Error_Context": ["Sign Up Pages"],
        "Feedback_Type": "Visual",
        "Feedback_Clarity": {"Unclear": 100},
        "Recovery_Attempts": 55,
        "Recovery_Success": 100,
        "Time_to_Recover": 15,
        "Frustration_Level": "High",
        "Cognitive_Load": "High",
        "Assistive_Tech_Used": "No",
        "Task_Pace": "Slow",
        "Help_Sought": "No",
        "Final_Outcome": "Task Completed",
    },
    {
        "Study": "Use of Mobile Phones by Elderly",
        "Authors": "Nasir, M., Hassan",
        "Year": 2008,
        "Sample_Size": 150,
        "Avg_Age": 68,
        "Error_Rate": 60,
        "Common_Errors": ["Small Buttons", "Complex Menu"],
        "Error_Context": ["Calling", "Texting", "Navigating Features"],
        "Feedback_Type": "Visual",
        "Feedback_Clarity": {"Unclear": 70, "Clear": 30},
        "Recovery_Attempts": 30,
        "Recovery_Success": 30,
        "Time_to_Recover": 15,
        "Frustration_Level": "High",
        "Cognitive_Load": "High",
        "Assistive_Tech_Used": "No",
        "Task_Pace": "Slow",
        "Help_Sought": "Yes",
        "Final_Outcome": "Task Incomplete",
    },
    {
        "Study": "Usability for Senior Citizens",
        "Authors": "Jakob Nielsen",
        "Year": 2013,
        "Sample_Size": 80,
        "Avg_Age": 70,
        "Error_Rate": 50,
        "Common_Errors": [
            "Small Buttons",
            "Unclear Directions",
            "Unclear Error Messages",
        ],
        "Error_Context": ["General Navigation"],
        "Feedback_Type": "Visual",
        "Feedback_Clarity": {"Unclear": 45, "Clear": 55},
        "Recovery_Attempts": 90,
        "Recovery_Success": 70,
        "Time_to_Recover": 5,
        "Frustration_Level": "High",
        "Cognitive_Load": "High",
        "Assistive_Tech_Used": "No",
        "Task_Pace": "Medium",
        "Help_Sought": "Yes",
        "Final_Outcome": "Task Completed",
    },
    {
        "Study": "Smartphone Acceptance & Use",
        "Authors": "Xiaowen Zhu et al",
        "Year": 2024,
        "Sample_Size": 450,
        "Avg_Age": 66,
        "Error_Rate": 60,
        "Common_Errors": ["Unclear Error Messages"],
        "Error_Context": ["Filling Questionnaire"],
        "Feedback_Type": "Written",
        "Feedback_Clarity": {"Unclear": 65, "Clear": 35},
        "Recovery_Attempts": 75,
        "Recovery_Success": 30,
        "Time_to_Recover": 40,
        "Frustration_Level": "Medium",
        "Cognitive_Load": "High",
        "Assistive_Tech_Used": "No",
        "Task_Pace": "Slow",
        "Help_Sought": "Yes",
        "Final_Outcome": "Task Completed",
    },
]

# Categorical feature options used when synthesising participants.
# Each tech-experience label is paired with its sampling weight in one mapping so
# the two derived lists can never drift out of step:
# 50% Low, 35% Intermediate, 15% High (an assumed distribution for older adults).
_tech_experience_distribution = {
    "Low": 0.5,
    "Intermediate": 0.35,
    "High": 0.15,
}
tech_experience_levels = list(_tech_experience_distribution)
tech_experience_probs = list(_tech_experience_distribution.values())

# Every study in the table is about smartphones, hence the single option.
device_types = ["Smartphone"]

# Generate dataset: one synthetic participant row per sampled person in each study.
# Within a study the rows are laid out deterministically by position: the first
# `num_errors` participants made an error, the first `num_recovery_attempts` of
# those attempted recovery, and the first `num_recovery_successes` of those
# succeeded. Only the categorical draws below are random.
data = []
user_id = 1000

# Fixed seed so re-running the script regenerates exactly the same dataset.
# A local Generator is used so NumPy's global random state is left untouched.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

for study in study_data:
    sample_size = study["Sample_Size"]

    # Percentage of a count, floored. Multiplying before the floor-division
    # keeps integer inputs exact; `int((pct / 100) * n)` can lose one unit to
    # binary float error (e.g. int((57 / 100) * 100) == 56).
    num_errors = int(study["Error_Rate"] * sample_size // 100)
    # Only error-makers attempt recovery, so this is a share of `num_errors`.
    num_recovery_attempts = int(study["Recovery_Attempts"] * num_errors // 100)
    # Successes are a share of the attempts, not of the whole sample.
    num_recovery_successes = int(study["Recovery_Success"] * num_recovery_attempts // 100)

    # Loop-invariant for the study: build the label/probability lists once
    # instead of once per participant.
    clarity_labels = list(study["Feedback_Clarity"])
    clarity_probs = [pct / 100 for pct in study["Feedback_Clarity"].values()]

    for i in range(sample_size):
        made_error = 1 if i < num_errors else 0
        recovery_attempted = 1 if (made_error and i < num_recovery_attempts) else 0
        recovery_success = 1 if (recovery_attempted and i < num_recovery_successes) else 0

        feedback_clarity_choice = rng.choice(clarity_labels, p=clarity_probs)

        data.append({
            "User_ID": f"U{user_id}",
            "Study": study["Study"],
            "Authors": study["Authors"],
            "Year": study["Year"],
            # Same label on every row; the study table only carries per-study
            # average ages (65-70), individual ages are not modelled.
            "Age_Group": "65+",
            "Device_Type": "Smartphone",
            "Tech_Experience_Level": rng.choice(tech_experience_levels, p=tech_experience_probs),
            "Made_Error": made_error,
            # Error details are sampled uniformly from the study's lists, and
            # only for participants who made an error.
            "Error_Type": rng.choice(study["Common_Errors"]) if made_error else "None",
            "Error_Context": rng.choice(study["Error_Context"]) if made_error else "None",
            "Feedback_Type": study["Feedback_Type"],
            "Feedback_Clarity": feedback_clarity_choice,
            "Recovery_Attempted": recovery_attempted,
            "Recovery_Success": recovery_success,
            # Study-level value copied verbatim; "N/A" marks rows without a
            # recovery attempt (so this column mixes numbers and strings).
            "Time_to_Recover": study["Time_to_Recover"] if recovery_attempted else "N/A",
            "Frustration_Level": study["Frustration_Level"],
            "Cognitive_Load": study["Cognitive_Load"],
            "Assistive_Tech_Used": study["Assistive_Tech_Used"],
            "Task_Pace": study["Task_Pace"],
            "Help_Sought": study["Help_Sought"],
            "Final_Outcome": study["Final_Outcome"]
        })

        user_id += 1

# Output location, relative to the current working directory; edit `file_path`
# to save the CSV somewhere else on your machine.
file_path = "older_adults_usability_dataset.csv"

# Turn the list of participant dicts into a table (one row per participant).
df = pd.DataFrame(data)

# Write the table without the positional row index, then report where it went.
df.to_csv(path_or_buf=file_path, index=False)
print(f"Dataset saved to {file_path}")

Dataset saved to older_adults_usability_dataset.csv
