## Synthetic Data Generation for Vision and Hearing Assessment Models

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the CSV paths used further down point at /content/, not at a
# location under this mount -- confirm whether the mount is actually needed.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import random

In [None]:
def generate_user_data(num_records):
    """Create the demographic columns shared by every synthetic test dataset.

    Args:
        num_records: Number of synthetic users to create.

    Returns:
        A ``(user_ids, ages, genders)`` tuple where ``user_ids`` is a list of
        1-based IDs zero-padded to at least three digits ("001", "002", ...),
        ``ages`` is an integer array drawn from ``[18, 65)`` and ``genders``
        is an array of 'Male'/'Female' labels drawn with equal probability.
    """
    # Draw order (ages first, then genders) is kept fixed so that a seeded
    # global NumPy RNG yields the same records.
    age_values = np.random.randint(low=18, high=65, size=num_records)
    gender_labels = np.random.choice(
        ['Male', 'Female'],
        size=num_records,
        p=[0.50, 0.50],
    )
    id_strings = [f"{number:03d}" for number in range(1, num_records + 1)]
    return id_strings, age_values, gender_labels

# Generation parameters for each supported test, keyed by the public
# test-type name. Every entry holds:
#   accuracy_range  - (low, high) bounds for the uniform accuracy draw, in %
#   response_range  - (low, high) bounds for the uniform response time, in ms
#   num_items       - number of items in the test, used to derive the
#                     "number correct" column from the accuracy
#   good_min        - minimum accuracy classified as 'Good'
#   normal_min      - minimum accuracy classified as 'Normal' (else 'Bad')
#   count_column    - name of the "number correct" output column
#   class_column    - name of the classification output column
_TEST_SPECS = {
    "Visual Acuity": {
        "accuracy_range": (50, 100),
        "response_range": (800, 2000),
        "num_items": 20,
        "good_min": 85,
        "normal_min": 70,
        "count_column": "NumCorrectLetters",
        "class_column": "EyesightClassification",
    },
    "Color Vision": {
        "accuracy_range": (40, 100),
        "response_range": (900, 2500),
        "num_items": 20,
        "good_min": 85,
        "normal_min": 60,
        "count_column": "NumCorrectPlates",
        "class_column": "EyesightClassification",
    },
    "Sound Localization": {
        "accuracy_range": (40, 100),
        "response_range": (1000, 2500),
        "num_items": 25,
        "good_min": 85,
        "normal_min": 60,
        "count_column": "NumCorrectSounds",
        "class_column": "HearingClassification",
    },
    "Speech Discrimination": {
        "accuracy_range": (40, 100),
        "response_range": (1000, 3000),
        "num_items": 30,
        "good_min": 85,
        "normal_min": 60,
        "count_column": "NumCorrectPhrases",
        "class_column": "HearingClassification",
    },
}


def generate_test_data(num_records, test_type):
    """Generate a synthetic results table for one vision or hearing test.

    Args:
        num_records: Number of rows (synthetic users) to generate.
        test_type: One of "Visual Acuity", "Color Vision",
            "Sound Localization" or "Speech Discrimination".

    Returns:
        A ``pandas.DataFrame`` with the columns UserID, Age, Gender,
        "Accuracy (%)", "ResponseTime (ms)", a test-specific "NumCorrect..."
        column and a test-specific "...Classification" column holding
        'Good', 'Normal' or 'Bad'.

    Raises:
        ValueError: If ``test_type`` is not one of the supported names.
    """
    # Validate before drawing any random numbers so that a bad test type
    # fails fast and does not advance the global RNG state.
    try:
        spec = _TEST_SPECS[test_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments, which previously also ended
        # up in the ValueError branch.
        valid = ", ".join(repr(name) for name in _TEST_SPECS)
        raise ValueError(
            f"Invalid test type. Got {test_type!r}; expected one of: {valid}."
        ) from None

    # Common demographic columns.
    user_ids, ages, genders = generate_user_data(num_records)

    # Draw order (accuracy, then response time) matches the original
    # implementation so seeded runs produce the same data.
    acc_low, acc_high = spec["accuracy_range"]
    accuracies = np.random.uniform(acc_low, acc_high, size=num_records).round(2)

    rt_low, rt_high = spec["response_range"]
    response_times = np.random.uniform(rt_low, rt_high, size=num_records).round(0)

    # Number of correct items implied by the accuracy, rounded to a whole
    # number (kept as float, as before).
    num_correct = (accuracies / 100 * spec["num_items"]).round(0)

    classifications = np.where(
        accuracies >= spec["good_min"],
        'Good',
        np.where(accuracies >= spec["normal_min"], 'Normal', 'Bad'),
    )

    data = {
        "UserID": user_ids,
        "Age": ages,
        "Gender": genders,
        "Accuracy (%)": accuracies,
        "ResponseTime (ms)": response_times,
        spec["count_column"]: num_correct,
        spec["class_column"]: classifications,
    }
    return pd.DataFrame(data)

In [None]:
# Build one synthetic dataset per assessment, then write each one to CSV.
num_records = 400

# Output location for each test type; the key order is also the order in
# which the datasets are generated and written.
file_paths = {
    "Visual Acuity": "/content/visual_acuity_data.csv",
    "Color Vision": "/content/color_vision_data.csv",
    "Sound Localization": "/content/sound_localization_data.csv",
    "Speech Discrimination": "/content/speech_discrimination_data.csv"
}

(
    visual_acuity_data,
    color_vision_data,
    sound_localization_data,
    speech_discrimination_data,
) = (generate_test_data(num_records, test_name) for test_name in file_paths)

generated_frames = (
    visual_acuity_data,
    color_vision_data,
    sound_localization_data,
    speech_discrimination_data,
)
for frame, csv_path in zip(generated_frames, file_paths.values()):
    # index=False keeps the pandas row index out of the file.
    frame.to_csv(csv_path, index=False)

In [None]:
# Preview the first rows of the visual acuity dataset.
visual_acuity_data.head()

Unnamed: 0,UserID,Age,Gender,Accuracy (%),ResponseTime (ms),NumCorrectLetters,EyesightClassification
0,1,46,Male,61.85,1253.0,12.0,Bad
1,2,39,Male,61.23,1664.0,12.0,Bad
2,3,45,Male,79.75,1733.0,16.0,Normal
3,4,27,Male,91.88,1340.0,18.0,Good
4,5,25,Male,59.48,1615.0,12.0,Bad


In [None]:
# Preview the first rows of the color vision dataset.
color_vision_data.head()

Unnamed: 0,UserID,Age,Gender,Accuracy (%),ResponseTime (ms),NumCorrectPlates,EyesightClassification
0,1,60,Female,41.58,1347.0,8.0,Bad
1,2,46,Male,69.0,2428.0,14.0,Normal
2,3,30,Female,44.34,2092.0,9.0,Bad
3,4,62,Male,80.67,2181.0,16.0,Normal
4,5,38,Female,40.08,1993.0,8.0,Bad


In [None]:
# Preview the first rows of the sound localization dataset.
sound_localization_data.head()

Unnamed: 0,UserID,Age,Gender,Accuracy (%),ResponseTime (ms),NumCorrectSounds,HearingClassification
0,1,62,Male,95.03,2061.0,24.0,Good
1,2,60,Male,69.44,1302.0,17.0,Normal
2,3,36,Male,82.98,2107.0,21.0,Normal
3,4,37,Male,94.43,1478.0,24.0,Good
4,5,32,Male,82.18,1425.0,21.0,Normal


In [None]:
# Preview the first rows of the speech discrimination dataset.
speech_discrimination_data.head()

Unnamed: 0,UserID,Age,Gender,Accuracy (%),ResponseTime (ms),NumCorrectPhrases,HearingClassification
0,1,63,Female,57.22,1678.0,17.0,Bad
1,2,18,Male,57.23,2260.0,17.0,Bad
2,3,41,Male,82.93,2287.0,25.0,Normal
3,4,47,Male,54.72,1152.0,16.0,Bad
4,5,60,Male,80.31,2054.0,24.0,Normal
