In [1]:
import numpy as np
import pandas as pd

# Function to simulate hearing ability (based on general trends in hearing loss with age)
def hearing_threshold_by_age(frequency, age):
    """
    Return a simulated hearing threshold in dB for one frequency and one age.

    The threshold is a baseline plus an age penalty that grows linearly
    with ``age``; a larger (less negative) value means worse hearing.

    Two bands are distinguished by ``frequency``:

    * below 1000: baseline of -35 and a penalty of 5 per 100 years of age;
    * otherwise: baseline of ``-35 + (frequency - 12000) / 1000`` and a
      penalty of 20 per 100 years of age. The baseline is anchored at
      12000, so inputs between 1000 and 12000 get a baseline below -35.

    Parameters
    ----------
    frequency : scalar number
        Tone frequency (Hz, going by the comments at the call site).
    age : scalar number
        Age in years.

    Returns
    -------
    The result of ``np.clip`` on the computed threshold, limited to the
    closed range [-45, 0].
    """
    # Select the (baseline, penalty-per-100-years) pair for the band.
    if frequency < 1000:
        baseline_db, penalty_per_century = -35, 5
    else:
        baseline_db, penalty_per_century = -35 + (frequency - 12000) / 1000, 20

    # Baseline plus the age-proportional penalty, then bounded to [-45, 0].
    raw_db = baseline_db + (age / 100) * penalty_per_century
    return np.clip(raw_db, -45, 0)

# Frequencies sampled in 1 Hz steps: a low band (20-50 Hz) and a high band (12000-18000 Hz).
# np.arange excludes its stop value, hence the +1 on each upper bound.
low_freq_range = np.arange(20, 51)
high_freq_range = np.arange(12000, 18001)
all_frequencies = np.concatenate([low_freq_range, high_freq_range])

# One synthetic subject per integer age from 10 through 80 inclusive.
ages = np.arange(10, 81)

# Build one [age, frequency, threshold] row per (age, frequency) pair.
# Age varies slowest, frequency fastest.
dataset = [
    [subject_age, hz, hearing_threshold_by_age(hz, subject_age)]
    for subject_age in ages
    for hz in all_frequencies
]

# Tabulate the rows under named columns.
column_names = ["Age", "Frequency", "Hearing_Threshold_dB"]
df = pd.DataFrame(dataset, columns=column_names)

# Write the table to disk without the index column, then report completion.
output_path = "hearing_age_dataset.csv"
df.to_csv(output_path, index=False)
print("Dataset generated and saved to 'hearing_age_dataset.csv'")


Dataset generated and saved to 'hearing_age_dataset.csv'
