In [None]:
import os # Handles file paths and directories
import json # Parses and loads JSON files for rules and configurations
import unittest # Framework for writing and running unit tests

In [None]:
# Load the audio task rules from the JSON file. Every function, example and
# test in this notebook reads them through the module-level name
# `audio_data_rules`, so that is the name that must be bound here; otherwise a
# fresh kernel fails with NameError on the first use.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# locale default.
with open("analysis/data/derivedData/rules_audio.json", "r", encoding="utf-8") as file:
    audio_data_rules = json.load(file)

# Backward-compatible alias: this cell previously bound the rules to
# `numerical_data_rules`, so keep that name pointing at the same object.
numerical_data_rules = audio_data_rules

In [None]:
# Load the configuration file that holds the dataset-size thresholds.
# The encoding is pinned to UTF-8 so the JSON is decoded the same way on every
# platform instead of with the locale-dependent default of open().
with open("analysis/data/derivedData/config.json", "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

# Threshold values for small and large datasets, read from the
# "dataset_thresholds" section of the configuration.
SMALL_THRESHOLD = config["dataset_thresholds"]["small_dataset"]
LARGE_THRESHOLD = config["dataset_thresholds"]["large_dataset"]

In [1]:
def validate_logic(models, condition=None):
    """
    Report whether any models were found for a condition.

    Args:
        models (list): Models retrieved from the rules; an empty or falsy
            value counts as "nothing found".
        condition (str, optional): Name of the condition, used only in the
            warning message.

    Returns:
        bool: True when `models` is non-empty; False otherwise (a warning is
        printed in that case).
    """
    has_models = bool(models)
    if has_models:
        return True

    # Nothing matched: tell the user which condition came up empty.
    print(f"Warning: No models fit the condition '{condition}'.")
    return False

def handle_flat_conditions(sub_conditions, indent_level=2):
    """
    Render one "Use <approach> models: ..." line per approach.

    Args:
        sub_conditions (dict): Maps an approach label to a list of model
            names.
        indent_level (int): Number of two-space indents placed before each
            line.

    Returns:
        str: The rendered lines, each terminated by a newline; an empty
        string when `sub_conditions` is empty.
    """
    prefix = indent_level * "  "
    rendered = [
        f"{prefix}Use {approach} models: {', '.join(models)}\n"
        for approach, models in sub_conditions.items()
    ]
    return "".join(rendered)

def generate_task_logic(data, task_name):
    """
    Build the decision-logic text for a single task.

    Args:
        data (dict): Rules dictionary; the task is looked up under
            data["tasks"][task_name].
        task_name (str): Task key; underscores are shown as spaces in the
            heading line.

    Returns:
        str: A heading line followed by one "If <condition>:" section per
        condition (the "default" condition is rendered as "Else:"), each
        followed by its model lines.
    """
    readable_name = task_name.replace("_", " ")
    pieces = [f"If the problem is {readable_name}:\n"]

    for condition, sub_conditions in data["tasks"][task_name].items():
        # The catch-all entry is shown as the final "Else" branch.
        header = "  Else:\n" if condition == "default" else f"  If {condition}:\n"
        pieces.append(header)
        pieces.append(handle_flat_conditions(sub_conditions, indent_level=2))

    return "".join(pieces)

def generate_audio_logic(task):
    """
    Produce the ML/DL decision-logic text for one audio task.

    The rules are read from the module-level `audio_data_rules`.

    Args:
        task (str): Task type (e.g., 'audio_classification', 'speech_recognition').

    Returns:
        str: Decision logic text for the task.

    Raises:
        ValueError: If `task` is not one of the tasks defined in the rules.
    """
    supported = audio_data_rules["tasks"]
    if task in supported:
        return generate_task_logic(audio_data_rules, task)

    # Unknown task: list the accepted names so the caller can correct the input.
    raise ValueError(f"Task '{task}' is not supported. Available tasks: {', '.join(supported)}")

# Example usage: print the generated decision logic for each audio task,
# in the same order as before.
for _example_task in (
    "audio_classification",
    "sequential_audio_patterns",
    "speech_recognition",
    "text_to_speech_synthesis",
    "audio_synthesis",
):
    print(generate_audio_logic(_example_task))

# Function to validate model choices based on dataset characteristics
def validate_model_choice(task_rules, task_name, dataset_characteristics):
    """
    Collect the models whose rule conditions appear in the dataset characteristics.

    Args:
        task_rules (dict): Rules dictionary; tasks live under task_rules["tasks"].
        task_name (str): Task whose rules are consulted.
        dataset_characteristics: Collection of condition names describing the
            dataset; membership is tested with `in`.

    Returns:
        list | None: Models of every matching non-default condition, in the
        order the conditions appear in the rules. When none are collected, the
        "default" models are used only if at least one characteristic is
        itself a key of the task's rules. If the result is still empty, a
        warning is printed and None is returned.

    Raises:
        ValueError: If `task_name` is not defined in the rules.
    """
    tasks = task_rules["tasks"]
    if task_name not in tasks:
        raise ValueError(f"Task '{task_name}' is not supported.")

    rules_for_task = tasks[task_name]

    def _flatten(approaches):
        # approaches maps an approach label to a list of model names.
        return [model for models in approaches.values() for model in models]

    chosen = []
    for condition, approaches in rules_for_task.items():
        # "default" is never matched directly; it is only a fallback below.
        if condition != "default" and condition in dataset_characteristics:
            chosen += _flatten(approaches)

    if not chosen and "default" in rules_for_task:
        # Fall back to the defaults only when some characteristic names a key
        # of this task's rules; entirely unknown characteristics yield None.
        names_known = any(name in rules_for_task for name in dataset_characteristics)
        if names_known:
            chosen += _flatten(rules_for_task["default"])

    if chosen:
        return chosen

    print(f"Warning: No suitable models found for task '{task_name}' with characteristics {dataset_characteristics}.")
    return None

# Example validation for audio data rules
# Characteristics that match conditions in the audio-classification rules.
dataset_characteristics_audio = [
    "requires_real_time_constraints",
    "is_proximity_based",
]
print(
    "Audio Classification:",
    validate_model_choice(audio_data_rules, "audio_classification", dataset_characteristics_audio),
)

# A characteristic that matches no condition, to show the "no match" result.
dataset_characteristics_audio_empty = [
    "irrelevant_characteristic",
]
print(
    "Audio Classification (No match):",
    validate_model_choice(audio_data_rules, "audio_classification", dataset_characteristics_audio_empty),
)

# Unit tests for audio data logic
class TestAudioLogic(unittest.TestCase):
    """
    Checks for generate_audio_logic and validate_model_choice against the
    loaded audio rules.
    """

    def test_audio_classification_logic(self):
        """
        The audio classification text mentions CNN-based models.
        """
        logic_text = generate_audio_logic("audio_classification")
        self.assertIn(
            "CNN-based Models",
            logic_text,
            msg="Expected 'CNN-based Models' in audio classification logic.",
        )

    def test_speech_recognition_logic(self):
        """
        The speech recognition text mentions DeepSpeech.
        """
        logic_text = generate_audio_logic("speech_recognition")
        self.assertIn(
            "DeepSpeech",
            logic_text,
            msg="Expected 'DeepSpeech' in speech recognition logic.",
        )

    def test_invalid_task(self):
        """
        An unsupported task name raises ValueError.
        """
        self.assertRaises(ValueError, generate_audio_logic, "invalid_task")

    def test_model_validation(self):
        """
        Known characteristics yield a model list containing CNN-based models.
        """
        characteristics = ["requires_real_time_constraints", "is_proximity_based"]
        models = validate_model_choice(audio_data_rules, "audio_classification", characteristics)
        self.assertIn(
            "CNN-based Models",
            models,
            msg="Expected 'CNN-based Models' in the applicable models for audio classification.",
        )

    def test_invalid_characteristics(self):
        """
        Characteristics that match no rule condition yield None.
        """
        characteristics = ["unknown_condition"]
        models = validate_model_choice(audio_data_rules, "audio_classification", characteristics)
        self.assertIsNone(models, msg="Expected no models to be found for unknown conditions.")

# Run the unit tests when this code is executed as the main module.
# argv=[''] supplies a placeholder program name so unittest does not parse the
# host process's own command-line arguments, and exit=False keeps
# unittest.main() from calling sys.exit() once the tests finish, so execution
# continues afterwards.
if __name__ == "__main__":
    unittest.main(argv=[''], exit=False)


.....
----------------------------------------------------------------------
Ran 5 tests in 0.012s

OK


If the problem is audio classification:
  If requires_real_time_constraints:
    Use DL models: CNN-based Models
  If is_proximity_based:
    Use ML models: K-Nearest Neighbors (KNN)
  If small_dataset:
    Use ML models: Support Vector Machine (SVM)
  If requires_interpretability:
    Use ML models: Decision Trees
  If binary_classification:
    Use ML models: Logistic Regression
  Else:
    Use DL models: RNN-based Models

If the problem is sequential audio patterns:
  If requires_long_term_dependencies:
    Use DL models: RNN, LSTM
  If requires_real_time_constraints:
    Use DL models: Temporal Convolutional Networks (TCN)
  Else:
    Use ML models: Hidden Markov Models (HMM)

If the problem is speech recognition:
  If speech_to_text:
    Use DL models: CTC with LSTM
  If requires_real_time_constraints:
    Use DL models: DeepSpeech, Wav2Vec 2.0
  Else:
    Use DL models: Speech Transformer

If the problem is text to speech synthesis:
  If requires_real_time_constraints:
    Use DL