In [None]:
import json  # Parses and loads JSON files for rules and configurations
import unittest  # Framework for writing and running unit tests

In [None]:
# Load the time series rules from the JSON file.
# Every function, example and test below reads the rules through the name
# `time_series_data_rules`, so that is the name bound here. JSON text is
# UTF-8, so the encoding is pinned instead of relying on the locale default.
with open("analysis/data/derivedData/rules_timeseries.json", "r", encoding="utf-8") as file:
    time_series_data_rules = json.load(file)

# Backward-compatible alias for code that still uses the earlier name.
numerical_data_rules = time_series_data_rules

In [None]:
# Load the configuration file that holds the dataset size thresholds.
# JSON text is UTF-8, so the encoding is pinned instead of relying on the
# platform's locale default (which is not UTF-8 on every system).
with open("analysis/data/derivedData/config.json", "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

# Threshold values for small and large datasets, read from the
# "dataset_thresholds" section of the configuration.
SMALL_THRESHOLD = config["dataset_thresholds"]["small_dataset"]
LARGE_THRESHOLD = config["dataset_thresholds"]["large_dataset"]

In [3]:
def validate_logic(models, condition=None):
    """
    Report whether at least one model was found for a condition.

    Args:
        models (list): Models retrieved from the rules; any falsy value
            (empty list, None) counts as "no models".
        condition (str, optional): Condition name, used only in the warning text.

    Returns:
        bool: True when `models` is non-empty, False otherwise (a warning
        is printed in the False case).
    """
    if models:
        return True

    # Nothing matched: tell the user which condition came up empty.
    print(f"Warning: No models fit the condition '{condition}'.")
    return False

def handle_flat_conditions(sub_conditions, indent_level=2):
    """
    Render one "Use <approach> models: ..." line per approach.

    Args:
        sub_conditions (dict): Maps an approach label (e.g. "ML", "DL") to a
            list of model names.
        indent_level (int): Number of two-space indents to prefix each line with.

    Returns:
        str: The rendered lines, each terminated by a newline; an empty
        string when `sub_conditions` is empty.
    """
    prefix = indent_level * "  "
    rendered_lines = [
        f"{prefix}Use {approach} models: {', '.join(model_list)}\n"
        for approach, model_list in sub_conditions.items()
    ]
    return "".join(rendered_lines)

def generate_task_logic(data, task_name):
    """
    Build the decision-logic text for one task.

    Args:
        data (dict): Rules dictionary; the task's conditions are read from
            data["tasks"][task_name].
        task_name (str): Key of the task inside data["tasks"]. Underscores
            are replaced by spaces in the heading line.

    Returns:
        str: A heading line followed by one "If <condition>:" section per
        specific condition and, when present, a final "Else:" section for
        the "default" entry.

    Raises:
        KeyError: If "tasks" or `task_name` is missing from `data`.
    """
    parts = [f"If the problem is {task_name.replace('_', ' ')}:\n"]
    task_details = data["tasks"][task_name]

    # Specific conditions are rendered in the order the rules define them.
    for condition, sub_conditions in task_details.items():
        if condition == "default":
            continue  # the fallback branch is rendered after the loop
        parts.append(f"  If {condition}:\n")
        parts.append(handle_flat_conditions(sub_conditions, indent_level=2))

    # "default" is the fallback branch, so it is always emitted last. This
    # keeps "Else:" from appearing before later "If" sections when the
    # rules happen to list "default" ahead of other conditions.
    if "default" in task_details:
        parts.append("  Else:\n")
        parts.append(handle_flat_conditions(task_details["default"], indent_level=2))

    return "".join(parts)

def generate_time_series_logic(task):
    """
    Produce the decision-logic text for a time series task.

    The task is looked up in the module-level `time_series_data_rules`.

    Args:
        task (str): Task key (e.g., 'time_series_forecasting', 'time_series_classification').

    Returns:
        str: Decision logic text built by generate_task_logic.

    Raises:
        ValueError: If `task` is not one of the tasks defined in the rules.
    """
    supported = time_series_data_rules["tasks"].keys()
    if task in supported:
        return generate_task_logic(time_series_data_rules, task)

    # Unknown task: list what is available to help the caller.
    raise ValueError(f"Task '{task}' is not supported. Available tasks: {', '.join(supported)}")

# Example usage: print the decision logic for each task, one after another
for _example_task in (
    "time_series_forecasting",
    "time_series_classification",
    "multi_variate_analysis",
    "long_term_dependencies",
    "real_time_processing",
):
    print(generate_time_series_logic(_example_task))

# Select the models whose rule conditions match the dataset characteristics
def validate_model_choice(task_rules, task_name, dataset_characteristics):
    """
    Collect the models that apply to a task for the given dataset characteristics.

    Every non-default condition of the task that appears in
    `dataset_characteristics` contributes all of its models. The "default"
    models are used only when nothing was collected AND at least one
    characteristic is itself a key of the task's rules.

    Args:
        task_rules (dict): Rules dictionary with a "tasks" mapping.
        task_name (str): Task to look up in task_rules["tasks"].
        dataset_characteristics (list): Condition names describing the dataset.

    Returns:
        list | None: The applicable model names, or None (after printing a
        warning) when no model applies.

    Raises:
        ValueError: If `task_name` is not present in task_rules["tasks"].
    """
    if task_name not in task_rules["tasks"]:
        raise ValueError(f"Task '{task_name}' is not supported.")

    task_details = task_rules["tasks"][task_name]

    def _flatten(sub_conditions):
        # All models of a condition, across every approach (ML, DL, ...).
        return [model for approach in sub_conditions.values() for model in approach]

    # Models from each specific condition the dataset exhibits; "default"
    # is excluded here and considered separately below.
    applicable_models = [
        model
        for condition, sub_conditions in task_details.items()
        if condition != "default" and condition in dataset_characteristics
        for model in _flatten(sub_conditions)
    ]

    # Fall back to the default models only when nothing was collected and
    # some characteristic is a known key of this task's rules.
    fallback_available = not applicable_models and "default" in task_details
    if fallback_available and any(cond in task_details for cond in dataset_characteristics):
        applicable_models = _flatten(task_details["default"])

    if applicable_models:
        return applicable_models

    print(f"Warning: No suitable models found for task '{task_name}' with characteristics {dataset_characteristics}.")
    return None

# Example validation: one characteristics list that matches rule conditions,
# and one that matches nothing
dataset_characteristics_time_series = ["is_stationary", "requires_real_time_constraints"]
dataset_characteristics_time_series_empty = ["unknown_characteristic"]

for _label, _characteristics in (
    ("Time Series Forecasting:", dataset_characteristics_time_series),
    ("Time Series Forecasting (No match):", dataset_characteristics_time_series_empty),
):
    print(_label, validate_model_choice(time_series_data_rules, "time_series_forecasting", _characteristics))

# Unit tests for the time series logic and model validation helpers
class TestTimeSeriesLogic(unittest.TestCase):
    """
    Unit tests for generate_time_series_logic and validate_model_choice,
    run against the loaded time series rules.
    """

    def test_time_series_forecasting(self):
        """
        The forecasting logic text mentions ARIMA.
        """
        logic_text = generate_time_series_logic("time_series_forecasting")
        self.assertIn("ARIMA", logic_text, "Expected 'ARIMA' in time series forecasting logic.")

    def test_time_series_classification(self):
        """
        The classification logic text mentions LSTM.
        """
        logic_text = generate_time_series_logic("time_series_classification")
        self.assertIn("LSTM", logic_text, "Expected 'LSTM' in time series classification logic.")

    def test_invalid_task(self):
        """
        An unsupported task name raises ValueError.
        """
        self.assertRaises(ValueError, generate_time_series_logic, "invalid_task")

    def test_model_validation(self):
        """
        Characteristics that match rule conditions yield ARIMA among the models.
        """
        characteristics = ["is_stationary", "requires_real_time_constraints"]
        models = validate_model_choice(
            time_series_data_rules,
            "time_series_forecasting",
            characteristics,
        )
        self.assertIn("ARIMA", models, "Expected 'ARIMA' in the applicable models for time series forecasting.")

    def test_invalid_characteristics(self):
        """
        Characteristics that match no rule condition yield None.
        """
        characteristics = ["unknown_condition"]
        models = validate_model_choice(
            time_series_data_rules,
            "time_series_forecasting",
            characteristics,
        )
        self.assertIsNone(models, "Expected no models to be found for unknown conditions.")

# Run the test suite in place. An explicit argv keeps unittest from parsing
# the host process's command-line arguments, and exit=False stops it from
# calling sys.exit() once the tests finish.
if __name__ == "__main__":
    unittest.main(exit=False, argv=[""])


.....
----------------------------------------------------------------------
Ran 5 tests in 0.014s

OK


If the problem is time series forecasting:
  If is_stationary:
    Use ML models: ARIMA
  If has_seasonality_or_trend:
    Use ML models: SARIMA, Prophet
  If requires_real_time_constraints:
    Use DL models: Light-weight LSTM
  Else:
    Use DL models: Transformer Models

If the problem is time series classification:
  If requires_real_time_constraints:
    Use DL models: Light-weight CNNs, LSTM
  Else:
    Use DL models: Transformer Models

If the problem is multi variate analysis:
  If requires_interpretability:
    Use ML models: Vector Autoregression (VAR)
  Else:
    Use DL models: Time Series Transformer

If the problem is long term dependencies:
  If requires_long_term_dependencies:
    Use DL models: Long LSTM Models, GRU
  Else:
    Use DL models: Transformer Models

If the problem is real time processing:
  Else:
    Use DL models: Temporal Convolutional Network (TCN)

Time Series Forecasting: ['ARIMA', 'Light-weight LSTM']
Time Series Forecasting (No match): None
