In [1]:
import os
import json

In [None]:
# Load the model-selection rules used by every *_logic function below.
# The encoding is pinned to UTF-8 (the encoding JSON text is exchanged in) so
# the file is decoded the same way regardless of the platform's locale default.
with open("analysis/data/derivedData/rules_numerical.json", "r", encoding="utf-8") as file:
    numerical_data_rules = json.load(file)

In [None]:
# Load the notebook configuration. The encoding is pinned to UTF-8 so decoding
# does not depend on the platform's locale default.
with open("analysis/data/derivedData/config.json", "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

# Thresholds taken from the "dataset_thresholds" section of the config file
# (presumably dataset-size cut-offs; confirm units against config.json).
SMALL_THRESHOLD = config["dataset_thresholds"]["small_dataset"]
LARGE_THRESHOLD = config["dataset_thresholds"]["large_dataset"]

In [None]:
def validate_parameters(data_type, task):
    """Check that ``task`` is a task key present in the loaded numerical rules.

    Args:
        data_type: Accepted so the signature mirrors the caller's arguments;
            it is not inspected by this function.
        task: Key to look up under ``numerical_data_rules["tasks"]``.

    Raises:
        ValueError: If ``task`` is not one of the keys of
            ``numerical_data_rules["tasks"]``; the message lists those keys.
    """
    known_tasks = numerical_data_rules["tasks"].keys()
    if task in known_tasks:
        return

    message = f"Invalid task: {task}. Choose from {', '.join(known_tasks)}."
    raise ValueError(message)

In [None]:
def _format_model_lines(models_by_approach, indent):
    """Render one ``Use <approach> models: a, b`` line per approach, prefixed by ``indent``."""
    return [
        f"{indent}Use {approach} models: {', '.join(model_list)}\n"
        for approach, model_list in models_by_approach.items()
    ]


def _is_model_mapping(node):
    """Return True when ``node`` is a dict whose values are all lists/tuples of model names."""
    return isinstance(node, dict) and all(
        isinstance(value, (list, tuple)) for value in node.values()
    )


def regression_logic(data):
    """Build the plain-text decision outline for regression tasks.

    Args:
        data (dict): Rules mapping. Reads ``data["tasks"]["regression"]``, whose
            entries are conditions. A condition maps either

            * directly to ``{approach: [model names]}`` (flat form), or
            * to sub-conditions, each mapping to ``{approach: [model names]}``.
              The sub-condition named ``"requires_regularization"`` carries one
              extra level: ``{regularization_type: {approach: [model names]}}``,
              where the ``"default"`` regularization type is rendered as ``Else:``.

    Returns:
        str: The header line followed by one indented line per condition,
        sub-condition and approach, each terminated by a newline.
    """
    lines = ["If the problem is a regression task:\n"]
    task_details = data["tasks"]["regression"]

    for condition, sub_conditions in task_details.items():
        lines.append(f"  If the condition is {condition}:\n")

        # A flat {approach: [models]} mapping is itself a dict, so testing only
        # isinstance(dict) cannot tell it apart from nested sub-conditions.
        # The shape of the values is inspected instead so the flat form is
        # rendered directly rather than failing on ``list.items()``.
        if isinstance(sub_conditions, dict) and not _is_model_mapping(sub_conditions):
            for sub_condition, models in sub_conditions.items():
                lines.append(f"    If {sub_condition}:\n")
                if sub_condition == "requires_regularization":
                    for regularization_type, reg_models in models.items():
                        if regularization_type == "default":
                            # The catch-all regularization choice reads as an else-branch.
                            lines.append("      Else:\n")
                        else:
                            lines.append(f"      If {regularization_type}:\n")
                        lines.extend(_format_model_lines(reg_models, "        "))
                else:
                    lines.extend(_format_model_lines(models, "      "))
        else:
            lines.extend(_format_model_lines(sub_conditions, "    "))

    return "".join(lines)


In [None]:
def clustering_logic(data):
    """Build the plain-text decision outline for clustering tasks.

    Args:
        data (dict): Rules mapping. Reads ``data["tasks"]["clustering"]``, where
            each condition maps to ``{approach: [model names]}``.

    Returns:
        str: A header line, then for every condition an
        ``If the condition is ...`` line followed by one ``Use ... models``
        line per approach. Every line ends with a newline.
    """
    pieces = ["If the problem is a clustering task:\n"]

    for condition_name, approach_map in data["tasks"]["clustering"].items():
        pieces.append(f"  If the condition is {condition_name}:\n")
        pieces.extend(
            f"    Use {kind} models: {', '.join(names)}\n"
            for kind, names in approach_map.items()
        )

    return "".join(pieces)

In [None]:
def dimensionality_reduction_logic(data):
    """Build the plain-text decision outline for dimensionality reduction.

    Args:
        data (dict): Rules mapping. Reads
            ``data["tasks"]["dimensionality_reduction"]``, where each condition
            maps to ``{approach: [model names]}``.

    Returns:
        str: The header line followed by one section per condition; a section
        is the condition line plus one ``Use ... models`` line per approach.
    """
    header = "If the problem is dimensionality reduction:\n"
    rules = data["tasks"]["dimensionality_reduction"]

    sections = []
    for condition, recommendations in rules.items():
        # Assemble each condition's section as a single string, then concatenate.
        section = f"  If the condition is {condition}:\n"
        section += "".join(
            f"    Use {approach} models: {', '.join(model_names)}\n"
            for approach, model_names in recommendations.items()
        )
        sections.append(section)

    return header + "".join(sections)

In [None]:
def generate_numerical_logic(data_type, task):
    """
    Generate ML/DL decision logic dynamically based on input parameters.

    Args:
        data_type (str): Type of data (e.g., 'Numerical'). Forwarded to
            ``validate_parameters`` and echoed in the unsupported-task error.
        task (str): Task type. The canonical keys are 'regression',
            'clustering' and 'dimensionality_reduction'. Display spellings
            such as 'Regression' or 'Dimensionality Reduction' are also
            accepted and mapped to the canonical key.

    Returns:
        str: Decision logic text.

    Raises:
        ValueError: If ``task`` is rejected by ``validate_parameters`` or has
            no logic generator in this function.
    """
    supported_tasks = ("regression", "clustering", "dimensionality_reduction")

    # Accept the human-readable spellings: lower-case the name and join
    # whitespace-separated words with underscores. The normalised form is only
    # adopted when it is a supported key, so canonical inputs are untouched and
    # error messages for unknown tasks still show the caller's original text.
    if isinstance(task, str):
        candidate = "_".join(task.lower().split())
        if candidate in supported_tasks:
            task = candidate

    validate_parameters(data_type, task)
    if task == "regression":
        return regression_logic(numerical_data_rules)
    elif task == "clustering":
        return clustering_logic(numerical_data_rules)
    elif task == "dimensionality_reduction":
        return dimensionality_reduction_logic(numerical_data_rules)
    else:
        # Reached when the rules file lists a task this notebook has no generator for.
        raise ValueError(f"Task {task} not supported for data type {data_type}.")

In [None]:
# Generate logic for numerical regression task.
# print() is used instead of a bare last-line expression so the newlines in the
# returned string render as an indented outline rather than an escaped repr.
print(generate_numerical_logic("Numerical", "regression"))

If the problem is a regression task:
  If the condition is linear:
    If two_variables:
      Use ML models: Simple Linear Regression
    If multiple_variables:
      Use ML models: Multiple Linear Regression
  If the condition is non_linear:
    If curve_fitting:
      Use ML models: Polynomial Regression
    If decision_boundaries:
      Use ML models: Support Vector Regression (SVR)
    If requires_regularization:
      If feature_selection:
        Use ML models: Lasso Regression
      If high_dimensional:
        Use ML models: Ridge Regression
      Else:
        Use ML models: Elastic Net Regression
    If default:
      Use ML models: Gradient Boosting
      Use DL models: Deep Neural Networks (DNN)



In [None]:
# Generate logic for numerical clustering task.
# The result is a multi-line string, so it is printed to show one rule per line.
print(generate_numerical_logic("Numerical", "clustering"))

If the problem is a clustering task:
  If the condition is distinct_clusters:
    Use ML models: K-Means Clustering
  If the condition is hierarchical_structure:
    Use ML models: Hierarchical Clustering
  If the condition is arbitrary_shapes:
    Use ML models: DBSCAN
  If the condition is density_based:
    Use ML models: Mean Shift Clustering
  If the condition is default:
    Use ML models: Agglomerative Clustering
    Use DL models: Autoencoder-based Clustering



In [None]:
# Generate logic for numerical dimensionality reduction task.
# The task key uses the underscore form ("dimensionality_reduction"), matching
# the comparison inside generate_numerical_logic.
print(generate_numerical_logic("Numerical", "dimensionality_reduction"))

If the problem is dimensionality reduction:
  If the condition is maximize_variance:
    Use ML models: Principal Component Analysis (PCA)
  If the condition is supervised_class_separation:
    Use ML models: Linear Discriminant Analysis (LDA)
  If the condition is local_structure:
    Use ML models: t-SNE
  If the condition is local_global_structure:
    Use ML models: UMAP
  If the condition is default:
    Use ML models: Independent Component Analysis (ICA)
    Use DL models: Variational Autoencoders (VAE)

