# In [None]:
import sklearn.tree # type: ignore

# Baseline regression tree. Every hyperparameter is spelled out at its
# scikit-learn default so the starting configuration is visible at a glance
# and individual values can be changed in place.
decisiontree_model = sklearn.tree.DecisionTreeRegressor(
    # --- How candidate splits are scored and chosen -------------------------
    # Split-quality measure (default "squared_error"). Alternatives:
    #   "friedman_mse"   - MSE with Friedman's improvement score.
    #   "absolute_error" - mean absolute error.
    #   "poisson"        - Poisson deviance, intended for count-like targets.
    criterion="squared_error",
    # "best" (default) takes the best split under the criterion;
    # "random" takes the best among randomly drawn splits.
    splitter="best",
    # Features examined per split (default None = all features).
    # Also accepts "sqrt", "log2", an int (exact count) or a float (fraction).
    max_features=None,
    # Seed for the estimator's random number generator (default None).
    # Pass an integer to make repeated fits reproducible.
    random_state=None,

    # --- Limits on tree size ------------------------------------------------
    # Maximum depth (default None): nodes keep expanding until leaves are pure
    # or hold fewer than min_samples_split samples.
    max_depth=None,
    # Cap on the number of leaves (default None = unlimited).
    max_leaf_nodes=None,

    # --- Minimum node / leaf sizes ------------------------------------------
    # Samples needed before a node may be split (default 2).
    # int = absolute count, float = fraction of the training samples.
    min_samples_split=2,
    # Samples that must remain in each leaf (default 1).
    # int = absolute count, float = fraction of the training samples.
    min_samples_leaf=1,
    # Minimum share of the total sample weight a leaf must hold (default 0.0).
    # Only relevant when sample weights are supplied to fit().
    min_weight_fraction_leaf=0.0,

    # --- Split acceptance and pruning ---------------------------------------
    # A split is made only if it lowers impurity by at least this much
    # (default 0.0).
    min_impurity_decrease=0.0,
    # Cost-complexity pruning strength (default 0.0 = no pruning); larger
    # values prune more aggressively.
    ccp_alpha=0.0,
)


# Hyperparameter search space for a DecisionTreeRegressor.
#
# NOTE: the full Cartesian product of the lists below is
# 4 * 2 * 5 * 4 * 4 * 4 * 5 * 1 * 5 * 3 * 4 = 768,000 candidates (before
# multiplying by the number of CV folds). An exhaustive grid search over all
# of it is rarely practical; consider a randomized search or trimming axes.
param_grid = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],  # Split quality measure.
    # "squared_error" minimizes mean squared error; "friedman_mse" is MSE with
    # Friedman's improvement score; "absolute_error" is more robust to outliers
    # but slower to fit; "poisson" targets count data and requires non-negative
    # target values (fits with negative targets will fail).

    'splitter': ['best', 'random'],  # Determines how to choose the split at each node.
    # "best" chooses the best split, "random" chooses the best among random
    # splits, which may speed up training and adds extra randomization.

    'max_depth': [None, 5, 10, 20, 50],  # Maximum depth of the tree. Controls model complexity.
    # Small values (e.g., 5) limit overfitting but might underfit. Large values
    # (e.g., 50) or None (no limit) increase complexity and risk overfitting.

    'min_samples_split': [2, 5, 10, 20],  # Minimum samples required to split an internal node.
    # Small values (e.g., 2) allow the tree to grow deeper but may lead to
    # overfitting. Large values (e.g., 20) yield smaller, more general trees.

    'min_samples_leaf': [1, 2, 5, 10],  # Minimum samples required at a leaf node.
    # Small values (e.g., 1) may cause overfitting, while larger values
    # (e.g., 10) smooth the predictions.

    'min_weight_fraction_leaf': [0.0, 0.01, 0.05, 0.1],  # Minimum weighted fraction of samples in a leaf.
    # 0.0 places no constraint; larger values (e.g., 0.1) forbid leaves that
    # hold only a small share of the total sample weight.

    'max_features': [None, 'sqrt', 'log2', 0.5, 0.7],  # Number of features considered for a split.
    # None considers every feature. Subsets ('sqrt', 'log2', or a fraction such
    # as 0.5) add randomness: typically more bias, less variance.
    # 'auto' is intentionally absent: it was deprecated in scikit-learn 1.1 and
    # removed in 1.3, and for regressors it was only an alias of None.

    'random_state': [42],  # Ensures reproducibility of results.
    # A single fixed seed keeps results consistent across runs.

    'max_leaf_nodes': [None, 10, 20, 50, 100],  # Maximum number of leaf nodes.
    # Small values (e.g., 10) keep the tree simple. Large values or None
    # (unlimited) allow more splits, increasing flexibility.

    'min_impurity_decrease': [0.0, 0.01, 0.05],  # Minimum impurity decrease for a split.
    # 0.0 accepts any improving split, while larger values (e.g., 0.05) result
    # in fewer splits and simpler trees.

    'ccp_alpha': [0.0, 0.01, 0.05, 0.1],  # Cost-complexity pruning strength.
    # 0.0 disables pruning; larger values (e.g., 0.1) prune aggressively,
    # trading training fit for simpler trees.
}
