# Dependencies

In [29]:
import hashlib
import json
import os

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 10)

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.svm import SVC, SVR
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from xgboost import XGBClassifier, XGBRegressor

from sklearn.model_selection import GridSearchCV
from sklearn.base import is_classifier

In [30]:
import sklearn

# Record the installed release so the results below can be tied to a version.
print(f"scikit-learn version: {sklearn.__version__}")

scikit-learn version: 1.2.2


# Importing data

In [31]:
# Location of the input CSV. The original machine-specific path is kept as the
# default so existing runs are unaffected; set the IRIS_CSV_PATH environment
# variable to run the notebook on another machine.
csv_path = os.environ.get(
    "IRIS_CSV_PATH",
    r"C:\Users\Kritin\Desktop\Imarticus\Hackathon\Stage 2\iris_modified.csv",
)
df = pd.read_csv(csv_path)

In [32]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [33]:
# (rows, columns) of the freshly loaded frame.
df.shape

(150, 5)

In [34]:
# Location of the UI-exported configuration. The original machine-specific
# path is kept as the default; set ALGOPARAMS_JSON_PATH to override it.
json_file = os.environ.get(
    "ALGOPARAMS_JSON_PATH",
    "C:/Users/Kritin/Desktop/Imarticus/Hackathon/Stage 2/algoparams_from_ui1.json",
)
# Decode explicitly as UTF-8 instead of relying on the platform default
# encoding, which differs between operating systems.
with open(json_file, 'r', encoding="utf-8") as f:
    config = json.load(f)

In [57]:
# Sanity check: the max_depth candidates read for the random-forest classifier.
rf_settings = config.get("design_state_data").get("algorithms").get("RandomForestClassifier")
test = [None] + [rf_settings.get(bound) for bound in ("min_depth", "max_depth")]
print(test)

[None, 20, 30]


# Feature handling

### Feature Elimination

In [36]:
# Drop every feature that the configuration marks as not selected.
# Each feature_handling item maps a feature key to a dict of details; a
# missing "is_selected" flag counts as selected (True), so that column is kept.
feature_handling = config.get("design_state_data", {}).get("feature_handling", {})
unselected_features = [
    feature
    for feature, details in feature_handling.items()
    if not details.get("is_selected", True)
]

# Rebind `df` rather than mutating it in place, and ignore columns that are
# already absent, so re-running this cell (or a configured feature missing
# from the CSV) no longer raises KeyError.
df = df.drop(columns=unselected_features, errors="ignore")

In [37]:
# (rows, columns) after dropping the unselected features.
df.shape

(150, 4)

In [38]:
# Preview the frame after feature elimination.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,species
0,5.1,3.5,1.4,Iris-setosa
1,4.9,3.0,1.4,Iris-setosa
2,4.7,3.2,1.3,Iris-setosa
3,4.6,3.1,1.5,Iris-setosa
4,5.0,3.6,1.4,Iris-setosa


### Handling Missing Values

In [39]:
# Fill missing values in every configured feature that is still present in df.
#
# NOTE(review): "missing_values" and "impute_with" are read directly from each
# feature's details dict. Confirm the exported JSON does not nest them in a
# sub-dictionary; if it does, no column is ever imputed by this cell.
feature_handling = config.get("design_state_data", {}).get("feature_handling", {})
for details in feature_handling.values():
    feature_name = details.get("feature_name")
    if feature_name not in df.columns or details.get("missing_values") != "Impute":
        continue

    column = df[feature_name]
    # Imputation method comes from "impute_with"; "mean" is the fallback when
    # the key is absent, so it has to be accepted as a method name as well.
    impute_value = details.get("impute_with", "mean")
    if impute_value in ("Average of values", "mean"):
        fill_value = column.mean()
    elif impute_value == "median":
        fill_value = column.median()
    elif impute_value == "mode":
        modes = column.mode()
        if modes.empty:
            # An all-missing column has no mode; leave it untouched.
            continue
        fill_value = modes[0]
    elif impute_value == "Tokenize and hash":
        # Hash the present values and leave missing ones alone: calling
        # .encode() on NaN raises AttributeError.
        # NOTE(review): because filling is index-aligned, a missing cell only
        # ever receives the (still missing) value at its own position, so this
        # branch cannot actually fill anything. Confirm the intended
        # behaviour for text features.
        fill_value = column.apply(
            lambda x: x if pd.isna(x) else hashlib.sha256(str(x).encode()).hexdigest()
        )
    else:
        # Unknown method: leave the column as it is.
        continue

    # Assign the result back instead of fillna(..., inplace=True) on a column
    # selection, which is chained assignment and may not update df.
    df[feature_name] = column.fillna(fill_value)

In [40]:
# (rows, columns) after the imputation step.
df.shape

(150, 4)

In [41]:
# Preview the frame after the imputation step.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,species
0,5.1,3.5,1.4,Iris-setosa
1,4.9,3.0,1.4,Iris-setosa
2,4.7,3.2,1.3,Iris-setosa
3,4.6,3.1,1.5,Iris-setosa
4,5.0,3.6,1.4,Iris-setosa


# Splitting the data

In [42]:
# Name of the column to predict, read from design_state_data -> target -> target.
# Plain indexing is used, so a missing key raises KeyError here.
target = config["design_state_data"]["target"]["target"]

In [43]:
# Separate the label column from the predictor columns.
y = df[target]
X = df.drop(target, axis=1)

In [44]:
# Show the dimensions of the feature matrix, then of the target vector.
for part in (X, y):
    print(part.shape)

(150, 3)
(150,)


In [45]:
# Preview the feature matrix (the target column has been removed).
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length
0,5.1,3.5,1.4
1,4.9,3.0,1.4
2,4.7,3.2,1.3
3,4.6,3.1,1.5
4,5.0,3.6,1.4


In [46]:
# Display the target series.
y

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: species, Length: 150, dtype: object

# Model Building

### For my reference

In [47]:

#   {
#  'RandomForestClassifier': {'model_name': 'Random Forest Classifier', 'is_selected': True, 'min_trees': 10, 'max_trees': 30, 'feature_sampling_statergy': 'Default', 'min_depth': 20, 'max_depth': 30, 'min_samples_per_leaf_min_value': 5, 'min_samples_per_leaf_max_value': 50, 'parallelism': 0},
#  'RandomForestRegressor': {'model_name': 'Random Forest Regressor', 'is_selected': False, 'min_trees': 10, 'max_trees': 20, 'feature_sampling_statergy': 'Default', 'min_depth': 20, 'max_depth': 25, 'min_samples_per_leaf_min_value': 5, 'min_samples_per_leaf_max_value': 10, 'parallelism': 0}, 
#  'LinearRegression': {'model_name': 'LinearRegression', 'is_selected': False, 'parallelism': 2, 'min_iter': 30, 'max_iter': 50, 'min_regparam': 0.5, 'max_regparam': 0.8, 'min_elasticnet': 0.5, 'max_elasticnet': 0.8}, 
#  'LogisticRegression': {'model_name': 'LogisticRegression', 'is_selected': False, 'parallelism': 2, 'min_iter': 30, 'max_iter': 50, 'min_regparam': 0.5, 'max_regparam': 0.8, 'min_elasticnet': 0.5, 'max_elasticnet': 0.8}, 
#  'RidgeRegression': {'model_name': 'RidgeRegression', 'is_selected': False, 'regularization_term': 'Specify values to test', 'min_iter': 30, 'max_iter': 50, 'min_regparam': 0.5, 'max_regparam': 0.8}, 
#  'LassoRegression': {'model_name': 'Lasso Regression', 'is_selected': False, 'regularization_term': 'Specify values to test', 'min_iter': 30, 'max_iter': 50, 'min_regparam': 0.5, 'max_regparam': 0.8}, 
#  'ElasticNetRegression': {'model_name': 'Lasso Regression', 'is_selected': False, 'regularization_term': 'Specify values to test', 'min_iter': 30, 'max_iter': 50, 'min_regparam': 0.5, 'max_regparam': 0.8, 'min_elasticnet': 0.5, 'max_elasticnet': 0.8}, 
#  'xg_boost': {'model_name': 'XG Boost', 'is_selected': False, 'use_gradient_boosted_tree': True, 'dart': True, 'tree_method': '', 'random_state': 0, 'max_num_of_trees': 0, 'early_stopping': True, 'early_stopping_rounds': 2, 'max_depth_of_tree': [56, 89], 'learningRate': [89, 76], 'l1_regularization': [77], 'l2_regularization': [78], 'gamma': [68], 'min_child_weight': [67], 'sub_sample': [67], 'col_sample_by_tree': [67], 'replace_missing_values': False, 'parallelism': 0}, 
#  'DecisionTreeRegressor': {'model_name': 'Decision Tree', 'is_selected': False, 'min_depth': 4, 'max_depth': 7, 'use_gini': False, 'use_entropy': True, 'min_samples_per_leaf': [12, 6], 'use_best': True, 'use_random': True}, 
#  'DecisionTreeClassifier': {'model_name': 'Decision Tree', 'is_selected': True, 'min_depth': 4, 'max_depth': 7, 'use_gini': False, 'use_entropy': True, 'min_samples_per_leaf': [12, 6], 'use_best': True, 'use_random': False}, 
#  'SVM': {'model_name': 'Support Vector Machine', 'is_selected': False, 'linear_kernel': True, 'rep_kernel': True, 'polynomial_kernel': True, 'sigmoid_kernel': True, 'c_value': [566, 79], 'auto': True, 'scale': True, 'custom_gamma_values': True, 'tolerance': 7, 'max_iterations': 7}, 
#  'KNN': {'model_name': 'KNN', 'is_selected': False, 'k_value': [78], 'distance_weighting': True, 'neighbour_finding_algorithm': 'Automatic', 'random_state': 0, 'p_value': 0}, 
#  'neural_network': {'model_name': 'Neural Network', 'is_selected': False, 'hidden_layer_sizes': [67, 89], 'activation': '', 'alpha_value': 0, 'max_iterations': 0, 'convergence_tolerance': 0, 'early_stopping': True, 'solver': 'ADAM', 'shuffle_data': True, 'initial_learning_rate': 0, 'automatic_batching': True, 'beta_1': 0, 'beta_2': 0, 'epsilon': 0, 'power_t': 0, 'momentum': 0, 'use_nesterov_momentum': False}
#   }

### Fetching the Algorithms to be built

In [48]:
# Map every algorithm family flagged "is_selected" to its display name.
algorithms = config.get("design_state_data", {}).get("algorithms", {})
selected_algorithms = {
    family: settings.get("model_name")
    for family, settings in algorithms.items()
    if settings.get("is_selected")
}

# Report the selection: one family/model pair per paragraph.
print("Selected Algorithms")
for algorithm, model_name in selected_algorithms.items():
    print(f"Model Family: {algorithm}\nModel: {model_name}\n")

Selected Algorithms
Model Family: RandomForestClassifier
Model: Random Forest Classifier

Model Family: DecisionTreeClassifier
Model: Decision Tree



### Splitting data into training and testing sets

In [49]:
# Seed and train fraction both come from the "train" section of the config.
train_settings = config.get("design_state_data").get("train")
random_state = train_settings.get("random_seed")
train_ratio = train_settings.get("train_ratio")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_ratio, random_state=random_state
)

# Print the shape of each partition.
partitions = (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
)
for label, part in partitions:
    print(f"{label}:", part.shape)

X_train: (120, 3)
y_train: (120,)
X_test: (30, 3)
y_test: (30,)


### Dictionary mapping model names to their respective classes

In [50]:
# Lookup table from the "model_name" strings used by the configuration to the
# estimator class that implements each of them.
# NOTE(review): the config reference dump in this notebook lists both
# DecisionTreeClassifier and DecisionTreeRegressor with model_name
# 'Decision Tree', ElasticNetRegression with model_name 'Lasso Regression',
# and neural_network with model_name 'Neural Network'. Confirm that the
# name-based keys below resolve to the intended estimators for those families.
model_classes = {
    # Classifiers
    "Random Forest Classifier": RandomForestClassifier,
    "LogisticRegression": LogisticRegression,
    "Decision Tree": DecisionTreeClassifier,
    "Support Vector Machine": SVC,
    "KNN": KNeighborsClassifier,
    "Neural Network Classifier": MLPClassifier,
    "XG Boost": XGBClassifier,
    # Regressors
    "Random Forest Regressor": RandomForestRegressor,
    "LinearRegression": LinearRegression,
    "RidgeRegression": Ridge,
    "Lasso Regression": Lasso,
    "ElasticNet Regression": ElasticNet,
    "Decision Tree Regressor": DecisionTreeRegressor,
    "Support Vector Regression": SVR,
    "KNN Regressor": KNeighborsRegressor,
    "Neural Network Regressor": MLPRegressor,
    "XG Boost Regressor": XGBRegressor,
}

### Store This (Experimentation)

In [51]:
# # Hyperparameters
        # hyperparameters = {}
        
        # # Adjust hyperparameters based on model type
        # if model_name == "Random Forest Classifier" or model_name == "Random Forest Regressor":
        #     hyperparameters.update({
        #         "n_estimators": details.get("max_trees"),
        #         "max_depth": details.get("max_depth"),
        #         "min_samples_split": details.get("min_samples_per_leaf_min_value"),
        #         "min_samples_leaf": details.get("min_samples_per_leaf_max_value"),
        #         "random_state": details.get("random_seed"),
        #         "max_features": None if details.get("feature_sampling_statergy") == "Default" else details.get("feature_sampling_statergy")
        #     })
        # elif model_name == "Random Forest Regressor":
        #     hyperparameters.update({
        #         "n_estimators": details.get("max_trees"),
        #         "max_depth": details.get("max_depth"),
        #         "min_samples_split": details.get("min_samples_per_leaf_min_value"),
        #         "min_samples_leaf": details.get("min_samples_per_leaf_max_value"),
        #         "random_state": details.get("random_seed"),
        #         "max_features": None if details.get("feature_sampling_statergy") == "Default" else details.get("feature_sampling_statergy")
        #     })
        # elif model_name == "LinearRegression":
        #     hyperparameters.update({
        #         "fit_intercept": details.get("fit_intercept"),
        #         "normalize": details.get("normalize")
        #     })
        # elif model_name == "LogisticRegression":
        #     hyperparameters.update({
        #         "max_iter" : details.get("max_iter")
        #     })
        # elif model_name == "RidgeRegression":
        #     hyperparameters.update({
        #         "max_iter" : details.get("max_iter")
        #     })
        # elif model_name == "Lasso Regression":
        #     hyperparameters.update({
        #         "max_iter" : details.get("max_iter")
        #     })
        # elif model_name == "Lasso Regression":
        #     hyperparameters.update({
        #         "max_iter" : details.get("max_iter")
        #     })
        # elif model_name == "XG Boost":
        #     hyperparameters.update({
        #         "random_state" : details.get("random_state"),
        #         "max_leaves" : details.get("max_num_of_trees"),
        #         "early_stopping_rounds" : details.get("early_stopping_rounds"),
        #         "max_depth" : details.get("max_depth_of_tree"),
        #         "learningRate" : details.get(learningRate"),
        #         "min_child_weight" : details.get("min_child_weight"),
        #         "subsample" : details.get("sub_sample"),
        #         "colsample_by_tree" : details.get("col_sample_by_tree"),
        #         "gamma" : details.get("gamma")
        #         "num_parallel_tree" : details.get("parallelism"),
        #         "reg_lambda" : details.get("l2_regularization"),
        #         "reg_alpha" : details.get("l1_regularization")
        #     })
        # elif model_name == "Decision Tree" and algorithms == "DecisionTreeRegressor":
        #     hyperparameters.update({
        #         "max_depth" : details("max_depth"),
        #         "min_samples_leaf" : details("min_samples_per_leaf")
        #         "splitter": "best" if details.get("use_best") else "random"
        #     })
        # elif model_name == "Decision Tree" and algorithms == "DecisionTreeClassifier":
        #     hyperparameters.update({
        #         "max_depth" : details("max_depth"),
        #         "min_samples_leaf" : details("min_samples_per_leaf"),
        #         "splitter" : "random" if details.get("use_random") else "best",
        #         "criterion" : "entropy" if details.get("use_entropy") else "gini"
        #     })
        # elif model_name == "Support Vector Machine":
        #     hyperparameters.update({
        #         "kernel" : "linear" if details.get("linear_kernel") else ("precomputed" if details.get("rep_kernel") else ("poly" if details.get("polynomial_kernel") else ("sigmoid" if details.get("sigmoid_kernel") else "rbf"))),
        #         "C" : details.get("c_value")
        #     })
        # # Adding the model name and adjusting hyperparameters to selected_algorithms dictionary
        # selected_algorithms[model_name] = hyperparameters

### Fetching the Hyperparameters

In [59]:
# Hyperparameter search spaces for GridSearchCV, built from the "algorithms"
# section of the UI configuration.
#
# Every value in a grid is a list of candidates and every key is a constructor
# argument of the estimator the grid is used with. Compared with building the
# grids inline from chained .get() calls, this version:
#   * includes the upper bound of integer ranges (max_trees, max_iter, ...);
#   * keeps fractional bounds as floats instead of truncating them with int()
#     (int(0.5)..int(0.8) is the empty range 0..0);
#   * never wraps a range object in a list (GridSearchCV would then pass the
#     range itself as the parameter value);
#   * only emits parameters the estimator actually accepts;
#   * reads each value from its own config key;
#   * registers a grid under the config "model_name" as well as its original
#     key wherever the two differ, so lookups by model name succeed.
algo_cfg = config.get("design_state_data", {}).get("algorithms", {})


def _int_span(low, high):
    """Return every integer from low to high, both bounds included."""
    low, high = sorted((int(low), int(high)))
    return list(range(low, high + 1))


def _float_bounds(low, high):
    """Return the distinct bounds of a float interval as grid candidates."""
    return sorted({float(low), float(high)})


def _n_jobs(cfg):
    """Translate "parallelism": 0 or a missing value means no explicit n_jobs."""
    return [cfg.get("parallelism") or None]


def _enabled(cfg, flag_to_value, fallback):
    """Return the values whose boolean flag is set, or [fallback] if none is."""
    chosen = []
    for flag, value in flag_to_value:
        if cfg.get(flag) and value not in chosen:
            chosen.append(value)
    return chosen or [fallback]


def _add_candidates(grid, param, values):
    """Add a config entry to grid as a candidate list; skip missing/empty ones."""
    if values is None:
        return
    if not isinstance(values, (list, tuple)):
        values = [values]
    if len(values):
        grid[param] = list(values)


def _add_positive(grid, param, value):
    """Add a single numeric candidate only when it is strictly positive."""
    if isinstance(value, (int, float)) and not isinstance(value, bool) and value > 0:
        grid[param] = [value]


def _depth_candidates(cfg):
    """Unlimited depth (None) plus the configured min and max depth."""
    depths = [None]
    for key in ("min_depth", "max_depth"):
        depth = cfg.get(key)
        if depth is not None and depth not in depths:
            depths.append(depth)
    return depths


def _forest_grid(cfg):
    """Grid shared by RandomForestClassifier and RandomForestRegressor."""
    return {
        "n_estimators": _int_span(cfg.get("min_trees"), cfg.get("max_trees")),
        "max_depth": _depth_candidates(cfg),
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": _int_span(
            cfg.get("min_samples_per_leaf_min_value"),
            cfg.get("min_samples_per_leaf_max_value"),
        ),
        "n_jobs": _n_jobs(cfg),
    }


def _linear_regression_grid(cfg):
    """Grid for sklearn LinearRegression, which has no alpha/l1_ratio/max_iter."""
    return {"n_jobs": _n_jobs(cfg)}


def _logistic_regression_grid(cfg):
    """Grid for LogisticRegression with an elastic-net penalty."""
    grid = {
        # l1_ratio is only honoured with penalty="elasticnet", which in turn
        # requires the "saga" solver.
        "penalty": ["elasticnet"],
        "solver": ["saga"],
        "l1_ratio": _float_bounds(cfg.get("min_elasticnet"), cfg.get("max_elasticnet")),
        "max_iter": _int_span(cfg.get("min_iter"), cfg.get("max_iter")),
        "n_jobs": _n_jobs(cfg),
    }
    # LogisticRegression has no "alpha"; its C is the inverse of the
    # regularization strength, so convert each positive bound.
    strengths = [
        value
        for value in _float_bounds(cfg.get("min_regparam"), cfg.get("max_regparam"))
        if value > 0
    ]
    if strengths:
        grid["C"] = sorted(1.0 / value for value in strengths)
    return grid


def _penalized_linear_grid(cfg):
    """Grid for Ridge and Lasso: regularization strength and iteration cap."""
    return {
        "alpha": _float_bounds(cfg.get("min_regparam"), cfg.get("max_regparam")),
        "max_iter": _int_span(cfg.get("min_iter"), cfg.get("max_iter")),
    }


def _elastic_net_grid(cfg):
    """Grid for ElasticNet: the Ridge/Lasso grid plus the L1/L2 mixing ratio."""
    grid = _penalized_linear_grid(cfg)
    grid["l1_ratio"] = _float_bounds(cfg.get("min_elasticnet"), cfg.get("max_elasticnet"))
    return grid


def _xgboost_grid(cfg):
    """Grid for the XGBoost scikit-learn estimators."""
    grid = {
        # NOTE(review): "use_gradient_boosted_tree" is assumed to select the
        # "gbtree" booster; confirm against the UI that produces the config.
        "booster": _enabled(
            cfg,
            (
                ("use_gradient_boosted_tree", "gbtree"),
                ("gbtree", "gbtree"),
                ("gblinear", "gblinear"),
                ("dart", "dart"),
            ),
            "dart",
        ),
        "n_jobs": _n_jobs(cfg),
    }
    _add_candidates(grid, "random_state", cfg.get("random_state"))
    # "early_stopping_rounds" is deliberately left out: early stopping needs a
    # validation set passed to fit(), which GridSearchCV does not provide.
    # NOTE(review): values are passed through unchanged; confirm their units,
    # e.g. a "sub_sample" of 67 is not a valid XGBoost subsample fraction.
    for param, key in (
        ("max_depth", "max_depth_of_tree"),
        ("learning_rate", "learningRate"),
        ("reg_alpha", "l1_regularization"),
        ("reg_lambda", "l2_regularization"),
        ("gamma", "gamma"),
        ("min_child_weight", "min_child_weight"),
        ("subsample", "sub_sample"),
        ("colsample_bytree", "col_sample_by_tree"),
    ):
        _add_candidates(grid, param, cfg.get(key))
    return grid


def _tree_grid(cfg, with_criterion):
    """Grid shared by the decision-tree classifier and regressor."""
    grid = {
        "max_depth": _depth_candidates(cfg),
        "min_samples_split": [2, 5, 10],
        "splitter": _enabled(cfg, (("use_best", "best"), ("use_random", "random")), "best"),
    }
    _add_candidates(grid, "min_samples_leaf", cfg.get("min_samples_per_leaf"))
    if with_criterion:
        grid["criterion"] = _enabled(
            cfg, (("use_gini", "gini"), ("use_entropy", "entropy")), "gini"
        )
    return grid


def _tree_classifier_grid(cfg):
    """Grid for DecisionTreeClassifier (includes the split criterion)."""
    return _tree_grid(cfg, with_criterion=True)


def _tree_regressor_grid(cfg):
    """Grid for DecisionTreeRegressor; gini/entropy do not apply to regression."""
    return _tree_grid(cfg, with_criterion=False)


def _svm_grid(cfg):
    """Grid for SVC: C values plus every enabled kernel and gamma mode."""
    grid = {}
    _add_candidates(grid, "C", cfg.get("c_value"))
    # Only enabled kernels are listed; a None entry is not a valid kernel.
    # NOTE(review): the config reference shows a "rep_kernel" flag that is not
    # read here; confirm whether it is meant to enable the RBF kernel.
    grid["kernel"] = _enabled(
        cfg,
        (
            ("linear_kernel", "linear"),
            ("polynomial_kernel", "poly"),
            ("sigmoid_kernel", "sigmoid"),
            ("rbf_kernel", "rbf"),
        ),
        "rbf",
    )
    grid["gamma"] = _enabled(cfg, (("scale", "scale"), ("auto", "auto")), "scale")
    return grid


def _knn_grid(cfg):
    """Grid for the k-nearest-neighbours estimators."""
    grid = {}
    _add_candidates(grid, "n_neighbors", cfg.get("k_value"))
    grid["weights"] = ["distance" if cfg.get("distance_weighting") else "uniform"]
    finder = cfg.get("neighbour_finding_algorithm")
    # The estimator argument is "algorithm" (singular). The estimator has no
    # random_state argument, so that config value is not forwarded.
    grid["algorithm"] = [finder if finder in ("ball_tree", "kd_tree", "brute") else "auto"]
    # The Minkowski power must be positive; 0 is treated as "not set".
    _add_positive(grid, "p", cfg.get("p_value"))
    return grid


def _neural_network_grid(cfg):
    """Grid for the multi-layer perceptron estimators."""
    grid = {}
    # NOTE(review): each entry of "hidden_layer_sizes" is tried as a separate
    # candidate; confirm whether the list is meant as one multi-layer layout.
    _add_candidates(grid, "hidden_layer_sizes", cfg.get("hidden_layer_sizes"))
    activation = cfg.get("activation")
    grid["activation"] = [
        activation if activation in ("identity", "logistic", "tanh") else "relu"
    ]
    # scikit-learn expects lower-case solver names ("adam", not "ADAM").
    grid["solver"] = [str(cfg.get("solver") or "adam").lower()]
    batch_size = cfg.get("batch_size")
    # "full" is not a valid batch_size; use "auto" unless a size is configured.
    grid["batch_size"] = [
        "auto" if cfg.get("automatic_batching") or not batch_size else batch_size
    ]
    for param, key in (
        ("alpha", "alpha_value"),
        ("tol", "convergence_tolerance"),
        ("early_stopping", "early_stopping"),
        ("shuffle", "shuffle_data"),
        ("beta_1", "beta_1"),
        ("beta_2", "beta_2"),
        ("power_t", "power_t"),
        ("momentum", "momentum"),
        ("nesterovs_momentum", "use_nesterov_momentum"),
    ):
        _add_candidates(grid, param, cfg.get(key))
    # These must be strictly positive for the estimator; 0 is treated as
    # "not set" so the estimator default applies.
    for param, key in (
        ("max_iter", "max_iterations"),
        ("learning_rate_init", "initial_learning_rate"),
        ("epsilon", "epsilon"),
    ):
        _add_positive(grid, param, cfg.get(key))
    return grid


# (keys to register in param_grids, config family key, grid builder).
# The first key is the original one; further keys are the config "model_name"
# (or the model_classes key) where that differs from the original key.
_grid_builders = [
    (("Random Forest Classifier",), "RandomForestClassifier", _forest_grid),
    (("Random Forest Regressor",), "RandomForestRegressor", _forest_grid),
    (("LinearRegression",), "LinearRegression", _linear_regression_grid),
    (("LogisticRegression",), "LogisticRegression", _logistic_regression_grid),
    (("RidgeRegression",), "RidgeRegression", _penalized_linear_grid),
    (("LassoRegression", "Lasso Regression"), "LassoRegression", _penalized_linear_grid),
    (("ElasticNetRegression", "ElasticNet Regression"), "ElasticNetRegression", _elastic_net_grid),
    (("xg_boost", "XG Boost"), "xg_boost", _xgboost_grid),
    (("Decision Tree",), "DecisionTreeClassifier", _tree_classifier_grid),
    (("Decision Tree Regressor",), "DecisionTreeRegressor", _tree_regressor_grid),
    (("SVM", "Support Vector Machine"), "SVM", _svm_grid),
    (("KNN",), "KNN", _knn_grid),
    (("Neural Network",), "neural_network", _neural_network_grid),
]

param_grids = {}
for _keys, _family, _builder in _grid_builders:
    # Families absent from the config are skipped instead of raising.
    if _family not in algo_cfg:
        continue
    _grid = _builder(algo_cfg[_family])
    for _key in _keys:
        param_grids[_key] = _grid

# Keep a usable regressor-tree grid even when the config has no section for it.
param_grids.setdefault(
    "Decision Tree Regressor",
    {
        "max_depth": [None, 10, 20],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
)

### Hyperparameter tuning of all the models

In [60]:
# Tune every selected algorithm with a 5-fold grid search, then report the
# best parameters, the best cross-validated score and the hold-out score.
for model_family, model_name in selected_algorithms.items():
    # Resolve by display name first, then fall back to the config family key:
    # model_classes and param_grids are not keyed uniformly, so a lookup by
    # model_name alone raises KeyError for several families.
    model_class = model_classes.get(model_name) or model_classes.get(model_family)
    if model_class is None:
        print(f"Model class for '{model_name}' not found in model_classes dictionary.")
        continue

    param_grid = param_grids.get(model_name)
    if param_grid is None:
        param_grid = param_grids.get(model_family)
    if param_grid is None:
        print(f"Parameter grid for '{model_name}' not found in param_grids dictionary.")
        continue

    # Initialize the model
    model = model_class()

    # Seed estimators that accept a random_state with the same seed used for
    # the train/test split, so repeated runs give the same result. A
    # random_state entry in the grid still takes precedence during the search.
    if "random_state" in model.get_params():
        model.set_params(random_state=random_state)

    # Accuracy for classifiers, negated MSE for regressors (GridSearchCV
    # always maximises the score).
    scoring = 'accuracy' if is_classifier(model) else 'neg_mean_squared_error'

    # Initialize GridSearchCV with the model and hyperparameters
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=5)

    # Fit the GridSearchCV object to the training data
    grid_search.fit(X_train, y_train)

    # Print the best hyperparameters and the best cross-validated score
    print("Best hyperparameters for", model_name, ":", grid_search.best_params_)
    print("Best", scoring, ":", grid_search.best_score_)

    # Evaluate the refitted best model on the held-out test set
    test_score = grid_search.score(X_test, y_test)
    print("Test", scoring, ":", test_score)
    print()


Best hyperparameters for Random Forest Classifier : {'max_depth': 30, 'min_samples_leaf': 11, 'min_samples_split': 5, 'n_estimators': 13, 'n_jobs': None}
Best accuracy : 0.9583333333333334
Test accuracy : 0.9

Best hyperparameters for Decision Tree : {'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 12, 'min_samples_split': 2, 'splitter': 'best'}
Best accuracy : 0.95
Test accuracy : 0.9333333333333333

