In [None]:
# data handling libs
import numpy as np
import pandas as pd
import warnings

# Ignore display of unnecessary warnings
def fxn():
    """Emit one sample ``DeprecationWarning`` with the message "deprecated"."""
    warnings.warn("deprecated", category=DeprecationWarning)


# Globally silence every warning category for the rest of the session.
warnings.filterwarnings(action="ignore")

# Call the sample function inside a temporary, fully-suppressed warning context;
# the filter state is restored when the context exits.
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore")
    fxn()
    
# data preprocessing libs
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# sklearn classifiers to import
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# TensorFlow / Keras imports (replacement for the old tensorflow.contrib.learn import noted below)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# * OLD ONE from tensorflow.contrib.learn import DNNClassifier

# model building, predict, accuracy imports
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from IPython.display import display

In [None]:
# Load, clean and split the Iris data
# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")
tf.get_logger().setLevel('FATAL')

# Load dataset
FEATURE_COLUMNS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
data = pd.read_csv("/kaggle/input/iriscsv/iris.csv", names=FEATURE_COLUMNS + ['species'])
print('Dataset used: Iris Data set')
num_folds = 15

# Coerce *every* feature column to numeric (not just sepal_length) so that no
# column is left as strings, then drop rows that contain anything unparsable.
# NOTE(review): presumably the CSV ships its own header line, which `names=`
# turns into a data row; this step removes it -- confirm against the file.
data[FEATURE_COLUMNS] = data[FEATURE_COLUMNS].apply(pd.to_numeric, errors='coerce')
data = data.dropna()

# Report sizes only after cleaning so invalid rows are not counted as instances.
print('Number of instances in dataset:', len(data))
print('Number of attributes in dataset:', len(data.columns) - 1)

# Encode target labels *after* cleaning: fitting the encoder before the invalid
# rows are dropped would register their 'species' value as an extra class and
# could shift the integer codes of the real classes.
le = LabelEncoder()
data['species'] = le.fit_transform(data['species'])

# Split features and target (last column is the encoded species label)
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.18, random_state=42)

In [None]:
# Hyperparameter grids
# Each dict maps an estimator constructor argument to the list of values that
# GridSearchCV will try; every combination of the listed values is evaluated.
#
# 'auto' is intentionally absent from the `max_features` lists: for classifiers
# it was an alias of 'sqrt', was deprecated in scikit-learn 1.1 and removed in
# 1.3, where each such candidate fails to fit and is scored NaN.
random_forest_params = {
    'n_estimators': [5, 10, 15, 20, 25],
    'criterion': ['gini', 'entropy'],
    'max_features': [2, 3, 4, 'log2', 'sqrt', None],
    'bootstrap': [False, True]
}
decision_tree_params = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'min_samples_split': [2, 3, 4],
    'max_features': [2, 3, 'log2', 'sqrt', None],
    'class_weight': ['balanced', None]
}
perceptron_params = {
    'penalty': [None, 'l2', 'l1', 'elasticnet'],
    'fit_intercept': [False, True],
    'shuffle': [False, True],
    'class_weight': ['balanced', None],
    'alpha': [0.0001, 0.00025],
    'max_iter': [30, 50, 90]
}
# NOTE(review): per the scikit-learn docs `degree` only affects kernel='poly',
# and no 'kernel' entry is searched here -- confirm whether one is missing.
svm_params = {
    'shrinking': [False, True],
    'degree': [3, 4],
    'class_weight': ['balanced', None]
}
# NOTE(review): per the scikit-learn docs `learning_rate` is only used by
# solver='sgd', which is not in the solver list -- confirm this entry is wanted.
neural_net_params = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'hidden_layer_sizes': [(20, 15, 10), (30, 20, 15, 10), (16, 8, 4)],
    'max_iter': [50, 80, 150],
    'solver': ['adam', 'lbfgs'],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
    'shuffle': [True, False]
}
log_reg_params = {
    'class_weight': ['balanced', None],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag'],
    'fit_intercept': [True, False]
}
knn_params = {
    'n_neighbors': [2, 3, 5, 10],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [5, 10, 15, 20]
}
bagging_params = {
    'n_estimators': [5, 12, 15, 20],
    'bootstrap': [False, True]
}
# NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases; it is
# kept here because it is a distinct algorithm on older versions -- confirm
# against the installed version.
ada_boost_params = {
    'n_estimators': [50, 75, 100],
    'algorithm': ['SAMME', 'SAMME.R']
}
gradient_boosting_params = {
    'n_estimators': [15, 25, 50]
}
"""
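# Alternative way to build the parameter grids for all classifiers, using dict(...) calls.
# Kept for reference only: this whole block is a string literal and is never executed.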
random_forest_params = dict(n_estimators=[5, 10, 15, 20, 25], criterion=['gini', 'entropy'], 
                            max_features=[2, 3, 4, 'auto', 'log2', 'sqrt', None], bootstrap=[False, True]
                            )
decision_tree_params = dict(criterion=['gini', 'entropy'], splitter=['best', 'random'], min_samples_split=[2, 3, 4],
                            max_features=[2,3,'auto', 'log2', 'sqrt', None], class_weight=['balanced', None], presort=[False, True])

perceptron_params = dict(penalty=[None, 'l2', 'l1', 'elasticnet'], fit_intercept=[False, True], shuffle=[False, True],
                         class_weight=['balanced', None], alpha=[0.0001, 0.00025], max_iter=[30,50,90])

svm_params = dict(shrinking=[False, True], degree=[3,4], class_weight=['balanced', None])

neural_net_params = dict(activation=['identity', 'logistic', 'tanh', 'relu'], hidden_layer_sizes = [(20,15,10),(30,20,15,10),(16,8,4)], 
                         max_iter=[50,80,150], solver=['adam','lbfgs'], learning_rate=['constant', 'invscaling', 'adaptive'], shuffle=[True, False])

log_reg_params = dict(class_weight=['balanced', None], solver=['newton-cg', 'lbfgs', 'liblinear', 'sag'], fit_intercept=[True, False])

knn_params = dict(n_neighbors=[2, 3, 5, 10], weights=['uniform', 'distance'],
                  algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'], leaf_size=[5,10,15,20])

bagging_params = dict(n_estimators=[5, 12, 15, 20], bootstrap=[False, True])

ada_boost_params = dict(n_estimators=[50, 75, 100], algorithm=['SAMME', 'SAMME.R'])

guassiannb_params = dict()

gradient_boosting_params = dict(n_estimators=[15, 25, 50])

params = [
    random_forest_params, decision_tree_params, perceptron_params,
    svm_params, neural_net_params, log_reg_params, knn_params,
    bagging_params, ada_boost_params, guassiannb_params, gradient_boosting_params
]

# classifiers to test
classifiers = [
    RandomForestClassifier(), DecisionTreeClassifier(), Perceptron(),
    SVC(), MLPClassifier(), LogisticRegression(),
    KNeighborsClassifier(), BaggingClassifier(), AdaBoostClassifier(),
    GaussianNB(), GradientBoostingClassifier()
]

names = [
    'RandomForest', 'DecisionTree', 'Perceptron', 'SVM',
    'NeuralNetwork', 'LogisticRegression',
    'KNearestNeighbors', 'Bagging', 'AdaBoost', 'Naive-Bayes', 'GradientBoosting'
]

models = dict(zip(names, zip(classifiers, params)))

"""

# Models and their parameters
# Maps a display name to (unfitted estimator, hyperparameter grid).
# Estimators that accept `random_state` are seeded so repeated runs give the
# same results; 42 matches the seed already used for train_test_split.
RANDOM_STATE = 42
models = {
    'RandomForest': (RandomForestClassifier(random_state=RANDOM_STATE), random_forest_params),
    'DecisionTree': (DecisionTreeClassifier(random_state=RANDOM_STATE), decision_tree_params),
    'Perceptron': (Perceptron(random_state=RANDOM_STATE), perceptron_params),
    'SVM': (SVC(), svm_params),
    'NeuralNetwork': (MLPClassifier(random_state=RANDOM_STATE), neural_net_params),
    'LogisticRegression': (LogisticRegression(random_state=RANDOM_STATE), log_reg_params),
    'KNearestNeighbors': (KNeighborsClassifier(), knn_params),
    'Bagging': (BaggingClassifier(random_state=RANDOM_STATE), bagging_params),
    'AdaBoost': (AdaBoostClassifier(random_state=RANDOM_STATE), ada_boost_params),
    # GaussianNB is searched with an empty grid, i.e. default settings only.
    'Naive-Bayes': (GaussianNB(), {}),
    'GradientBoosting': (GradientBoostingClassifier(random_state=RANDOM_STATE), gradient_boosting_params)
}

# Parameter tuning with GridSearchCV
def parameter_tuning(models, X_train, X_test, y_train, y_test, cv=None):
    """Grid-search every model and report test and cross-validation accuracy.

    Parameters
    ----------
    models : dict
        Maps a display name to an ``(estimator, param_grid)`` pair.
    X_train, y_train
        Data used for the cross-validated grid search.
    X_test, y_test
        Held-out data used to score the tuned model.
    cv : optional
        Value forwarded to ``GridSearchCV(cv=...)``. When omitted, the
        module-level ``num_folds`` is used, as before.

    Returns
    -------
    accuracies : list of (name, test_accuracy, mean_cv_score)
        ``mean_cv_score`` is ``GridSearchCV.best_score_``, i.e. the mean
        cross-validated score of the selected parameter combination.
    best_parameters : list of (name, best_params_dict)
    """
    folds = num_folds if cv is None else cv
    print(folds, 'fold cross-validation is used\n')
    accuracies = []
    best_parameters = []
    for name, (clf, clf_params) in models.items():
        print(f'Computing GridSearch on {name}...')
        grid_clf = GridSearchCV(estimator=clf, param_grid=clf_params, cv=folds)
        grid_clf.fit(X_train, y_train)
        best_parameters.append((name, grid_clf.best_params_))
        # Predictions come from the estimator refit with the best parameters.
        predictions = grid_clf.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        # Report the CV score of the *tuned* model. Previously the untuned
        # `clf` was cross-validated again, so the reported CV figure did not
        # correspond to the parameters used for the test accuracy.
        accuracies.append((name, accuracy, grid_clf.best_score_))
    return accuracies, best_parameters

# Tune every model, then print one summary line per classifier
# (hold-out accuracy and mean cross-validation accuracy, both as percentages).
results, best_parameters = parameter_tuning(models, X_train, X_test, y_train, y_test)
print('\n============================================================')
for classifier, acc, cv_acc in results:
    report_line = f'{classifier}: Accuracy with Best Parameters = {round(acc * 100, 4)}% || Mean Cross Validation Accuracy = {round(cv_acc * 100, 4)}%'
    print(report_line)

In [None]:
# 2. Hyperparameter grids
# Each dict maps an estimator constructor argument to the list of values that
# GridSearchCV will try; every combination of the listed values is evaluated.
#
# 'auto' is intentionally absent from the `max_features` lists: for classifiers
# it was an alias of 'sqrt', was deprecated in scikit-learn 1.1 and removed in
# 1.3, where each such candidate fails to fit and is scored NaN.
random_forest_params = dict(
    n_estimators=[5, 10, 15, 20, 25],
    criterion=['gini', 'entropy'],
    max_features=[2, 3, 4, 'log2', 'sqrt', None],
    bootstrap=[False, True]
)
decision_tree_params = dict(
    criterion=['gini', 'entropy'],
    splitter=['best', 'random'],
    min_samples_split=[2, 3, 4],
    max_features=[2, 3, 'log2', 'sqrt', None],
    class_weight=['balanced', None]
)
perceptron_params = dict(
    penalty=[None, 'l2', 'l1', 'elasticnet'],
    fit_intercept=[False, True],
    shuffle=[False, True],
    class_weight=['balanced', None],
    alpha=[0.0001, 0.00025],
    max_iter=[30, 50, 90]
)
# NOTE(review): per the scikit-learn docs `degree` only affects kernel='poly',
# and no 'kernel' entry is searched here -- confirm whether one is missing.
svm_params = dict(
    shrinking=[False, True],
    degree=[3, 4],
    class_weight=['balanced', None]
)
# NOTE(review): per the scikit-learn docs `learning_rate` is only used by
# solver='sgd', which is not in the solver list -- confirm this entry is wanted.
neural_net_params = dict(
    activation=['identity', 'logistic', 'tanh', 'relu'],
    hidden_layer_sizes=[(20, 15, 10), (30, 20, 15, 10), (16, 8, 4)],
    max_iter=[50, 80, 150],
    solver=['adam', 'lbfgs'],
    learning_rate=['constant', 'invscaling', 'adaptive'],
    shuffle=[True, False]
)
log_reg_params = dict(
    class_weight=['balanced', None],
    solver=['newton-cg', 'lbfgs', 'liblinear', 'sag'],
    fit_intercept=[True, False]
)
knn_params = dict(
    n_neighbors=[2, 3, 5, 10],
    weights=['uniform', 'distance'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    leaf_size=[5, 10, 15, 20]
)
bagging_params = dict(
    n_estimators=[5, 12, 15, 20],
    bootstrap=[False, True]
)
# NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases; it is
# kept here because it is a distinct algorithm on older versions -- confirm
# against the installed version.
ada_boost_params = dict(
    n_estimators=[50, 75, 100],
    algorithm=['SAMME', 'SAMME.R']
)
gradient_boosting_params = dict(
    n_estimators=[15, 25, 50]
)

# Models and their parameters
# Maps a display name to (unfitted estimator, hyperparameter grid).
# Estimators that accept `random_state` are seeded so repeated runs give the
# same results; 42 matches the seed already used for train_test_split.
RANDOM_STATE = 42
models = {
    'RandomForest': (RandomForestClassifier(random_state=RANDOM_STATE), random_forest_params),
    'DecisionTree': (DecisionTreeClassifier(random_state=RANDOM_STATE), decision_tree_params),
    'Perceptron': (Perceptron(random_state=RANDOM_STATE), perceptron_params),
    'SVM': (SVC(), svm_params),
    'NeuralNetwork': (MLPClassifier(random_state=RANDOM_STATE), neural_net_params),
    'LogisticRegression': (LogisticRegression(random_state=RANDOM_STATE), log_reg_params),
    'KNearestNeighbors': (KNeighborsClassifier(), knn_params),
    'Bagging': (BaggingClassifier(random_state=RANDOM_STATE), bagging_params),
    'AdaBoost': (AdaBoostClassifier(random_state=RANDOM_STATE), ada_boost_params),
    # GaussianNB is searched with an empty grid, i.e. default settings only.
    'Naive-Bayes': (GaussianNB(), {}),
    'GradientBoosting': (GradientBoostingClassifier(random_state=RANDOM_STATE), gradient_boosting_params)
}

# Parameter tuning with GridSearchCV
def parameter_tuning(models, X_train, X_test, y_train, y_test, cv=None):
    """Grid-search every model and report test and cross-validation accuracy.

    Parameters
    ----------
    models : dict
        Maps a display name to an ``(estimator, param_grid)`` pair.
    X_train, y_train
        Data used for the cross-validated grid search.
    X_test, y_test
        Held-out data used to score the tuned model.
    cv : optional
        Value forwarded to ``GridSearchCV(cv=...)``. When omitted, the
        module-level ``num_folds`` is used, as before.

    Returns
    -------
    accuracies : list of (name, test_accuracy, mean_cv_score)
        ``mean_cv_score`` is ``GridSearchCV.best_score_``, i.e. the mean
        cross-validated score of the selected parameter combination.
    best_parameters : list of (name, best_params_dict)
    """
    folds = num_folds if cv is None else cv
    print(folds, 'fold cross-validation is used\n')
    accuracies = []
    best_parameters = []
    for name, (clf, clf_params) in models.items():
        print(f'Computing GridSearch on {name}...')
        grid_clf = GridSearchCV(estimator=clf, param_grid=clf_params, cv=folds)
        grid_clf.fit(X_train, y_train)
        best_parameters.append((name, grid_clf.best_params_))
        # Predictions come from the estimator refit with the best parameters.
        predictions = grid_clf.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        # Report the CV score of the *tuned* model. Previously the untuned
        # `clf` was cross-validated again, so the reported CV figure did not
        # correspond to the parameters used for the test accuracy.
        accuracies.append((name, accuracy, grid_clf.best_score_))
    return accuracies, best_parameters

# Tune every model, then print per-classifier scores followed by the
# hyperparameters that the grid search selected for each one.
results, best_parameters = parameter_tuning(models, X_train, X_test, y_train, y_test)
print('\n============================================================')
for classifier_name, accuracy, cv_mean in results:
    score_line = f"{classifier_name} Accuracy on Test Set: {accuracy:.2f}, Mean CV Score: {cv_mean:.2f}"
    print(score_line)

print('\nBest Parameters for each model:')
for name, params in best_parameters:
    params_line = f"{name}: {params}"
    print(params_line)