In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB
from sklearn.neural_network import MLPClassifier

from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier

import os

from algorithm_wrapper import ModelWrapper

In [None]:
# Directory that holds the selected images
source_dir = 'selected_images_dog_cat'

# One forward-slash path per directory entry, kept in os.listdir order
uploaded_images = [
    os.path.join(source_dir, entry).replace('\\', '/')
    for entry in os.listdir(source_dir)
]

print(uploaded_images)

In [None]:
# Maps a file name to its class label based on the breed name it contains
def determine_class(filename):
    """Return 'dog', 'cat' or 'unknown' for the given file name.

    The decision is a case-sensitive substring test against a fixed set of
    breed names. Dog breeds are checked first, so a name matching both
    groups is labelled 'dog'.
    """
    dog_breeds = ('boxer', 'wheaten_terrier')
    cat_breeds = ('Siamese', 'Abyssinian')

    if any(breed in filename for breed in dog_breeds):
        return 'dog'
    if any(breed in filename for breed in cat_breeds):
        return 'cat'
    return 'unknown'

# Label every collected image path
classes = list(map(determine_class, uploaded_images))

# Show the dog count, the cat count, the total and the full label list,
# one per line
print(classes.count('dog'), classes.count('cat'), len(classes), classes, sep='\n')

In [None]:
import os
import json

file_metrics_names = ['bagging_models', 'bagging_models_05', 'boosting_models', 'random_forest_models']


def ensure_metrics_file(path):
    """Create `path` containing an empty JSON object unless it already exists.

    Args:
        path: Location of the metrics file.

    Returns:
        True when the file was created by this call, False when something
        already existed at `path` (its content is left untouched).
    """
    try:
        # 'x' (exclusive creation) fails if the path exists, so the check and
        # the creation happen in one step and existing results are never
        # truncated.
        with open(path, 'x') as f:
            # Seed with '{}' so the file is valid JSON; a zero-byte file
            # cannot be parsed by json.load.
            json.dump({}, f)
    except FileExistsError:
        return False
    return True


for model in file_metrics_names:
    # Define the filename
    filename = f'metrics_{model}.json'

    if ensure_metrics_file(filename):
        print(f"File '{filename}' created with initial content.")
    else:
        print(f"File '{filename}' already exists.")


In [5]:
# File expected to hold the extracted features
filename = 'features.json'

# Stays empty when loading fails, so the following cells can still be run
features_dict = {}

try:
    with open(filename, 'r') as f:
        features_dict = json.load(f)
except Exception as exc:
    # Best-effort load: keep going, but show the real cause next to the hint
    # so a malformed file is not mistaken for a missing one.
    print("Maybe you didn't run 'extrair_features.ipynb' first, it should be run before this one.")
    print(f"Could not load '{filename}': {type(exc).__name__}: {exc}")

In [14]:
import re

# Short labels used in metric keys for known base-estimator classes
estimator_names = {
    'DecisionTreeClassifier': 'AD',
    'KNeighborsClassifier': 'k-NN',
    'GaussianNB': 'NB',
    'MLPClassifier': 'MLP'
}


def run_experiments(models, model_name, features=None, labels=None,
                    wrapper_cls=None, output_dir='.'):
    """Train every model on every feature set and save the collected metrics.

    For each (feature set, model) pair a wrapper is built and trained twice,
    with 'kfold' and then 'holdout'. After each training the wrapper's
    `metrics` attribute is stored under the key
    "<feature>_<validation>_<name>_<n_estimators>", where <name> is the
    model's `criterion` parameter when it has one, otherwise the (shortened)
    class name of its `estimator` attribute. The result is written to
    "metrics_<model_name>.json" inside `output_dir`.

    Args:
        models: Iterable of models exposing `get_params()` and, optionally,
            an `estimator` attribute.
        model_name: Suffix used in the output file name.
        features: Mapping of feature-set name to feature data. Defaults to
            the notebook-level `features_dict`.
        labels: Target labels handed to the wrapper. Defaults to the
            notebook-level `classes`.
        wrapper_cls: Callable invoked as
            `wrapper_cls(model, feature_data, labels, grid_search=True)`; the
            result must provide `model_train(mode)` and `metrics`. Defaults
            to `ModelWrapper`.
        output_dir: Directory that receives the JSON file. Defaults to the
            current directory.

    Returns:
        None. When no metrics were collected (no feature sets or no models)
        nothing is written, so an existing results file is not replaced by
        an empty one.
    """
    # Resolve notebook-level defaults at call time, not at definition time
    if features is None:
        features = features_dict
    if labels is None:
        labels = classes
    if wrapper_cls is None:
        wrapper_cls = ModelWrapper

    metrics_dict = {}

    # Iterate through the dictionary to get both the name and the data
    for feature_name, feature_data in features.items():
        for model in models:
            params = model.get_params()

            criterion_name = params.get('criterion', 'default')
            print(f"Criterion name: {criterion_name}")

            # Take the class name from the type itself rather than parsing
            # the estimator's repr.
            base_estimator = getattr(model, 'estimator', None)
            if base_estimator is None:
                estimator_name = 'None'
            else:
                class_name = type(base_estimator).__name__
                estimator_name = estimator_names.get(class_name, class_name)
            print(f"Estimator name: {estimator_name}")

            n_estimators = params.get('n_estimators', 'default')
            print(f"Number of estimators: {n_estimators}")

            model_wrapper = wrapper_cls(model, feature_data, labels, grid_search=True)
            print(f"Training model with feature set: {feature_name}")

            # Models with their own criterion are labelled by it; the others
            # are labelled by their base estimator.
            name = estimator_name if criterion_name == 'default' else criterion_name

            for validation in ('kfold', 'holdout'):
                model_wrapper.model_train(validation)
                # NOTE(review): `metrics` is stored by reference; if the
                # wrapper mutates one dict in place instead of rebinding it,
                # both entries would alias the last run - confirm in
                # algorithm_wrapper.
                metrics_dict[f"{feature_name}_{validation}_{name}_{n_estimators}"] = model_wrapper.metrics

    if not metrics_dict:
        print(f"No metrics collected for '{model_name}'; existing results file left untouched.")
        return

    with open(os.path.join(output_dir, f'metrics_{model_name}.json'), 'w') as f:
        json.dump(metrics_dict, f, indent=4)

In [None]:
# Bagging ensembles: each base learner is paired with every ensemble size.
# Factories are used so each ensemble receives its own base-estimator instance.
bagging_models = [
    BaggingClassifier(estimator=make_base(), n_estimators=size)
    for make_base in (
        lambda: DecisionTreeClassifier(max_depth=5),
        lambda: KNeighborsClassifier(n_neighbors=15),
        lambda: GaussianNB(),
        lambda: MLPClassifier(
            activation="relu",
            hidden_layer_sizes=(100, 50),
            learning_rate_init=0.0001,
            max_iter=500,
            solver="sgd",
        ),
    )
    for size in (10, 20, 30)
]

run_experiments(bagging_models, 'bagging_models')

In [None]:
# Same bagging grid, with max_features=0.5 on every ensemble.
# Factories are used so each ensemble receives its own base-estimator instance.
bagging_models_05 = [
    BaggingClassifier(estimator=make_base(), n_estimators=size, max_features=0.5)
    for make_base in (
        lambda: DecisionTreeClassifier(max_depth=5),
        lambda: KNeighborsClassifier(n_neighbors=15),
        lambda: GaussianNB(),
        lambda: MLPClassifier(
            activation="relu",
            hidden_layer_sizes=(100, 50),
            learning_rate_init=0.0001,
            max_iter=500,
            solver="sgd",
        ),
    )
    for size in (10, 20, 30)
]

run_experiments(bagging_models_05, 'bagging_models_05')

In [None]:
# AdaBoost ensembles: each base learner is paired with every ensemble size.
# Factories are used so each ensemble receives its own base-estimator instance.
boosting_models = [
    AdaBoostClassifier(estimator=make_base(), n_estimators=size)
    for make_base in (
        lambda: DecisionTreeClassifier(max_depth=5),
        lambda: GaussianNB(),
    )
    for size in (10, 20, 30)
]

run_experiments(boosting_models, 'boosting_models')

In [None]:
# Random forests: every split criterion is combined with every forest size
random_forest_models = [
    RandomForestClassifier(criterion=split_criterion, n_estimators=size)
    for split_criterion in ('gini', 'entropy', 'log_loss')
    for size in (10, 20, 30, 100)
]

run_experiments(random_forest_models, 'random_forest_models')