In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.cluster import MiniBatchKMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

# Carregar os dados

In [3]:
def load_dataset(path):
    # return pd.read_csv(path, sep=",")
    return np.genfromtxt(path, delimiter=",")

In [4]:
focos_dataset = load_dataset("./Focos.csv")

# Divisão dos dados em Treino/Teste/Validação

In [5]:
def split_train_test_validation(dataset, proportionTrain, proportionValidation, proportionTest):
    np.random.shuffle(dataset)
    totalWeight = proportionTrain + proportionValidation + proportionTest
    dataset_size = len(dataset)

    firstSeparator = int(proportionTrain * dataset_size / totalWeight)
    secondSeparator = int((proportionTrain + proportionValidation) * dataset_size / totalWeight)

    return {"train": dataset[:firstSeparator],
        "validation": dataset[firstSeparator:secondSeparator],
        "test": dataset[secondSeparator:]}

In [6]:
focus_splitted_dataset = split_train_test_validation(focos_dataset, 0.8, 0, 0.2)

# Grid Search (usando normalização min-max)

In [7]:
def split_input_and_output(dataset, number_of_outputs=1, ends_with_output=True):
    if (ends_with_output):
        return dataset[:,:-number_of_outputs], dataset[:,-number_of_outputs]
    else:
        return dataset[:,number_of_outputs-1], dataset[:,number_of_outputs:]

def select_model(dataset, model_creator, hyperparameter_candidates):
    chosen_model = GridSearchCV(model_creator(), hyperparameter_candidates, cv=5, scoring='accuracy')
    X, y = split_input_and_output(dataset)
    chosen_model.fit(X, y)
    return chosen_model

## Mini Batch K-Means

In [8]:
def minibatchkmeans_hyperparameter_candidates():
    return {
        "n_clusters": [2, 4, 8, 16, 32, 64, 128, 256, 512],
        "batch_size": [100, 200, 400, 800, 1600, 3200, 6400, 12800],
        "max_iter": [10, 20, 40, 80, 160, 320, 640, 1280],
        "tol": [0.1, 0.01, 0.001, 0.0001, 0.00001]
    }

def minibatchkmeans_model_creator():
    return Pipeline([('scaler', MinMaxScaler()), ('model', MiniBatchKMeans())])

In [9]:
minibatchkmeans_model = select_model(focus_splitted_dataset["train"], minibatchkmeans_model_creator, minibatchkmeans_hyperparameter_candidates())

ValueError: Invalid parameter 'batch_size' for estimator Pipeline(steps=[('scaler', MinMaxScaler()), ('model', MiniBatchKMeans())]). Valid parameters are: ['memory', 'steps', 'verbose'].

## Multilayer Perceptron Classifier

In [None]:
def mlp_hyperparameter_candidates():
    return {
        "hidden_layer_sizes": [(2,), (4,), (8,), (16,), (32,), (64,), (128,), (256,), (512,)],
        "activation": ["identity", "logistic", "tanh", "relu"],
        "alpha": [0.0001, 0.001, 0.01, 0.1, 1],
        "batch_size": ["auto", 100, 200, 400, 800, 1600, 3200, 6400, 12800],
    }

def mlp_model_creator():
    return Pipeline([('scaler', MinMaxScaler()), ('model', MLPClassifier())])

In [None]:
mlp_model = select_model(focus_splitted_dataset["train"], mlp_model_creator, mlp_hyperparameter_candidates())

## Decision Tree

In [None]:
def decisiontree_hyperparameter_candidates():
    return {
        "criterion": ["gini", "entropy"],
        "splitter": ["best", "random"],
        "max_depth": [None, 2, 4, 8, 16, 32, 64, 128, 256, 512],
        "min_samples_split": [2, 4, 8, 16, 32, 64, 128, 256, 512],
        "min_impurity_decrease": [0, 0.1, 0.2, 0.3, 0.4, 0.5],
    }

def decisiontree_model_creator():
    return Pipeline([('scaler', MinMaxScaler()), ('model', DecisionTreeClassifier())])

In [None]:
decisiontree_model = select_model(focus_splitted_dataset["train"], decisiontree_model_creator, decisiontree_hyperparameter_candidates())

# Reporte dos resultados

## Mini Batch K-Means

## Multilayer Perceptron

## Decision Tree