# Projeto Final - Fundamentos de Machine Learning
# Modelos de Agrupamento

# 0.0. Imports

In [79]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, AffinityPropagation
from sklearn import metrics as mt
from sklearn.model_selection import ParameterGrid
import warnings
warnings.filterwarnings('ignore')

## 0.1. Funções

In [80]:
# Grid search over clustering hyperparameters, scored by the silhouette coefficient.
def search_best_params(x, algorithm, parameter_grid):
    """Find the parameter combination with the highest silhouette score.

    Parameters
    ----------
    x : array-like
        Data passed to ``fit`` (and ``predict``) and to ``mt.silhouette_score``.
        It must support ``len()``.
    algorithm : callable
        Estimator class (or factory) called as ``algorithm(**params)``. The
        resulting object must implement ``fit`` and either ``predict`` or a
        ``labels_`` attribute available after fitting.
    parameter_grid : iterable of dict
        Candidate keyword-argument dictionaries, e.g. a ``ParameterGrid``.

    Returns
    -------
    tuple
        ``(best_score, best_params)``. If no candidate produced a scorable
        clustering (including an empty grid), ``(float('-inf'), {})`` is
        returned.
    """
    # The silhouette coefficient can be negative, so start below every
    # attainable value instead of at 0; otherwise a grid whose scores are all
    # <= 0 would report a score of 0 with empty parameters.
    best_score = float("-inf")
    best_params = {}
    n_samples = len(x)

    for params in parameter_grid:
        model = algorithm(**params)
        model.fit(x)

        # Keep using predict() when the estimator offers it; fall back to the
        # fitted labels_ so estimators without predict() can be searched too.
        if hasattr(model, "predict"):
            labels = model.predict(x)
        else:
            labels = model.labels_

        # The silhouette score is only defined for 2 <= n_labels <= n_samples - 1.
        # Skip degenerate clusterings (a single cluster, or one cluster per
        # sample) rather than letting the scorer abort the whole search.
        n_labels = len(np.unique(labels))
        if not 2 <= n_labels <= n_samples - 1:
            continue

        current_metric = mt.silhouette_score(x, labels)
        if current_metric > best_score:
            best_score = current_metric
            best_params = params

    return best_score, best_params

# 1.0. Loading Data

In [4]:
# Load the dataset from CSV; `x` is the input handed to every clustering search below.
x = pd.read_csv('data/clusterizacao/X_dataset.csv')

In [5]:
# Preview the first rows of the loaded data.
x.head()

Unnamed: 0,alcohol,malic_acid,ash,ash_alcanity,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280,proline
0,1.518613,0.1917,0.232053,-1.169593,1.913905,0.627586,0.57384,-0.659563,1.224884,0.251717,0.455285,0.970696,0.561341
1,0.24629,0.205534,-0.827996,-2.490847,0.018145,0.575862,0.510549,-0.820719,-0.544721,-0.293321,0.463415,0.78022,0.550642
2,0.196879,0.320158,1.109334,-0.268738,0.088358,0.627586,0.611814,-0.498407,2.135968,0.26902,0.447154,0.695971,0.646933
3,1.69155,0.23913,0.487926,-0.809251,0.930918,0.989655,0.664557,-0.981875,1.032155,1.186068,0.308943,0.798535,0.857347
4,0.2957,0.365613,1.840403,0.451946,1.281985,0.627586,0.495781,0.226796,0.401404,-0.319276,0.455285,0.608059,0.325963


# 2.0. Ensaios

## 2.1. K-means

In [46]:
# K-Means search space: n_clusters from 2 to 9 (inclusive), fixed seed of 0.
kmeans_params = ParameterGrid(
    dict(
        n_clusters=np.arange(2, 10),
        random_state=[0],
    )
)

# Best (silhouette score, parameters) pair found over the grid.
kmeans_scores = search_best_params(x, KMeans, kmeans_params)
kmeans_scores

(0.23157240499717635, {'n_clusters': 3, 'random_state': 0})

## 2.2. Affinity Propagation

In [70]:
# Affinity Propagation search space: integer preferences from -100 to -1, fixed seed of 0.
afprop_params = ParameterGrid(
    dict(
        preference=np.arange(-100, 0),
        random_state=[0],
    )
)

# Best (silhouette score, parameters) pair found over the grid.
afprop_scores = search_best_params(x, AffinityPropagation, afprop_params)
afprop_scores

(0.20365750456597356, {'preference': -47, 'random_state': 0})

In [72]:
# First element of the returned tuple: the best silhouette score for Affinity Propagation.
afprop_scores[0]

0.20365750456597356

# 3.0. Comparação dos modelos

In [78]:
# Side-by-side table of the best silhouette score reached by each algorithm.
pd.DataFrame(
    {'Silhouette Score': [kmeans_scores[0], afprop_scores[0]]},
    index=['K-Means', 'Affinity Propagation'],
)

Unnamed: 0,Silhouette Score
K-Means,0.231572
Affinity Propagation,0.203658
