# Gaussian Mixture

In [1]:
import pandas as pd

In [2]:
dataset = pd.read_csv("../dataset/camera.csv")
print(dataset.shape)

(141555, 116)


In [21]:
X = dataset.iloc[:, 0:-1]
y = dataset.iloc[:, -1]

## Seleção de features

Eliminação de features por limiar de variância, descartando colunas cujo valor é 0 ou 1 em mais de 80% das amostras:

In [22]:
from sklearn.feature_selection import VarianceThreshold

X = VarianceThreshold(threshold=(.8 * (1 - .8))).fit_transform(X)
print(X.shape)

(141555, 105)


In [23]:
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
gm = GaussianMixture(
    n_components = 3,
    covariance_type = 'spherical',
    tol = 0.001,
    reg_covar = 1e-06,
    max_iter = 100,
    n_init = 1,
    init_params = 'kmeans',
    weights_init = None,
    means_init = None,
    precisions_init = None,
    random_state = None,
    warm_start = False,
    verbose = 0, 
    verbose_interval = 10
).fit(X_train, y_train)

In [25]:
y_pred = gm.predict(X_test)

In [27]:
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print("Matriz de confusão\n{}".format(cm))

Matriz de confusão
[[17362     0  1004]
 [ 2335   326  2309]
 [ 2993     0  1982]]


In [35]:
import numpy as np

x = np.array([['Gaussian Mixture'], cm])
np.save('resultados/gm', x)