# Gaussian Mixture

In [1]:
import pandas as pd

dataset = pd.read_csv("../dataset/camera.csv")
print(dataset.shape)

X = dataset.iloc[:, 0:-1]
y = dataset.iloc[:, -1]

(141555, 116)


In [2]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
from sklearn.mixture import GaussianMixture

gm = GaussianMixture(
    n_components = 3,
    covariance_type = 'spherical',
    tol = 0.001,
    reg_covar = 1e-06,
    max_iter = 100,
    n_init = 1,
    init_params = 'kmeans',
    weights_init = None,
    means_init = None,
    precisions_init = None,
    random_state = None,
    warm_start = False,
    verbose = 0, 
    verbose_interval = 10
).fit(X_train, y_train)

In [4]:
y_pred = gm.predict(X_test)

In [5]:
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print("Matriz de confusão\n{}".format(cm))

Matriz de confusão
[[17362  1004     0]
 [ 2335  2309   326]
 [ 2993  1982     0]]


In [6]:
import numpy as np

x = np.array([['Gaussian Mixture'], cm])
np.save('resultados/gm', x)

---

## Seleção de features

### Limiar de variância

Eliminação de features cujo valor é 0 ou 1 em mais de 80% das amostras:

In [7]:
from sklearn.feature_selection import VarianceThreshold

X = VarianceThreshold(threshold=(.8 * (1 - .8))).fit_transform(X)
print(X.shape)

(141555, 105)


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

gm = GaussianMixture(
    n_components = 3,
    covariance_type = 'spherical',
    tol = 0.001,
    reg_covar = 1e-06,
    max_iter = 100,
    n_init = 1,
    init_params = 'kmeans',
    weights_init = None,
    means_init = None,
    precisions_init = None,
    random_state = None,
    warm_start = False,
    verbose = 0, 
    verbose_interval = 10
).fit(X_train, y_train)

y_pred = gm.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print("Matriz de confusão\n{}".format(cm))

x = np.array([['Gaussian Mixture (var)'], cm])
np.save('resultados/gm2', x)

Matriz de confusão
[[17362  1004     0]
 [ 2335  2309   326]
 [ 2993  1982     0]]


---

### Limiar de correlação

Elimina features que tenham mais de 90% de correlação com outra.

In [9]:
X = pd.read_csv('../dataset/camera_featred.csv')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

gm = GaussianMixture(
    n_components = 3,
    covariance_type = 'spherical',
    tol = 0.001,
    reg_covar = 1e-06,
    max_iter = 100,
    n_init = 1,
    init_params = 'kmeans',
    weights_init = None,
    means_init = None,
    precisions_init = None,
    random_state = None,
    warm_start = False,
    verbose = 0, 
    verbose_interval = 10
).fit(X_train, y_train)

y_pred = gm.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Matriz de confusão\n{}".format(cm))

x = np.array([['Gaussian Mixture (cor)'], cm])
np.save('resultados/gm3', x)

Matriz de confusão
[[17334  1032     0]
 [ 2335  2331   304]
 [ 2993  1982     0]]
