In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(style="white")

In [None]:
%matplotlib inline

In [None]:
# Load the three pickled inputs, in the same order as before: X, y, espl.
# `y` supplies the group labels / classification targets and `espl` the
# feature table used by the cells below; `X` is not read by any visible cell.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# that come from a trusted source.
X, y, espl = (
    pd.read_pickle(pickle_path)
    for pickle_path in ("./X-2s.pkl", "./y-2s.pkl", "./espl.pkl")
)

### Analisi esplorativa con `PCA`, `ICA`, `t-SNE`.

In [None]:
from sklearn.decomposition import PCA, FastICA
from sklearn.manifold import TSNE

In [None]:
from funzioni import Whiten

In [None]:
# Whiten the data before computing the 2-D projections below.
# `Whiten` is a project helper imported from `funzioni`.
whitener = Whiten()
esplWh = whitener.fit_transform(espl)

In [None]:
# Two-component PCA of the whitened data; random_state is pinned to 42
# so repeated runs give the same projection.
# NOTE(review): if `Whiten` fully decorrelates and rescales the features,
# the principal directions are no longer well defined; confirm what
# `Whiten` actually does before interpreting this plot.
pca = PCA(
    n_components=2,
    random_state=42,
)
esplPCA = pca.fit_transform(X=esplWh)

In [None]:
# Two-component FastICA of the whitened data, seeded with 42 so the
# estimated components are repeatable across runs.
ica = FastICA(
    n_components=2,
    random_state=42,
)
esplICA = ica.fit_transform(X=esplWh)

In [None]:
# Two-dimensional t-SNE embedding of the whitened data, seeded with 42
# so the layout is repeatable across runs.
tsne = TSNE(
    n_components=2,
    random_state=42,
)
esplTSNE = tsne.fit_transform(X=esplWh)

In [None]:
# Funzione che produce lo scatterplot con i colori per i gruppi dati da y
from funzioni import ScatterGroup, MatriceConfusione

In [None]:
# Draw one group-coloured scatterplot per 2-D embedding and save it as a PNG.
# The output folder is created up front: savefig does not create missing
# directories, so on a fresh checkout the first save would otherwise fail.
Path("./figure").mkdir(parents=True, exist_ok=True)

for title, dat in zip(["PCA", "ICA", "t-SNE"], [esplPCA, esplICA, esplTSNE]):
    # NOTE(review): this frame gets a fresh 0..n-1 index while `y` keeps its
    # own; confirm that ScatterGroup pairs rows by position, not by index.
    coords = pd.DataFrame(dat, columns=["Prima componente", "Seconda componente"])
    fig, ax = ScatterGroup(coords, grp=y, palette="bright")
    fig.set_size_inches(11, 6)
    ax.set_title(title)
    # Work on the returned figure/axes rather than pyplot's implicit
    # "current" objects, so the legend and the saved file always belong
    # to the plot that was just created.
    ax.legend(bbox_to_anchor=(1, 0.7))
    fig.savefig("./figure/"+title+".png", bbox_inches="tight", dpi=180)

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Hold out 25% of the samples for validation; the fixed seed makes the split
# repeatable. The split is taken on the raw `espl` table, not the whitened one.
# NOTE(review): the split is not stratified on `y`; confirm that is intended.
X_train, X_val, y_train, y_val = train_test_split(
    espl,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Fit a linear discriminant classifier with default settings on the
# training split.
lda = LinearDiscriminantAnalysis()
lda.fit(X=X_train, y=y_train)

In [None]:
# Evaluate LDA on the validation split: print the accuracy as a percentage,
# print the raw confusion matrix, then call the project's confusion-matrix
# helper on the same labels and predictions.
y_pred_lda = lda.predict(X_val)
acc_lda = accuracy_score(y_val, y_pred_lda)
print("Accuratezza LDA: {:.1f}%".format(100 * acc_lda))
cm_lda = confusion_matrix(y_val, y_pred_lda)
print(cm_lda)
MatriceConfusione(y_val, y_pred_lda)

In [None]:
# Collect the distinct label values present in the validation targets.
# NOTE(review): `temp` is not read by any later visible cell; this looks
# like leftover scratch work. Confirm before relying on it.
temp = y_val.unique()

In [None]:
# Sort the distinct labels. The return value is discarded, so this only has
# an effect if `.sort()` works in place on the object from the previous cell
# (presumably a NumPy array; confirm for the dtype of `y`).
temp.sort()

In [None]:
# Fit a quadratic discriminant classifier with default settings on the
# same training split used for LDA.
qda = QuadraticDiscriminantAnalysis()
qda.fit(X=X_train, y=y_train)

In [None]:
# Evaluate QDA on the validation split: print the accuracy as a percentage,
# print the raw confusion matrix, then call the project's confusion-matrix
# helper on the same labels and predictions.
y_pred_qda = qda.predict(X_val)
acc_qda = accuracy_score(y_val, y_pred_qda)
print("Accuratezza QDA: {:.1f}%".format(100 * acc_qda))
cm_qda = confusion_matrix(y_val, y_pred_qda)
print(cm_qda)
MatriceConfusione(y_val, y_pred_qda)