In [None]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import glob # lista di files in formato *.estensione
import matplotlib.pyplot as plt
import os
import seaborn as sns

In [None]:
import tqdm
from scipy.fftpack import fft

In [None]:
# Load the pickled inputs. The `user` column is removed from X right after
# loading, i.e. before X is handed to the FFT further down.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
X = pd.read_pickle("X-2s.pkl").drop(columns="user")
y = pd.read_pickle("y-2s.pkl")
espl = pd.read_pickle("espl.pkl")

In [None]:
T = 10*0.001  # sampling interval [s] (10 ms)
N = 200       # samples per signal, used for the frequency axis and the 1/N scaling
# NOTE(review): N is assumed to match the number of columns of X -- confirm.

# Complex FFT of every row (fft works along the last axis by default).
fftX = pd.DataFrame(fft(X))

# Frequency axis for the first N//2 bins: bin k sits at k / (N*T) Hz, so the
# plotted half-spectrum ends just below the Nyquist frequency 1/(2*T).
# (Spreading these bins over 0..1/T would label every frequency twice too high.)
xf = np.fft.fftfreq(N, d=T)[:N//2]

# Scaled magnitudes of the positive-frequency half, computed once for all rows.
half_spectrum = np.abs(fftX.to_numpy()[:, :N//2]) / N

# One faint line per signal: a 2-D `y` is drawn column by column, hence the
# transpose. The very low alpha lets dense regions of the overlay stand out.
plt.plot(xf, half_spectrum.T, color="blue", alpha=0.007, solid_capstyle="butt")

plt.xlabel("Frequency [Hz]")
plt.ylabel("Amplitude")
plt.grid()

In [None]:
# Replace the complex FFT coefficients by their magnitudes so the models
# below receive real-valued features.
# NOTE(review): if X is real-valued, the bins above N/2 mirror the lower ones,
# so about half of these columns would be exact duplicates -- confirm whether
# only the first half should be kept.
fftX = np.abs(fftX)

# Optional extra features taken from `espl` (currently disabled):
#fftX["intTrapz"] = espl["intTrapz"]
#fftX["maxA"] = espl["maxA"]
#fftX["MVDeriv"] = espl["MVDeriv"]
#fftX["meanA"] = espl["meanA"]
fftX.head()

In [None]:
from funzioni import indice_gini, tasso_errata_classificazione, MatriceConfusione
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
import graphviz

In [None]:
# Hold out 25% of the rows as a validation split; the seed is fixed so the
# split is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    fftX,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Baseline model: a decision tree with default hyper-parameters (only the
# seed is fixed), wrapped in a single-step pipeline named "tree".
dtc = Pipeline(steps=[("tree", DecisionTreeClassifier(random_state=42))])

# Pipeline.fit returns the pipeline itself, so fitting and predicting chain.
y_pred = dtc.fit(X_train, y_train).predict(X_val)
dtc_acc = accuracy_score(y_val, y_pred)

#print('Accuratezza DummyClassifier("most_frequent"): {:.1f}%'.format(100 * dc_acc))
print("Accuratezza DecisionTreeClassifier(): {:.2f}%".format(100 * dtc_acc))
# Project helper from `funzioni`; presumably draws the confusion matrix that
# plt.show() then renders -- confirm against its definition.
MatriceConfusione(y_val, y_pred)
plt.show()

In [None]:
from sklearn.model_selection import ParameterGrid
import tqdm

In [None]:
# Depth reached by the fitted tree inside `dtc`. This assumes `dtc` is still
# the unconstrained baseline fit, i.e. the notebook is run top to bottom.
baseline_depth = dtc.named_steps["tree"].tree_.max_depth

# Candidate depths run from 1 up to AND INCLUDING the baseline depth, so the
# grid contains the baseline configuration itself (np.arange excludes its
# stop value, hence the +1). max(..., 1) keeps the range non-empty when the
# baseline tree is a single leaf; ParameterGrid rejects empty value lists.
param_grid = ParameterGrid({
    'tree__max_depth': np.arange(1, max(baseline_depth, 1) + 1),
    'tree__min_samples_leaf': 2 ** np.arange(9),  # 1, 2, 4, ..., 256
})
print(param_grid.param_grid)

In [None]:
from sklearn.base import clone

# Exhaustive search over `param_grid`, scored on the validation split.
# NOTE(review): the winning configuration is later evaluated on this same
# validation split, so that accuracy is optimistically biased -- consider a
# separate test split or cross-validation.
records = []

for params in tqdm.tqdm(param_grid):
    # Work on an unfitted copy so the baseline `dtc` keeps its own
    # hyper-parameters and fitted tree (re-running earlier cells that inspect
    # `dtc` then still sees the baseline, and `dtc_acc` stays consistent).
    candidate = clone(dtc).set_params(**params)
    candidate.fit(X_train, y_train)
    candidate_acc = accuracy_score(y_val, candidate.predict(X_val))
    records.append({**params, "accuracy_score": candidate_acc})

# Best accuracy first; among equal accuracies prefer the shallower tree.
# Row 0 of the re-indexed frame is therefore the selected configuration.
risultati = (
    pd.DataFrame(records)
    .sort_values(["accuracy_score", "tree__max_depth"], ascending=[False, True])
    .reset_index(drop=True)
)
print("Primi 5:")
display(risultati.head())

print("Ultimi 5:")
risultati.tail()

In [None]:
from funzioni import grafico_metrica_iperparametro

In [None]:
# Row 0 of `risultati` holds the top-ranked hyper-parameter combination.
# Scalar access (rather than pulling the whole row) keeps the integer dtype.
max_depth = risultati.at[0, "tree__max_depth"]
min_samples_leaf = risultati.at[0, "tree__min_samples_leaf"]

# Same single-step pipeline as the baseline, but with the selected settings.
tuned_tree = DecisionTreeClassifier(
    max_depth=max_depth,
    min_samples_leaf=min_samples_leaf,
    random_state=42,
)
dtc_tun = Pipeline(steps=[("tree", tuned_tree)])

# Pipeline.fit returns the pipeline itself, so fitting and predicting chain.
y_pred = dtc_tun.fit(X_train, y_train).predict(X_val)
dtc_tun_acc = accuracy_score(y_val, y_pred)

# Report the baseline accuracy (`dtc_acc`, computed in an earlier cell) next
# to the tuned one for comparison.
#print('Accuratezza DummyClassifier("most_frequent"): {:.1f}%'.format(100 * dc_acc))
print("Accuratezza DecisionTreeClassifier(): {:.1f}%".format(100 * dtc_acc))
print("Accuratezza DecisionTreeClassifier(max_depth={}, min_samples_leaf={}): {:.1f}%".format(
    max_depth, min_samples_leaf, 100 * dtc_tun_acc))
print(confusion_matrix(y_val, y_pred))
MatriceConfusione(y_val, y_pred)
plt.show()

In [None]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
# Two independent 2-component embeddings of `fftX`, both with a fixed seed.
pca = PCA(n_components=2, random_state=42)
fftPCA = pca.fit_transform(fftX)

tsne = TSNE(n_components=2, random_state=42)
fftTSNE = tsne.fit_transform(fftX)

In [None]:
from funzioni import ScatterGroup, MatriceConfusione

# One grouped scatter plot per embedding, coloured by the labels in `y`.
embeddings = (("PCA", fftPCA), ("t-SNE", fftTSNE))
component_names = ["Prima componente", "Seconda componente"]

for title, coords in embeddings:
    coords_df = pd.DataFrame(coords, columns=component_names)
    # ScatterGroup is a project helper from `funzioni`; it is used here as
    # returning a (figure, axes) pair.
    fig, ax = ScatterGroup(coords_df, grp=y, palette="bright")
    fig.set_size_inches(11, 6)  # width 11, height 6
    ax.set_title(title)
    # Legend anchored at the right edge of the current axes.
    plt.legend(bbox_to_anchor=(1, 0.7))

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [None]:
# Linear discriminant analysis with default settings, fitted on the training
# split. The fit call is the last expression, so the estimator is displayed.
lda = LinearDiscriminantAnalysis()
lda.fit(X=X_train, y=y_train)

In [None]:
# Score LDA on the validation split: accuracy, raw confusion matrix, and the
# project's confusion-matrix helper followed by plt.show().
y_pred_lda = lda.predict(X_val)
acc_lda = accuracy_score(y_val, y_pred_lda)
print("Accuratezza LDA su FT: {:.1f}%".format(100*acc_lda))
print(confusion_matrix(y_val, y_pred_lda))
MatriceConfusione(y_val, y_pred_lda)
plt.show()

In [None]:
# Quadratic discriminant analysis with default settings, fitted on the
# training split. The fit call is the last expression, so the estimator is
# displayed.
# NOTE(review): QDA estimates a covariance matrix per class; if the FFT
# features contain duplicated or collinear columns this may be ill-conditioned
# -- check for collinearity warnings and consider `reg_param`.
qda = QuadraticDiscriminantAnalysis()
qda.fit(X=X_train, y=y_train)

In [None]:
# Score QDA on the validation split: accuracy, raw confusion matrix, and the
# project's confusion-matrix helper followed by plt.show().
y_pred_qda = qda.predict(X_val)
acc_qda = accuracy_score(y_val, y_pred_qda)
print("Accuratezza QDA su FT: {:.1f}%".format(100*acc_qda))
print(confusion_matrix(y_val, y_pred_qda))
MatriceConfusione(y_val, y_pred_qda)
plt.show()