# Regroupement des intentions
Ce notebook illustre l'utilisation du script `intention_clustering.py` pour regrouper et visualiser les intentions d'un jeu de données.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import pandas as pd

from train.intention_embedding import IntentionEncoder
from collections import Counter

DATA_PATH = 'data/expert_dataset.npz'

# Read the intention labels from the dataset archive.
# The context manager closes the archive's file handle as soon as the array
# has been read; np.load on an .npz is lazy and otherwise keeps the file open.
# NOTE(review): allow_pickle=True allows unpickling of object arrays, which can
# execute arbitrary code -- only load archives that come from a trusted source.
with np.load(DATA_PATH, allow_pickle=True) as data:
    # Normalise every entry to a plain Python string.
    intentions = [str(i) for i in data['intentions']]

# Fit the encoder on the intentions, then encode all of them in one batch.
encoder = IntentionEncoder()
encoder.fit(intentions)
# The encoded batch is moved to the CPU and converted to a NumPy array so it
# can be passed to scikit-learn (presumably a torch tensor -- TODO confirm).
vecs = encoder.encode_batch(intentions).cpu().numpy()


In [None]:
# Example: cluster the intention vectors with KMeans and plot a 2-D projection
from sklearn.cluster import KMeans

N_CLUSTERS = 4  # requested number of clusters

# n_init is pinned explicitly: the scikit-learn default changed from 10 to
# 'auto' between releases, so leaving it unset makes the clustering depend on
# the installed version (and triggers a FutureWarning on intermediate ones).
# n_clusters is clamped because KMeans requires n_samples >= n_clusters.
kmeans = KMeans(
    n_clusters=min(N_CLUSTERS, len(vecs)),
    n_init=10,
    random_state=0,
).fit(vecs)
labels = kmeans.labels_

# NOTE: `pca` holds the projected 2-D coordinates, not the fitted estimator.
# random_state keeps the projection reproducible if a randomized SVD solver
# is selected.
pca = PCA(n_components=2, random_state=0).fit_transform(vecs)
plt.figure(figsize=(6, 4))
# One point per intention, coloured by its cluster label.
plt.scatter(pca[:, 0], pca[:, 1], c=labels)
plt.title('Projection PCA des intentions')
plt.show()


In [None]:
# Heatmap of intention frequencies per cluster
df = pd.DataFrame({'intent': intentions, 'cluster': labels})
# Contingency table: one row per cluster, one column per distinct intention,
# each cell holding the number of occurrences.
freq = pd.crosstab(index=df['cluster'], columns=df['intent'])

fig, ax = plt.subplots(figsize=(10, 4))
ax.imshow(freq, aspect='auto', cmap='Blues')
# Label rows with cluster ids and columns with intention names; column labels
# are rotated vertically.
ax.set_yticks(range(freq.shape[0]))
ax.set_yticklabels(freq.index)
ax.set_xticks(range(freq.shape[1]))
ax.set_xticklabels(freq.columns, rotation=90)
ax.set_title('Fréquence des intentions par cluster')
plt.show()
