## Imports

In [None]:
import os
import sys

import pandas as pd

sys.path.append(os.path.join("..", "src"))
from candidate_extraction.candidate_extraction import Extractor
from clustering.clustering import ClusteringModel
from clustering.clustering_evaluation import ClusteringEvaluation
from intent_label_generation.intent_label_generation import IntentLabelGenerator
from semantic_representation.semantic_representation import SemanticRepresentation
from supported_config import params

## Config

In [None]:
# Pipeline settings: one choice per stage, collected into `config` below.

# Data
dataset = 'snips'

# Semantic Representation
embedding_model_name = 'use'

# Clustering
clustering_algorithm = 'kmeans'
clustering_measure = 'balanced'

# Candidate Extraction
extraction_method = 'liu'

# Intent Label Generation
generation_method = 'action_object_top_1'

# Single dictionary handed to every pipeline component constructed later on.
config = dict(
    dataset=dataset,
    embedding_model_name=embedding_model_name,
    clustering_algorithm=clustering_algorithm,
    clustering_measure=clustering_measure,
    extraction_method=extraction_method,
    generation_method=generation_method,
)

In [None]:
# Hardware toggles: `gpu_fit` is forwarded to ClusteringModel and `gpu_eval`
# to ClusteringEvaluation. Both are off here.
gpu_fit = gpu_eval = False

## Load Data

In [None]:
# Locate the raw CSV for the configured dataset, relative to this notebook.
raw_data_dir = os.path.join("..", "data", "raw")
data_path = os.path.join(raw_data_dir, f"{dataset}.csv")

df = pd.read_csv(data_path)

# Keep the preview as the final expression so the notebook renders it.
df.head()

## Semantic Representation

In [None]:
# Pull the "text" column out of the loaded DataFrame as a plain Python list.
utterances = df["text"].tolist()
# Build the embedder from the shared config.
# NOTE(review): presumably config["embedding_model_name"] selects the model —
# confirm in SemanticRepresentation.
embedding_model = SemanticRepresentation(config)
# `embeddings` is reused by the clustering and clustering-evaluation cells.
embeddings = embedding_model.embed(utterances)

## Clustering

In [None]:
# Build the clustering model from the shared config, honouring the GPU toggle.
cluster_model = ClusteringModel(config, gpu_fit=gpu_fit)

# `params` (from supported_config) is indexed by the configured algorithm name.
algorithm_params = params[config["clustering_algorithm"]]
cluster_model.run_clustering(embeddings, algorithm_params)

### Clustering Evaluation

In [None]:
# Build the evaluator from the shared config, honouring the GPU toggle.
# NOTE(review): `cluster_model` is not passed in here; how the evaluator gets
# the clusterings produced above is not visible in this notebook — confirm in
# ClusteringEvaluation.
cluster_evaluation = ClusteringEvaluation(config, gpu_eval=gpu_eval)
# The result's `labels_` attribute is consumed by the label-generation cell.
best_clusterings = cluster_evaluation.get_best_params(embeddings)

## Candidate Extraction

In [None]:
# Build the candidate extractor from the shared config.
# NOTE(review): presumably config["extraction_method"] selects the strategy —
# confirm in Extractor.
extractor = Extractor(config)
# `df` is rebound to whatever `extract` returns; later cells use that result.
df = extractor.extract(df)

## Intent Label Generation

In [None]:
# Build the label generator from the shared config.
# NOTE(review): presumably config["generation_method"] selects the strategy —
# confirm in IntentLabelGenerator.
ilg = IntentLabelGenerator(config)
# Pass the extracted-candidates DataFrame together with the cluster labels
# from the best clustering; `df` is rebound to the returned value.
df = ilg.generate(df, best_clusterings.labels_)