Before starting, we need to install a few dependencies.

In [0]:
# NOTE(review): no versions are pinned, so --upgrade installs whatever is newest.
# Later cells call tf.placeholder, tf.Session and hub.Module; confirm that the
# installed tensorflow / tensorflow-hub releases still provide those APIs.
!pip install --quiet --upgrade tensorflow dialogflow scipy tensorflow-hub

In [0]:
import dialogflow_v2 as dialogflow
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import pandas as pd
from collections import defaultdict
import os
from google.colab import auth
import pickle
print(tf.__version__)

In [0]:
# Fill in both values before running the cells below.
# GCP project id; also passed to the Dialogflow clients to locate the agent.
PROJECT_ID = ""
# Email address of the Dialogflow service account a key will be created for.
SERVICE_ACCOUNT_EMAIL = ""

In [0]:
# Interactive Colab sign-in; presumably this supplies the credentials that the
# gcloud commands below rely on — TODO confirm.
auth.authenticate_user()
!gcloud config set project {PROJECT_ID}
# Creates a new key for the service account and writes it to sa-key.json in the
# working directory. That file is a credential: do not commit or share it.
!gcloud iam service-accounts keys create sa-key.json \
 --iam-account={SERVICE_ACCOUNT_EMAIL} --project={PROJECT_ID}

In [0]:
def fetch_intents_training_phrases(service_account_file, project):
  """Fetch every intent's training phrases as plain sentences.

  Annotated parts of a training phrase are replaced by a sample value looked
  up by the part's alias, so each phrase reads like a realistic utterance.
  Sample values come from a built-in table; for entity types of the agent
  that an intent refers to and that are missing from the table, a random
  synonym of a random entry is used.

  Args:
    service_account_file: Path to the service account JSON key used to build
      the Dialogflow clients.
    project: GCP project id of the Dialogflow agent.

  Returns:
    A ``defaultdict(list)`` mapping intent display name to the list of its
    training phrases (as strings). Intents without training phrases are
    omitted.
  """
  dialogflow_entity_client = dialogflow.EntityTypesClient.from_service_account_file(service_account_file)
  parent = dialogflow_entity_client.project_agent_path(project)
  entities = list(dialogflow_entity_client.list_entity_types(parent))

  dialogflow_intents_client = dialogflow.IntentsClient.from_service_account_file(service_account_file)
  parent = dialogflow_intents_client.project_agent_path(project)
  intents = list(dialogflow_intents_client.list_intents(
    parent=parent,
    intent_view=dialogflow.enums.IntentView.INTENT_VIEW_FULL))

  # Sample values substituted for annotated parts, keyed by the part's alias.
  # NOTE(review): the keys look like system entity names without the "sys."
  # prefix, so this presumably relies on default parameter names — confirm.
  entities_name_to_value = {
    'date-time': 'tomorrow afternoon',
    'date': 'tomorrow',
    'date-period': 'April',
    'time': '4:30 pm',
    'time-period': 'afternoon',
    'number': 'one',
    'cardinal': 'ten',
    'ordinal': 'tenth',
    'number-integer': '1',
    'number-sequence': '1 2 3',
    'flight-number' : 'LH4234',
    'unit-area': 'ten square feet',
    'unit-currency': '5 dollars',
    'unit-length': 'ten meters',
    'unit-speed': '5 km/h',
    'unit-volume': '2 liters',
    'unit-weight': '5 kilos',
    'unit-information': '5 megabytes',
    'percentage': '10 percent',
    'temperature': '25 degrees',
    'duration': '5 days',
    'age': '1 year old',
    'currency-name': 'euros',
    # Fixed typo ('suqare'): a misspelled value would end up in the phrases
    # that get embedded.
    'unit-area-name': 'square meters',
    'unit-length-name': 'meters',
    'unit-speed-name': 'kilometer per hour',
    'unit-volume-name': 'cubic meters',
    'unit-weight-name': 'kilograms',
    'unit-information-name': 'megabytes',
    'address': '1600 Amphitheatre Pkwy, Mountain View, CA 94043',
    'zip-code': '94122',
    'geo-capital': 'Rome',
    'geo-country': 'Denmark',
    'geo-country-code': 'US',
    'geo-city': 'Tokyo',
    'geo-state': 'Scotland',
    'place-attraction': 'Golden Gate Bridge',
    'airport': 'SFO',
    'location': '1600 Amphitheatre Pkwy, Mountain View, CA 94043',
    'email': 'test@example.com',
    'phone-number': '+11234567890',
    'given-name': 'Joe',
    'last-name': 'Smith',
    'music-artist': 'Beatles',
    'music-genre': 'Jazz',
    'color': 'Blue',
    'language': 'Japanese',
    'any': 'flower',
    'url': 'google.com'
  }

  # Names referenced by the parameters of at least one intent. Collected once
  # so the agent's entity types are scanned a single time below.
  entities_used = {
      parameter.display_name
      for intent in intents
      for parameter in intent.parameters
  }

  for entity in entities:
      name = entity.display_name
      if name not in entities_used or name in entities_name_to_value:
          continue

      # Only entries that actually have synonyms can supply a sample value;
      # an entity type with none is skipped instead of raising, and its parts
      # fall back to the original annotated text below.
      candidates = [entry for entry in entity.entities if entry.synonyms]
      if not candidates:
          continue

      # Index-based draws: uniform over entries, then uniform over synonyms.
      entry = candidates[np.random.randint(len(candidates))]
      entities_name_to_value[name] = entry.synonyms[
          np.random.randint(len(entry.synonyms))]

  intent_training_phrases = defaultdict(list)
  for intent in intents:
      phrases = [
          "".join(
              entities_name_to_value.get(part.alias, part.text)
              for part in training_phrase.parts)
          for training_phrase in intent.training_phrases
      ]
      # Intents with no training phrases are left out of the result.
      if phrases:
          intent_training_phrases[intent.display_name].extend(phrases)
  return intent_training_phrases


In [0]:
intent_training_phrases = fetch_intents_training_phrases("sa-key.json", PROJECT_ID)

# One output line per intent: "<intent name>:<list of its training phrases>".
for intent, phrases in intent_training_phrases.items():
  print("{}:{}".format(intent, phrases))

### Let's create some embeddings



In [0]:
# Universal Sentence Encoder (version 2) loaded from TF Hub by URL.
# NOTE(review): hub.Module is used together with tf.placeholder / tf.Session
# further down — confirm the installed TensorFlow version supports this API.
embed_module = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2")

In [0]:
def make_embeddings_fn():
  """Build a function that turns a list of sentences into embeddings.

  The graph input, the embedding op and the session are created once here and
  captured by the returned closure, so repeated calls reuse the same session.

  Returns:
    A callable taking a list of strings and returning the result of running
    `embed_module` on them.
  """
  sentences_input = tf.placeholder(dtype=tf.string, shape=[None])
  embeddings_op = embed_module(sentences_input)

  session = tf.Session()
  initializers = [tf.global_variables_initializer(), tf.tables_initializer()]
  session.run(initializers)

  def _embeddings_fn(sentences):
      return session.run(embeddings_op, feed_dict={sentences_input: sentences})

  return _embeddings_fn

generate_embeddings = make_embeddings_fn()

In [0]:
# A few sample sentences to embed; they are plotted in the next cell.
sentences = [
  "Hi",
  "Hello",
  "Goodbye",
  "I'm a software program"
]
# One embedding per sentence, in the same order as `sentences`.
computed_embeddings = generate_embeddings(sentences)

In [0]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import math
from itertools import cycle

point_size=200
# Marker shapes are reused cyclically, so every embedding gets plotted even
# when there are more sentences than distinct markers.
markers = ['o', '^', '*', 's']

# Project the embeddings onto their first two principal components.
pca = PCA(n_components=2)
points_2d = pca.fit_transform(computed_embeddings)

fig = plt.figure(figsize=(16,10))
ax = fig.add_subplot(111)

# One scatter call per sentence so that each one gets its own legend entry.
for point, marker in zip(points_2d, cycle(markers)):
  xs = point[0]
  ys = point[1]
  ax.scatter(xs, ys, marker=marker, s=point_size)

ax.set_title('Sentence embeddings projected to 2D with PCA')
ax.set_xlabel('X Dimension')
ax.set_ylabel('Y Dimension')

# Legend labels follow the order in which the points were drawn.
ax.legend(sentences)
plt.show()

In [0]:
# intent name -> {training phrase: embedding vector}
training_phrases_with_embeddings = defaultdict(list)
for intent_name in intent_training_phrases:
  training_phrases_list = intent_training_phrases[intent_name]
  # All phrases of an intent are embedded in a single call.
  computed_embeddings = generate_embeddings(training_phrases_list)
  training_phrases_with_embeddings[intent_name] = {
      phrase: vector
      for phrase, vector in zip(training_phrases_list, computed_embeddings)
  }

In [0]:
# Preview: for each intent, its first phrase and the first 5 embedding values.
for intent_name, phrase_embeddings in training_phrases_with_embeddings.items():
  first_item = next(iter(phrase_embeddings.items()))
  training_phrase, embeddings = first_item
  print("{}: {{'{}':{}}}".format(intent_name, training_phrase, embeddings[:5]))

In [0]:
from sklearn.decomposition import PCA

# Stack every phrase embedding of every intent so that a single PCA is fitted
# and all intents are projected onto the same two axes.
embedding_vectors = np.asarray([
    embeddings
    for training_phrases_and_embeddings in training_phrases_with_embeddings.values()
    for embeddings in training_phrases_and_embeddings.values()
])

pca = PCA(n_components=2)
pca.fit(embedding_vectors)

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(15,10))
ax = fig.add_subplot(111)

# Annotations show at most this many characters of a phrase.
max_label_chars = 15

legend = []

# One scatter call (and one color from the "C<n>" cycle) per intent.
for color, intent in enumerate(training_phrases_with_embeddings.keys()):
  phrases = list(training_phrases_with_embeddings[intent].keys())
  embeddings = list(training_phrases_with_embeddings[intent].values())
  points = pca.transform(embeddings)
  xs = points[:,0]
  ys = points[:,1]
  ax.scatter(xs, ys, marker='o', s=100, c="C"+str(color))
  for i, phrase in enumerate(phrases):
    # Only add the ellipsis when the phrase was actually shortened.
    if len(phrase) > max_label_chars:
      label = phrase[:max_label_chars] + '...'
    else:
      label = phrase
    ax.annotate(label, (xs[i], ys[i]))
  legend.append(intent)


ax.legend(legend)
plt.show()

In [0]:
from sklearn.metrics.pairwise import cosine_similarity

# Flat list of (intent, phrase, embedding) rows across all intents.
flatten = [
    (intent, phrase, embedding)
    for intent, phrase_embeddings in training_phrases_with_embeddings.items()
    for phrase, embedding in phrase_embeddings.items()
]

# Compute all pairwise similarities in one call instead of one call per pair.
# The call is skipped for empty input (scikit-learn validates its input and
# may reject an empty array), so no data still yields an empty DataFrame.
if flatten:
  similarity_matrix = cosine_similarity(np.asarray([row[2] for row in flatten]))
else:
  similarity_matrix = np.empty((0, 0))

# One record per unordered pair of distinct phrases (i < j).
data = []
for i in range(len(flatten)):
  intent_1, phrase_1 = flatten[i][:2]
  for j in range(i+1, len(flatten)):
    intent_2, phrase_2 = flatten[j][:2]
    data.append(
        [intent_1, phrase_1, intent_2, phrase_2, similarity_matrix[i, j]])

similarity_df = pd.DataFrame(data, 
  columns=["Intent A", "Phrase A", "Intent B", "Phrase B", "Similarity"])

In [0]:
# Pairs whose two phrases belong to different intents.
different_intent = similarity_df['Intent A'].ne(similarity_df['Intent B'])
# The five most similar cross-intent pairs: candidates for confusable phrases.
display(
    similarity_df.loc[different_intent]
    .sort_values('Similarity', ascending=False)
    .head(5)
)

### Compute Intents Cohesion

In [0]:
# Cohesion of an intent = mean similarity between pairs of its own phrases,
# so only pairs where both phrases belong to the same intent are averaged.
same_intent = similarity_df['Intent A'] == similarity_df['Intent B']
cohesion_df = pd.DataFrame(similarity_df[same_intent].groupby('Intent A', as_index=False)['Similarity'].mean())
cohesion_df.columns = ['Intent', 'Cohesion']
display(cohesion_df)

In [0]:
# Separation of two intents = 1 - mean similarity over their cross-intent pairs.
different_intent = similarity_df['Intent A'].ne(similarity_df['Intent B'])
separation_df = (
    similarity_df[different_intent]
    .groupby(['Intent A', 'Intent B'], as_index=False)['Similarity']
    .mean()
    .assign(Separation=lambda means: 1 - means['Similarity'])
    .drop(columns='Similarity')
)
# Least separated (most easily confused) intent pairs first.
display(separation_df.sort_values('Separation'))