# Process Synonyms

This notebook uses a combination of Python data science libraries and the Google Natural Language API (machine learning) to expand the vocabulary of the chatbot by generating synonyms for topics created in the previous notebook.

In [1]:
!pip uninstall -y google-cloud-datastore

Uninstalling google-cloud-datastore-1.15.3:
  Successfully uninstalled google-cloud-datastore-1.15.3


In [2]:
!pip install google-cloud-datastore

Collecting google-cloud-datastore
  Using cached https://files.pythonhosted.org/packages/b8/6c/bedcab39e8dc969f7a48d058dbacd69fc07ce3f817a03de875902016f667/google_cloud_datastore-1.15.3-py2.py3-none-any.whl
Installing collected packages: google-cloud-datastore
Successfully installed google-cloud-datastore-1.15.3


In [3]:
!pip install inflect

Collecting inflect
  Downloading https://files.pythonhosted.org/packages/2a/14/49a8afaaa66fb49cda8e60512f0fc07594232fb10ea6aa8995c069172cf6/inflect-3.0.2-py2.py3-none-any.whl
Collecting importlib-metadata (from inflect)
  Downloading https://files.pythonhosted.org/packages/98/b8/8ec57a8ef46fbe7f185318c7ff7df9a06c9df451d9a59a067bfa851bb828/importlib_metadata-2.1.1-py2.py3-none-any.whl
Collecting zipp>=0.5 (from importlib-metadata->inflect)
  Downloading https://files.pythonhosted.org/packages/96/0a/67556e9b7782df7118c1f49bdc494da5e5e429c93aa77965f33e81287c8c/zipp-1.2.0-py2.py3-none-any.whl
Collecting contextlib2; python_version < "3" (from importlib-metadata->inflect)
  Downloading https://files.pythonhosted.org/packages/85/60/370352f7ef6aa96c52fb001831622f50f923c1d575427d021b8ab3311236/contextlib2-0.6.0.post1-py2.py3-none-any.whl
Installing collected packages: contextlib2, zipp, importlib-metadata, inflect
Successfully installed contextlib2-0.6.0.post1 importlib-metadata-2.1.1 inflect-

Hit Reset Session > Restart, then resume with the following cells. 

In [1]:
# Only need to do this once...
import nltk
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /content/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /content/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [2]:
from nltk.corpus import stopwords
stop = set(stopwords.words('english'))

In [3]:
from google.cloud import datastore

In [4]:
datastore_client = datastore.Client()

In [5]:
client = datastore.Client()
query = client.query(kind='Topic')
results = list(query.fetch())

In [6]:
import inflect
plurals = inflect.engine()

## Extract Synonyms with Python
Split the topic into words and use PyDictionary to look up synonyms in a "thesaurus" for each word.  Store these in Datastore and link them back to the topic.  Note this section uses the concept of "stop words" to filter out articles and other parts of speech that don't contribute to meaning of the topic.

In [7]:
from nltk.corpus import wordnet
from sets import Set

for result in results:
  for word in result.key.name.split():
    
    if word in stop:
        continue

    
    synonyms = Set()
    for syn in wordnet.synsets(word):
      
      if ".n." in str(syn):

        for l in syn.lemmas():
          lemma = l.name()
          if (lemma.isalpha()):
            synonyms.add(lemma)
            synonyms.add(plurals.plural(lemma))
      
      if ".a." in str(syn):
        synonyms = Set()
        break

    print result.key.name, word, synonyms
    
    kind = 'Synonym'
    synonym_key = datastore_client.key(kind, result.key.name)

    synonym = datastore.Entity(key=synonym_key)
    synonym['synonym'] = result.key.name

    datastore_client.put(synonym)
    
    synonym_key = datastore_client.key(kind, word)

    synonym = datastore.Entity(key=synonym_key)
    synonym['synonym'] = result.key.name

    datastore_client.put(synonym)
    
    for dictionary_synonym in synonyms:
      
      synonym_key = datastore_client.key(kind, dictionary_synonym)

      synonym = datastore.Entity(key=synonym_key)
      synonym['synonym'] = result.key.name

      datastore_client.put(synonym)
      
    synonym_key = datastore_client.key(kind, plurals.plural(word))

    synonym = datastore.Entity(key=synonym_key)
    synonym['synonym'] = result.key.name

    datastore_client.put(synonym)
    

  from ipykernel import kernelapp as app
  _warn_if_not_unicode(string)


anxiety disorder anxiety Set([u'anxieties', u'anxiousnesses', u'anxiety', u'anxiousness'])
anxiety disorder disorder Set([u'disorders', u'disorderlinesses', u'upset', u'upsets', u'disorderliness', u'disorder'])
anxious anxious Set([])
appetite loss appetite Set([u'appetences', u'appetites', u'appetency', u'appetence', u'appetencies', u'appetite'])
appetite loss loss Set([u'reds', u'loss', u'releases', u'expirations', u'exits', u'deprivation', u'losses', u'passings', u'departure', u'going', u'exit', u'departures', u'goings', u'release', u'passing', u'deprivations', u'red', u'expiration'])
avoid stress avoid Set([])
avoid stress stress Set([u'stresses', u'tension', u'stress', u'focuses', u'strains', u'tensions', u'accents', u'tensenesses', u'accent', u'focus', u'strain', u'emphasis', u'tenseness', u'emphases'])
cognitive behavioural therapy cognitive Set([])
cognitive behavioural therapy behavioural Set([])
cognitive behavioural therapy therapy Set([u'therapy', u'therapies'])
cold hands 