# Meaning

In [None]:
#@title functions

import requests
from lxml import html
import re
import pandas as pd
import numpy as np

def get_link(word='get'):
    """Build the Longman (ldoceonline.com) dictionary URL for *word*.

    A purely alphabetic word is used unchanged.  Anything else (for example
    the phrase ``"let somebody/something ↔ down"``) is normalised:

    1. the text is split on whitespace;
    2. in each token, every character other than an ASCII letter, a digit,
       ``-``, ``/`` or ``↔`` is removed;
    3. ``-``, ``/`` and ``↔`` act as further separators;
    4. the resulting pieces are joined with ``-``.

    Single-character tokens are kept, so ``"a lot"`` becomes ``a-lot``
    instead of losing the leading word.

    Args:
        word: Word or phrase to look up.

    Returns:
        The lower-cased URL string.
    """
    if not word.isalpha():
        pieces = []
        for token in word.split():
            kept = re.sub(r"[^a-zA-Z0-9-/↔]", "", token)
            # A token may expand into several pieces ("somebody/something")
            # or into none at all (a lone "↔").
            pieces.extend(re.sub(r"[-/↔]", " ", kept).split())
        word = '-'.join(pieces)

    return f"https://www.ldoceonline.com/dictionary/{word}".lower()


def get_tree(word='get', timeout=10):
    """Download the Longman page for *word* and parse it with lxml.

    Args:
        word: Word or phrase to look up; turned into a URL by ``get_link``.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without it a stalled connection would block indefinitely; with it
            ``requests`` raises a timeout exception instead.

    Returns:
        The element produced by ``lxml.html.fromstring`` for the response
        body.  The HTTP status code is not inspected here.
    """
    url = get_link(word)

    # User-Agent string of a mobile Chrome browser, sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36'
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode the body as UTF-8 regardless of what the response declares.
    response.encoding = 'utf-8'

    return html.fromstring(response.text)


def definition(word='get'):
    """Fetch the Longman page for *word* and flatten it into plain text.

    Args:
        word: Word or phrase to look up (passed to ``get_tree``).

    Returns:
        A tuple ``(word, title, meaning)``:

        * ``word`` - the argument, unchanged.
        * ``title`` - text of the first ``<h1>`` whose class contains
          ``"title"``, with runs of whitespace collapsed; ``''`` when the
          page has no such heading.  A title that starts with
          ``'Sorry, there are no results for'`` is cut down to exactly that
          text.
        * ``meaning`` - for every dictionary entry, its part-of-speech tags
          as ``[POS]: `` followed by the selected fields of each sense and
          subsense, then any "did you mean" suggestions (each followed by
          ``', '``).  Empty when the page reports no results.
    """
    tree = get_tree(word)
    parts = []

    ## 00 TITLE ##
    title_nodes = tree.xpath('//h1[contains(@class,"title")]')
    # Fall back to '' so the string operations below also work on a page
    # without a title heading.
    title = title_nodes[0].text_content() if title_nodes else ''

    ## 01 DICTENTRY ##
    # NOTE(review): in XPath, '//span[...][i]' applies the position test per
    # parent element, so the indexed paths below presumably rely on the
    # dictentry / Sense / Subsense spans being siblings - confirm against the
    # page markup.
    dictentry_path = '//span[contains(@class,"dictentry")]'

    # <span> classes that are copied into the output for a sense ...
    sense_classes = ['SIGNPOST', 'DEF', 'SYN', 'OPP', 'RELATEDWD', 'BREQUIV']
    # ... and for a subsense.
    subsense_classes = ['SIGNPOST', 'sensenum span', 'DEF', 'SYN', 'OPP', 'RELATEDWD', 'BREQUIV']

    for entry_no in range(1, len(tree.xpath(dictentry_path)) + 1):
        entry_path = f'{dictentry_path}[{entry_no}]'

        ## POS ENTRY ##  type of word: verb, noun, adjective ...
        pos_path = entry_path + '//span[contains(@class,"Head")]//span[@class="POS"]'
        for pos in tree.xpath(pos_path):
            parts.append(f'[{pos.text_content().strip().upper()}]: ')

        ## 02 SENSES ##
        sense_path = entry_path + '//span[@class="Sense"]'
        for sense_no in range(1, len(tree.xpath(sense_path)) + 1):
            this_sense = f'{sense_path}[{sense_no}]'

            for cla in sense_classes:
                found = tree.xpath(f'{this_sense}/span[@class="{cla}"]')
                if not found:
                    continue
                text = found[0].text_content().strip()
                # Signposts are shown in capitals and without a separator.
                parts.append(f'{text.upper()} ' if cla == 'SIGNPOST' else f'{text}; ')

            ## 03 SUBSENSES ##
            subsense_path = this_sense + '//span[@class="Subsense"]'
            for sub_no in range(1, len(tree.xpath(subsense_path)) + 1):
                for cla in subsense_classes:
                    found = tree.xpath(f'{subsense_path}[{sub_no}]/span[@class="{cla}"]')
                    if found:
                        parts.append(f'{found[0].text_content()}; ')

    # Page variant "Did you mean:" - list the suggested spellings.
    for suggestion in tree.xpath('//ul[@class="didyoumean"]/li'):
        parts.append(suggestion.text_content().strip() + ', ')

    meaning = ''.join(parts)

    # Page variant "Sorry, there are no results for ..." - no meaning at all.
    no_results_title = 'Sorry, there are no results for'
    if title.startswith(no_results_title):
        title = no_results_title
        meaning = ''

    # Collapse line breaks and repeated blanks inside the title.
    title = ' '.join(title.split())

    return word, title, meaning

### read and save xls file ###

# Path of the Excel workbook holding the saved words.  It is read by
# read_xls_meanings() and written by save_xls_meanings() and update_dataframe().
# NOTE(review): the /content/drive prefix looks like a Google Drive mount in
# Colab - confirm the drive is mounted before running.
excel_file = '/content/drive/MyDrive/meanings.xlsx'

def read_xls_meanings():
    """Load the meanings workbook (``excel_file``) into a DataFrame.

    The sheet is read with no header row and no index column, so pandas
    labels the columns with integers.
    """
    return pd.read_excel(excel_file, index_col=None, header=None)

def save_xls_meanings(df):
    """Write *df* to the meanings workbook (``excel_file``).

    Neither the index nor a header row is written.
    """
    df.to_excel(excel_file, header=None, index=False)

def update_dataframe(df):
    """Prepare the meanings table in its final layout and save it.

    Steps, all performed on a copy so the caller's DataFrame is not modified:

    1. column 0 (the key) becomes the lower-cased first character of
       column 1 (the word);
    2. rows are sorted by the lower-cased word;
    3. within the sorted table only the first row of each key keeps it,
       the repeated keys are blanked with NaN;
    4. the result is written to the workbook via ``save_xls_meanings``.

    Args:
        df: Meanings table with integer column labels, as returned by
            ``read_xls_meanings``.

    Returns:
        None.
    """
    table = df.copy()

    # Key column = first letter of the word, lower-cased.
    table[0] = table[1].str[0].str.lower()

    # Sort case-insensitively through a temporary helper column.  A stable
    # sort keeps the existing order of words that differ only in case.
    table = (
        table.assign(lower=table[1].str.lower())
        .sort_values(by='lower', kind='mergesort')
        .drop(columns='lower')
    )

    # Show each key only once, on the first row of its group.
    repeated = table.duplicated(subset=0, keep='first')
    table.loc[repeated, 0] = np.nan

    save_xls_meanings(table)


def show_save_meaning(word):
    """Return the meaning of *word*, using the saved sheet as a cache.

    Lookup order:

    1. If *word* is already in the word column of the sheet, its counter
       (column 2) is incremented and the stored row is returned.
    2. Otherwise the word is fetched with ``definition``.  If the title
       found there is already in the sheet, that row is used as in step 1.
    3. If the lookup produced no usable title (empty, ``'Did you mean:'`` or
       ``'Sorry, there are no results for'``) nothing is saved.
    4. Otherwise a new row with counter 1 is appended and the sheet is
       re-sorted and saved with ``update_dataframe``.

    Args:
        word: Word or phrase to look up.

    Returns:
        ``(word, title, nro, meaning)`` for cases 1, 2 and 4, where ``nro``
        is the lookup counter; ``(title, word, meaning)`` for case 3.
    """
    mn = read_xls_meanings()
    known = mn[1].values

    title = ''
    meaning = ''

    # Find the sheet entry to reuse: the word itself, or the title under
    # which the dictionary lists it.
    key = word if word in known else None
    if key is None:
        word, title, meaning = definition(word)
        if title in known:
            key = title

    if key is not None:
        rows = mn[1] == key
        mn.loc[rows, 2] += 1
        title, nro, meaning = tuple(mn.loc[rows, [1, 2, 3]].values[0])

        save_xls_meanings(mn)
        return word, title, nro, meaning

    # An empty title cannot provide a key letter (title[0]) and carries no
    # entry worth saving, so it is treated like the "not found" pages.
    if not title or title in ['Did you mean:', 'Sorry, there are no results for']:
        return title, word, meaning

    nro = 1
    mn.loc[len(mn)] = {0: title[0], 1: title, 2: nro, 3: meaning}

    update_dataframe(mn)
    return word, title, nro, meaning


def update_meaning_word(word='get', new_meaning='new meaning'):
    """Replace the stored meaning of *word* and save the sheet.

    Only the first row whose word column equals *word* is changed.

    Returns:
        ``(word, new_meaning)`` on success; ``None`` (after printing a
        message) when the word is not in the sheet.
    """
    table = read_xls_meanings()

    if word not in table[1].values:
        print(f"Word '{word}' not found in meanings DataFrame.")
        return None

    # Index of the first matching row, then overwrite its meaning column.
    first_hit = table[table[1] == word].index[0]
    table.iloc[first_hit, 3] = new_meaning

    save_xls_meanings(table)
    return word, new_meaning


def introduce_word(word='a1', meaning='a1 definition'):
    """Add a hand-written entry for *word* to the sheet.

    Returns:
        ``(word, meaning)`` when the entry was added; ``None`` (after
        printing a message) when the word is already in the sheet.
    """
    table = read_xls_meanings()

    if word in table[1].values:
        print('Word already exists')
        return None

    # New row: key letter, word, counter starting at 1, meaning.
    table.loc[len(table)] = {0: word[0], 1: word, 2: 1, 3: meaning}

    update_dataframe(table)
    return word, meaning

def delete_word(word='abandon'):
    """Delete the first sheet row whose word column equals *word*.

    The remaining table is re-sorted and saved with ``update_dataframe``.
    A message is printed in both outcomes.

    Args:
        word: Word to remove.

    Returns:
        None.
    """
    mn = read_xls_meanings()

    matches = mn.index[mn[1] == word]
    if len(matches) == 0:
        print(f"Word {word} doesn't exist")
        return

    # Look the row up by the requested word, not by a fixed literal.
    mn = mn.drop(index=matches[0])

    update_dataframe(mn)
    print(f'{word}: was deleted')


In [None]:
#    <<    ASK, SHOW AND SAVE ASK    >>
# Look the phrase up with show_save_meaning(): it is answered from the saved
# sheet when already present, otherwise fetched with definition() and stored.

show_save_meaning('shake up')

('shake up',
 'shake somebody/something ↔ up',
 2,
 '[PHRASAL VERB]: to give someone a very unpleasant shock, so that they feel very upset and frightened; to make changes to an organization in order to make it more effective; SYN overhaul; [PHRASAL VERB]: to make changes in an organization or system to make it more successful, effective etc; ')

In [None]:
#        show meaning from meanings
# Read the saved sheet and show the stored row for `word` (every column
# except the key letter) without touching its counter.
word = 'demotion'

mm = read_xls_meanings()  # TODO: improve to match on the first 5 or 10 characters
tuple(mm[mm[1]==word].values[0][1:])

('demotion', 1, '[NOUN]: a reduction in rank or position; ')

In [None]:
# Fetch the online definition only; nothing is written to the sheet here.
word1 = 'pack'
dd = definition( word1 )
dd

# Optional follow-up: overwrite the stored meaning of word1 with an edited text.
# d1 = dd[2]
# d1 = "[ADJECTIVE]: upset, shocked, or frightened by something that has happened to you; having had one's composure or confidence upset; shocked or disturbed; "
# update_meaning_word(word1, d1)

('pack',
 'pack',
 '[VERB]: CLOTHES to put things into cases, bags etc ready for a trip somewhere; GOODS to put something into a box or other container, so that it can be moved, sold, or stored; CROWD to go in large numbers into a space, or to make people or animals do this, until the space is too full; PROTECT SOMETHING to cover or fill an object with soft material so that it does not get damaged; SNOW/SOIL ETC to press snow, soil, sand etc down so that it becomes hard and firm; [NOUN]: THINGS WRAPPED TOGETHER something wrapped in paper or packed in a box and then sent by post or taken somewhere; SMALL CONTAINER a small container, usually made of paper, that something is sold in; SYN packet British English; BAG a bag that you carry on your back, especially when climbing or walking, used to carry equipment, clothes etc; , backpack; SYN rucksack British English; CARDS a complete set of playing cards; SYN deck; ANIMALS a group of wild animals that hunt together, or a group of dogs traine

In [None]:
# Manual maintenance of the sheet: uncomment one of the calls below to add a
# hand-written entry or to delete an existing word.
word2 = 'demotion'
defin2 = '[NOUN]: a reduction in rank or position; '
#introduce_word(word2, defin2)
#delete_word('abandon')

('demotion', '[NOUN]: a reduction in rank or position; ')