In [None]:
import html
import re
import urllib.parse
import urllib.request

In [None]:
# Read the word to look up; surrounding whitespace is not part of the word.
word = input("Please enter your word: ").strip()
# Percent-encode the word so spaces and other reserved characters still
# produce a valid URL instead of making urlopen reject the request.
url = "http://www.dictionary.com/browse/" + urllib.parse.quote(word, safe="")
# The context manager closes the HTTP response as soon as the body is read.
with urllib.request.urlopen(url) as response:
    data = response.read().decode("utf-8")
# Show the first 200 characters of the page as a quick sanity check.
print(data[:200])

In [None]:
# Each snippet of interest is the content of an inner <div> that is wrapped
# by a <div> carrying this class name.
pattern = r'<div class="NZKOFkdkcvYgD3lqOIJw"><div>\s*(.*?)\s*</div></div>'
# DOTALL lets "." cross line breaks, so a snippet spread over several lines
# is still captured in full.
matches = re.compile(pattern, flags=re.DOTALL).findall(data)

# Preview at most the first ten captured snippets.
matches[:10]

In [None]:
# Clean the first captured snippet in one pass: the inner substitution drops
# empty "<!-- -->" comment markers, the outer one drops any remaining tags
# (e.g. <a href="...">), and strip() trims the surrounding whitespace.
cleaned = re.sub(r'<[^>]+>', '', re.sub(r'<!--\s*-->', '', matches[0])).strip()
cleaned

In [None]:
# Remove only the empty "<!-- -->" comment markers from the first snippet.
# Tag stripping is not applied in this cell, so any markup such as
# <a href="..."> is still present in the displayed result.
cleaned = re.compile(r'<!--\s*-->').sub('', matches[0])
cleaned

In [None]:
# Clean every captured snippet and collect the results.
definitions = []
for match in matches:
    # Remove HTML comments
    cleaned = re.sub(r'<!--\s*-->', '', match)
    # Remove any leftover HTML tags (e.g., <a href="...">, etc.)
    cleaned = re.sub(r'<[^>]+>', '', cleaned)
    cleaned = cleaned.strip()
    # Keep the cleaned text; snippets that are empty after cleaning are skipped.
    if cleaned:
        definitions.append(cleaned)

# Preview at most the first ten cleaned snippets.
definitions[:10]

**Breakdown of `<[^>]+>`:**

- **`<`:**  
  Matches a literal less-than character. This is the starting delimiter of an HTML tag.

- **`[^>]`:**  
  This is a character class with a caret (`^`) at the beginning, which means "match any character except those inside the brackets." In this case, it matches any character that is not a greater-than sign (`>`).

- **`+`:**  
  A quantifier that means "one or more" of the preceding element. Here, it requires that there is at least one character that is not a `>`.

- **`>`:**  
  Matches a literal greater-than character, which is the closing delimiter of an HTML tag.

**Combined Effect:**

The pattern `<[^>]+>` will match:
- A `<` character,
- Followed by one or more characters that are not `>`,
- And finally a `>` character.

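This describes any substring that begins with `<`, contains at least one character other than `>`, and ends at the first `>` that follows, covering typical HTML tags such as `<a href="/browse/carnivore">` or `</a>`. It is a heuristic rather than a real HTML parser: a `>` inside an attribute value ends the match early, and a stray `<` in ordinary text causes everything up to the next `>` to be removed.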


In [None]:
import re
import urllib.request

# Markup that wraps each definition on the page:
#   <div class="NZKOFkdkcvYgD3lqOIJw"><div> <!-- --> ... </div></div>
# The group captures what is inside the inner <div>...</div>.
# Note: the class name (NZKOFkdkcvYgD3lqOIJw) may be dynamic, so this pattern
# can stop matching if the site changes its markup.
_DEFINITION_RE = re.compile(
    r'<div class="NZKOFkdkcvYgD3lqOIJw"><div>\s*(.*?)\s*</div></div>',
    flags=re.DOTALL,  # let "." match newlines inside a definition
)
# Empty HTML comments such as "<!-- -->".
_EMPTY_COMMENT_RE = re.compile(r'<!--\s*-->')
# Any remaining tag, e.g. <a href="..."> or </a>.
_TAG_RE = re.compile(r'<[^>]+>')


def get_definitions(word, num_defs=5, timeout=10):
    """Fetch the dictionary.com page for ``word`` and extract its definitions.

    Args:
        word: The word (or phrase) to look up. Surrounding whitespace is
            ignored and the rest is percent-encoded before it is placed in
            the URL.
        num_defs: Maximum number of definitions to return. ``None`` returns
            every definition found; zero or a negative number returns an
            empty list.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of unique definition strings in page order, with HTML tags
        removed and HTML entities decoded. The list is empty when the word
        is blank, the page cannot be fetched, or nothing matches the
        expected markup.
    """
    word = word.strip()
    if not word or (num_defs is not None and num_defs <= 0):
        return []

    # Construct the URL for the word; quoting keeps spaces and reserved
    # characters from producing an invalid request.
    url = "http://www.dictionary.com/browse/" + urllib.parse.quote(word, safe="")
    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            page = response.read().decode("utf-8", errors="replace")
    except Exception as e:
        # Best-effort lookup: report the problem and return "no results"
        # instead of interrupting the caller.
        print("Error fetching the URL:", e)
        return []

    definitions = []
    for found in _DEFINITION_RE.finditer(page):
        # Remove HTML comments
        cleaned = _EMPTY_COMMENT_RE.sub('', found.group(1))
        # Remove any leftover HTML tags (e.g., <a href="...">, etc.)
        cleaned = _TAG_RE.sub('', cleaned)
        # Decode entities only after the tags are gone, so an escaped "&lt;"
        # in the text is never mistaken for the start of a tag.
        cleaned = html.unescape(cleaned).strip()
        if cleaned and cleaned not in definitions:
            definitions.append(cleaned)
            if num_defs is not None and len(definitions) >= num_defs:
                break

    return definitions

# Prompt for a word, then look up its definitions.
word = input("Please enter your word: ")
defs = get_definitions(word)

if not defs:
    # Nothing came back: either the fetch failed or no markup matched.
    print("No definitions found. The website's structure may have changed.")
else:
    print("\nDefinitions for '{}' :\n".format(word))
    # One numbered line per definition, counting from 1.
    print("\n".join(
        f"{number}. {definition}"
        for number, definition in enumerate(defs, start=1)
    ))

In [None]:
def dictionary():
    """Prompt for a word on standard input and return its definitions.

    The entered text is passed to ``get_definitions`` with that function's
    default arguments, and its result is returned unchanged.
    """
    entered_word = input("Please enter your word: ")
    return get_definitions(str(entered_word))

In [None]:
# Interactive lookup: prompts for a word; the returned list is shown as the cell's output.
dictionary()

In [None]:
# Interactive lookup: prompts for a word; the returned list is shown as the cell's output.
dictionary()

In [None]:
# Interactive lookup: prompts for a word; the returned list is shown as the cell's output.
dictionary()