In [None]:
from textblob import TextBlob

In [None]:
import nltk
from collections import Counter
from nltk.corpus import words, brown
import re

In [None]:
# Fetch the two NLTK corpora this notebook reads: 'words' (used to build
# valid_words) and 'brown' (used to build word_freq).
nltk.download('words')
nltk.download('brown')

[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.


True

In [None]:
# Set of accepted spellings, used by known() for membership checks.
valid_words = set(words.words())

# Token counts from the Brown corpus. Tokens are lowercased before counting
# because probability() looks words up by their lowercase form; counting
# case-sensitively would drop every capitalised occurrence (e.g. "The").
word_freq = Counter(token.lower() for token in brown.words())

# Denominator for probability(): total number of counted tokens.
total_words = sum(word_freq.values())

In [None]:
def edits1(word):
  """Return the set of strings one simple edit away from `word`.

  An edit is one of: deleting a character, swapping two adjacent characters,
  replacing a character with a lowercase ASCII letter, or inserting a
  lowercase ASCII letter at any position (including both ends).
  """
  alphabet = 'abcdefghijklmnopqrstuvwxyz'
  candidates = set()
  for cut in range(len(word) + 1):
    head, tail = word[:cut], word[cut:]
    if tail:
      # Deletion: drop the first character of the tail.
      candidates.add(head + tail[1:])
      if len(tail) > 1:
        # Transposition: swap the first two characters of the tail.
        candidates.add(head + tail[1] + tail[0] + tail[2:])
    for ch in alphabet:
      if tail:
        # Replacement: substitute the first character of the tail.
        candidates.add(head + ch + tail[1:])
      # Insertion: place a letter at the cut point.
      candidates.add(head + ch + tail)
  return candidates

In [None]:
def edits2(word):
  """Return the set of strings two simple edits away from `word`.

  Applies edits1() to every result of edits1(word). A new set is built on
  every call: accumulating into a shared module-level set would make each
  call also return the candidates generated for previously corrected words.
  """
  return {second for first in edits1(word) for second in edits1(first)}

In [None]:
def known(words):
  """Return the subset of `words` whose lowercase form is in `valid_words`.

  The original casing of each accepted candidate is kept in the result.
  """
  recognised = set()
  for candidate in words:
    if candidate.lower() in valid_words:
      recognised.add(candidate)
  return recognised

In [None]:
def probability(word):
  """Return the relative frequency of `word` according to `word_freq`.

  The lookup uses the lowercase form of `word`; the count is divided by
  `total_words`.
  """
  occurrences = word_freq[word.lower()]
  return occurrences / total_words

In [None]:
def correct(word):
  """Return the most probable spelling correction for `word`.

  Candidate sources are tried in order and the first non-empty one wins:
  the word itself if known, then known words one edit away, then known
  words two edits away, and finally the unchanged word as a fallback.
  The winner is the candidate with the highest probability().
  """
  candidates = known([word])
  if not candidates:
    candidates = known(edits1(word))
  if not candidates:
    # Only computed when nothing closer matched.
    candidates = known(edits2(word))
  if not candidates:
    candidates = [word]
  return max(candidates, key=probability)

In [None]:
# Quick check on a single misspelling (one missing letter).
correct("speling")

'spelling'

In [None]:
# Sample misspellings to run through correct().
test_words = ['speling', 'korrect', 'appl', 'helo', 'beutiful']
print('Correct Words')
# Print each input next to the suggestion returned for it.
for word in test_words:
  print(f"{word} -> {correct(word)}")

Correct Words
speling -> spelling
korrect -> correct
appl -> apply
helo -> help
beutiful -> beautiful


In NLP, a commonly used Python module for spelling correction is TextBlob, which provides a simple interface for spell checking.

SymSpell and Spello are used for more advanced, customized, and domain-specific spell checking.

In [None]:
# Sentence containing several misspelled words.
text = 'I havv a speling mistak in this sentnce'

# Wrap the raw string so TextBlob's methods can be applied to it.
blob = TextBlob(text)

# correct() produces the spell-corrected version of the text.
corrected_text = blob.correct()

print(corrected_text)

I have a spelling mistake in this sentence


# Named Entity Recognition (NER)

In [None]:
import spacy

# Load the "en_core_web_sm" spaCy pipeline.
nlp = spacy.load("en_core_web_sm")

# NOTE(review): 'Sputh' looks like a typo for 'South'; left unchanged because
# the recorded output below was produced from this exact string.
text = 'Elon Musk is the CEO of Tesla, and he was born in Pretoria, Sputh Africa'

doc = nlp(text)

# Collect (entity text, entity label) pairs for every entity found in the doc.
entities = [(ent.text, ent.label_) for ent in doc.ents]

for entity, label in entities:
  print(f'Entity : {entity}, Label : {label}')


Entity : Elon Musk, Label : PERSON
Entity : Tesla, Label : ORG
Entity : Pretoria, Label : GPE
Entity : Sputh Africa, Label : GPE
