#### Args and Kwargs

In [8]:
def show_user_info(**data):
    """Print every keyword argument on its own line as ``key: value``.

    ``**data`` gathers the keyword arguments into a dict, so the lines come
    out in the order the arguments were passed.
    """
    for field in data:
        print(f"{field}: {data[field]}")

In [10]:
show_user_info(name="Ahmed", job="data scientist")

name: Ahmed
job: data scientist


In [8]:
def my_function(a, b, *args, c=2, **kwargs):
    """Demonstrate how positional, variadic and keyword parameters bind.

    Prints ``a + b``, then the first two extra positional arguments, then
    the keyword-only ``c`` and finally each remaining keyword argument as
    ``key: value``.

    Raises:
        IndexError: if fewer than two extra positional arguments are passed.
    """
    print(a + b)
    # Index (rather than slice) so a missing extra positional still raises.
    for position in (0, 1):
        print(args[position])
    print(c)
    for name in kwargs:
        print(f"{name}: {kwargs[name]}")

In [9]:
my_function(2,3,['1','2','3'],['4','3'])

5
['1', '2', '3']
['4', '3']
2


### Spelling Corrector 

In [1]:
# Misspelled sample word used by the walkthrough cells below.
word = 'somthing'
# Every way of cutting `word` into a (left, right) pair, including the two
# cuts that leave one side empty.
cut_points = range(len(word) + 1)
splits = [(word[:cut], word[cut:]) for cut in cut_points]

In [2]:
splits

[('', 'somthing'),
 ('s', 'omthing'),
 ('so', 'mthing'),
 ('som', 'thing'),
 ('somt', 'hing'),
 ('somth', 'ing'),
 ('somthi', 'ng'),
 ('somthin', 'g'),
 ('somthing', '')]

In [3]:
# Show each left half next to its right half minus the first character.
for left, right in splits:
    print(left, right[1:])

 omthing
s mthing
so thing
som hing
somt ing
somth ng
somthi g
somthin 
somthing 


In [4]:
## Delete: drop the letter that follows each split point.
## The final split has an empty right half, so its line is the unchanged word.
for head, tail in splits:
    print(f"{head}{tail[1:]}")

omthing
smthing
sothing
somhing
somting
somthng
somthig
somthin
somthing


In [5]:
## Transpose: swap each pair of adjacent letters in the input word
for head, tail in splits:
    if len(tail) < 2:
        # Fewer than two letters remain, so there is no pair to swap.
        continue
    first, second = tail[0], tail[1]
    print(head + second + first + tail[2:])

osmthing
smothing
sotmhing
somhting
somtihng
somthnig
somthign


In [6]:
## Replace: substitute the letter after each split point with every letter of
## the alphabet (including itself, which reproduces the original word)
letters    = 'abcdefghijklmnopqrstuvwxyz'
for head, tail in splits:
    if not tail:
        # Nothing to replace once the split point is at the end of the word.
        continue
    rest = tail[1:]
    for letter in letters:
        print(head + letter + rest)

aomthing
bomthing
comthing
domthing
eomthing
fomthing
gomthing
homthing
iomthing
jomthing
komthing
lomthing
momthing
nomthing
oomthing
pomthing
qomthing
romthing
somthing
tomthing
uomthing
vomthing
womthing
xomthing
yomthing
zomthing
samthing
sbmthing
scmthing
sdmthing
semthing
sfmthing
sgmthing
shmthing
simthing
sjmthing
skmthing
slmthing
smmthing
snmthing
somthing
spmthing
sqmthing
srmthing
ssmthing
stmthing
sumthing
svmthing
swmthing
sxmthing
symthing
szmthing
soathing
sobthing
socthing
sodthing
soething
softhing
sogthing
sohthing
soithing
sojthing
sokthing
solthing
somthing
sonthing
soothing
sopthing
soqthing
sorthing
sosthing
sotthing
southing
sovthing
sowthing
soxthing
soything
sozthing
somahing
sombhing
somching
somdhing
somehing
somfhing
somghing
somhhing
somihing
somjhing
somkhing
somlhing
sommhing
somnhing
somohing
somphing
somqhing
somrhing
somshing
somthing
somuhing
somvhing
somwhing
somxhing
somyhing
somzhing
somtaing
somtbing
somtcing
somtding
somteing
somtfing
somtging
s

In [7]:
## Insert: add every letter of the alphabet at each split point
letters    = 'abcdefghijklmnopqrstuvwxyz'
for head, tail in splits:
    for letter in letters:
        print(f"{head}{letter}{tail}")

asomthing
bsomthing
csomthing
dsomthing
esomthing
fsomthing
gsomthing
hsomthing
isomthing
jsomthing
ksomthing
lsomthing
msomthing
nsomthing
osomthing
psomthing
qsomthing
rsomthing
ssomthing
tsomthing
usomthing
vsomthing
wsomthing
xsomthing
ysomthing
zsomthing
saomthing
sbomthing
scomthing
sdomthing
seomthing
sfomthing
sgomthing
shomthing
siomthing
sjomthing
skomthing
slomthing
smomthing
snomthing
soomthing
spomthing
sqomthing
sromthing
ssomthing
stomthing
suomthing
svomthing
swomthing
sxomthing
syomthing
szomthing
soamthing
sobmthing
socmthing
sodmthing
soemthing
sofmthing
sogmthing
sohmthing
soimthing
sojmthing
sokmthing
solmthing
sommthing
sonmthing
soomthing
sopmthing
soqmthing
sormthing
sosmthing
sotmthing
soumthing
sovmthing
sowmthing
soxmthing
soymthing
sozmthing
somathing
sombthing
somcthing
somdthing
something
somfthing
somgthing
somhthing
somithing
somjthing
somkthing
somlthing
sommthing
somnthing
somothing
sompthing
somqthing
somrthing
somsthing
somtthing
somuthing
somvthing


In [8]:
def possible_combinations_of_words(word):
    """Return the set of strings one simple edit away from ``word``.

    An edit is: deleting one letter, swapping two adjacent letters,
    replacing one letter with a lowercase ASCII letter, or inserting a
    lowercase ASCII letter at any position. Because a letter may be
    replaced by itself, a non-empty ``word`` is part of the result.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    edits = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        # Insertion is possible at every cut, including the very end.
        edits.update(head + letter + tail for letter in alphabet)
        if not tail:
            continue
        rest = tail[1:]
        edits.add(head + rest)  # delete tail[0]
        edits.update(head + letter + rest for letter in alphabet)  # replace tail[0]
        if len(tail) > 1:
            edits.add(head + tail[1] + tail[0] + tail[2:])  # swap tail[0], tail[1]
    return edits

In [9]:
# Size of each edit family for the sample `word`, before de-duplication.
n = len(word)
print(f'Total number of deletions: {n}')
print(f'Total number of transposes: {n - 1}')
print(f'Total number of replacements: {26*(n)}')
print(f'Total number of insertions: {26*(n+1)}')
# n + (n - 1) + 26n + 26(n + 1) = 54n + 25
print(f'Total number of combinations: {54*n+25}')
# Count the distinct edits instead of hard-coding 442, which is only correct
# for the sample word 'somthing'.
print(f'Removing duplicates therefore we get: {len(possible_combinations_of_words(word))}')

Total number of deletions: 8
Total number of transposes: 7
Total number of replacements: 208
Total number of insertions: 234
Total number of combinations: 457
Removing duplicates therefore we get: 442


In [11]:
combinations_of_input_word = possible_combinations_of_words('somthing')
len(combinations_of_input_word)

442

### Importing the Big Text

In [4]:
from collections import Counter
import re

In [5]:
def words(text):
    """Return the lowercased word tokens of ``text`` as a list.

    A token is a maximal run of regex word characters (letters, digits and
    underscore); everything else acts as a separator.
    """
    lowered = text.lower()
    return re.findall(r'\w+', lowered)


In [6]:
# Read the whole corpus. The context manager closes the file handle, which
# the bare open(...).read() never closed explicitly.
with open('big.txt') as corpus_file:
    big_text = corpus_file.read()

In [7]:
list_of_words_in_big_text = re.findall(r'\w+', big_text.lower())

In [8]:
count_of_words_in_big_text = Counter(list_of_words_in_big_text)
count_of_words_in_big_text

Counter({'the': 79809,
         'project': 288,
         'gutenberg': 263,
         'ebook': 87,
         'of': 40024,
         'adventures': 17,
         'sherlock': 101,
         'holmes': 467,
         'by': 6735,
         'sir': 177,
         'arthur': 34,
         'conan': 4,
         'doyle': 5,
         '15': 47,
         'in': 22023,
         'our': 1065,
         'series': 128,
         'copyright': 51,
         'laws': 233,
         'are': 3630,
         'changing': 43,
         'all': 4143,
         'over': 1282,
         'world': 362,
         'be': 6155,
         'sure': 123,
         'to': 28765,
         'check': 38,
         'for': 6941,
         'your': 1279,
         'country': 423,
         'before': 1362,
         'downloading': 5,
         'or': 5352,
         'redistributing': 7,
         'this': 4063,
         'any': 1203,
         'other': 1501,
         'header': 7,
         'should': 1297,
         'first': 1174,
         'thing': 303,
         'seen': 444,
  

### Getting the words that exist in the Big Text with our combination of words

In [9]:
def known_words_from_big_text(combination):
    """Return the members of ``combination`` that occur in the corpus.

    Membership is tested against the module-level
    ``count_of_words_in_big_text``; the result is a set, so duplicates in
    ``combination`` collapse.
    """
    return {candidate for candidate in combination
            if candidate in count_of_words_in_big_text}

In [12]:
matched_words_from_big_text = known_words_from_big_text(combinations_of_input_word)

In [13]:
matched_words_from_big_text

{'something', 'soothing'}

### Second possible combination of words

In [14]:
combinations_of_input_word

{'aomthing',
 'asomthing',
 'bomthing',
 'bsomthing',
 'comthing',
 'csomthing',
 'domthing',
 'dsomthing',
 'eomthing',
 'esomthing',
 'fomthing',
 'fsomthing',
 'gomthing',
 'gsomthing',
 'homthing',
 'hsomthing',
 'iomthing',
 'isomthing',
 'jomthing',
 'jsomthing',
 'komthing',
 'ksomthing',
 'lomthing',
 'lsomthing',
 'momthing',
 'msomthing',
 'nomthing',
 'nsomthing',
 'omthing',
 'oomthing',
 'osmthing',
 'osomthing',
 'pomthing',
 'psomthing',
 'qomthing',
 'qsomthing',
 'romthing',
 'rsomthing',
 'samthing',
 'saomthing',
 'sbmthing',
 'sbomthing',
 'scmthing',
 'scomthing',
 'sdmthing',
 'sdomthing',
 'semthing',
 'seomthing',
 'sfmthing',
 'sfomthing',
 'sgmthing',
 'sgomthing',
 'shmthing',
 'shomthing',
 'simthing',
 'siomthing',
 'sjmthing',
 'sjomthing',
 'skmthing',
 'skomthing',
 'slmthing',
 'slomthing',
 'smmthing',
 'smomthing',
 'smothing',
 'smthing',
 'snmthing',
 'snomthing',
 'soamthing',
 'soathing',
 'sobmthing',
 'sobthing',
 'socmthing',
 'socthing',
 'sod

In [15]:
def scnd_possible_combinations(combinations):
    """Return the edits of every string in ``combinations`` as one flat list.

    Each string is expanded with ``possible_combinations_of_words`` and the
    results are concatenated without de-duplication, so the same string can
    appear many times.
    """
    return [second_edit
            for first_edit in combinations
            for second_edit in possible_combinations_of_words(first_edit)]

In [16]:
scnd_comb_of_words = scnd_possible_combinations(combinations_of_input_word)

###  Getting words that exist in Big Text from the second combination of words

In [18]:
matched_scnd_words_from_big_text = known_words_from_big_text(scnd_comb_of_words)

In [19]:
matched_scnd_words_from_big_text

{'loathing',
 'nothing',
 'scathing',
 'seething',
 'smoothing',
 'something',
 'soothing',
 'sorting'}

### Calculate the probability of each word in the big text

In [20]:
def probablity_calc(word, N=sum(count_of_words_in_big_text.values())): 
    """Return the relative frequency of ``word`` in the corpus word counts.

    ``N`` defaults to the sum of all counts; that default is evaluated once,
    when this function is defined, not on every call.
    """
    occurrences = count_of_words_in_big_text[word]
    return occurrences / N

In [21]:
probablity_calc('the')

0.07154004401278254

In [22]:
probablity_calc('outrivaled')
8.9645577245801e-07

8.9645577245801e-07

### Final Function

In [1]:
import json
from collections import Counter
import re
#from Utilities import *

In [9]:
def possible_combinations_of_words(word):
    """Return the set of strings one simple edit away from ``word``.

    Covers single-letter deletions, adjacent-letter swaps, single-letter
    replacements and single-letter insertions, using the lowercase ASCII
    alphabet. A non-empty ``word`` is itself in the result, because a letter
    may be replaced by itself.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    pieces = [(word[:cut], word[cut:]) for cut in range(len(word) + 1)]
    # Deletion, swap and replacement all need a letter to the right of the cut.
    with_tail = [(head, tail) for head, tail in pieces if tail]
    removed = {head + tail[1:] for head, tail in with_tail}
    swapped = {head + tail[1] + tail[0] + tail[2:]
               for head, tail in with_tail if len(tail) > 1}
    substituted = {head + letter + tail[1:]
                   for head, tail in with_tail for letter in alphabet}
    added = {head + letter + tail
             for head, tail in pieces for letter in alphabet}
    return removed | swapped | substituted | added

def words(text): 
    """Lowercase ``text`` and return its word tokens as a list.

    Tokens are maximal runs of regex word characters (letters, digits,
    underscore).
    """
    token_pattern = re.compile(r'\w+')
    return token_pattern.findall(text.lower())

def known_words_from_big_text(combination,count_of_words_in_big_text): 
    """Return the set of strings in ``combination`` found in the word counts.

    ``count_of_words_in_big_text`` only needs to support ``in``; duplicates
    in ``combination`` collapse because the result is a set.
    """
    return {candidate for candidate in combination
            if candidate in count_of_words_in_big_text}

def scnd_possible_combinations(combinations):
    """Expand every string in ``combinations`` by one more edit.

    Returns a flat list of all strings produced by
    ``possible_combinations_of_words`` for each input; duplicates are kept.
    """
    expanded = []
    for first_edit in combinations:
        expanded.extend(possible_combinations_of_words(first_edit))
    return expanded

def candidates(*args):
    """Pick the first non-empty group of correction candidates.

    Positional arguments, in order: the word itself, the known words one
    edit away, the known words two edits away, and the corpus word counts.
    Preference order: the word if it is in the counts, then the one-edit
    matches, then the two-edit matches, and finally ``[word]`` unchanged.
    """
    word, counts = args[0], args[3]
    exact_match = known_words_from_big_text([word], counts)
    if exact_match:
        return exact_match
    return args[1] or args[2] or [word]
    
def correction(possible_candidates, count_of_words_in_big_text):
    """Return the candidate with the highest relative frequency in the corpus.

    Args:
        possible_candidates: non-empty iterable of candidate words.
        count_of_words_in_big_text: mapping of word -> occurrence count
            (a ``Counter`` yields 0 for unseen words).

    Returns:
        The candidate whose count divided by the total count is largest;
        on ties ``max`` keeps the first one encountered.

    Raises:
        ValueError: if ``possible_candidates`` is empty.
        ZeroDivisionError: if there are candidates but the counts sum to zero.
    """
    # The total is identical for every candidate, so compute it once instead
    # of re-summing the whole table inside the key function.
    total = sum(count_of_words_in_big_text.values())
    return max(possible_candidates,
               key=lambda candidate: count_of_words_in_big_text[candidate] / total)

In [7]:
# Misspelled input to correct.
word = 'somthing'

# Every string one edit away from the input.
first_combinations= possible_combinations_of_words(word)

# Build the word-frequency table from the corpus. The context manager closes
# the file handle, which the bare open(...).read() never closed explicitly.
with open('big.txt') as corpus_file:
    big_text = corpus_file.read()
list_of_words_in_big_text = words(big_text)
count_of_words_in_big_text =  Counter(list_of_words_in_big_text)

# Strings two edits away, then keep only the strings that occur in the corpus.
second_combinations = scnd_possible_combinations(first_combinations)
first_matched_words_from_big_text = known_words_from_big_text(first_combinations,count_of_words_in_big_text)
second_matched_words_from_big_text = known_words_from_big_text(second_combinations,count_of_words_in_big_text)

# Prefer the word itself if known, then one-edit matches, then two-edit matches.
possible_candidates = candidates(word,first_matched_words_from_big_text,second_matched_words_from_big_text,count_of_words_in_big_text)

# The most frequent candidate in the corpus wins.
final_candidate = correction(possible_candidates, count_of_words_in_big_text)

In [8]:
final_candidate

'something'

In [5]:
import json
from collections import Counter
import re

def possible_combinations_of_words(word):
    """Return the set of strings one simple edit away from ``word``.

    Edits are: delete one letter, swap two adjacent letters, replace one
    letter, or insert one letter, with letters drawn from the lowercase
    ASCII alphabet. A non-empty ``word`` is included, since a letter may be
    replaced by itself.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    results = []
    for position in range(len(word) + 1):
        before, after = word[:position], word[position:]
        # Insert a letter at this position (valid even at the very end).
        results.extend(before + letter + after for letter in alphabet)
        if after:
            remainder = after[1:]
            results.append(before + remainder)  # delete after[0]
            results.extend(before + letter + remainder for letter in alphabet)  # replace after[0]
        if len(after) > 1:
            results.append(before + after[1] + after[0] + after[2:])  # swap neighbours
    return set(results)
        

def words(text): 
    """Return the word tokens of ``text`` in lowercase, in order of appearance.

    A token is a maximal run of regex word characters (letters, digits,
    underscore).
    """
    return [match.group(0) for match in re.finditer(r'\w+', text.lower())]
        
def known_words_from_big_text(combination,count_of_words_in_big_text): 
    """Return the set of strings from ``combination`` present in the counts.

    Each string is kept when ``string in count_of_words_in_big_text`` holds.
    """
    known = set()
    for candidate in combination:
        if candidate in count_of_words_in_big_text:
            known.add(candidate)
    return known
        
def scnd_possible_combinations(combinations):
    """Return every edit of every string in ``combinations`` as a flat list.

    Each input is expanded with ``possible_combinations_of_words``; the
    outputs are concatenated in iteration order and duplicates are kept.
    """
    return [twice_edited
            for once_edited in combinations
            for twice_edited in possible_combinations_of_words(once_edited)]

def candidates(*args):
    """Choose the first non-empty group of correction candidates.

    Expects four positional arguments: the word, the known one-edit matches,
    the known two-edit matches and the corpus word counts. Returns the word
    (as a set) if it is in the counts, otherwise the one-edit matches,
    otherwise the two-edit matches, otherwise ``[word]``.
    """
    original, counts = args[0], args[3]
    known_original = known_words_from_big_text([original], counts)
    if known_original:
        return known_original
    return args[1] or args[2] or [original]
            

def correction(possible_candidates,count_of_words_in_big_text):
    """Return the candidate that is most frequent in the corpus.

    Args:
        possible_candidates: non-empty iterable of candidate words.
        count_of_words_in_big_text: mapping of word -> occurrence count
            (a ``Counter`` yields 0 for unseen words).

    Returns:
        The candidate with the largest count / total ratio; on ties ``max``
        keeps the first one encountered.

    Raises:
        ValueError: if ``possible_candidates`` is empty.
        ZeroDivisionError: if there are candidates but the counts sum to zero.
    """
    # Hoisted out of the key function: the total does not depend on the
    # candidate, so summing the table once per call is enough.
    total = sum(count_of_words_in_big_text.values())
    return max(possible_candidates,
               key=lambda candidate: count_of_words_in_big_text[candidate] / total)

def lambda_handler(event,context):
    """Spell-correct the word carried in the request's raw query string.

    NOTE(review): the name and the 'rawQueryString' key suggest this is an
    AWS Lambda handler behind an HTTP integration -- confirm against the
    deployment.

    Args:
        event: mapping with a 'rawQueryString' entry; its value is used
            verbatim as the word to correct.
        context: unused.

    Returns:
        dict with 'statusCode' 200 and 'body' set to the JSON-encoded
        correction.

    Raises:
        KeyError: if ``event`` has no 'rawQueryString' entry.
    """
    word = event['rawQueryString']

    first_combinations = possible_combinations_of_words(word)

    # The context manager closes the corpus file; the bare open(...).read()
    # left a handle open on every invocation.
    with open('big.txt') as corpus_file:
        big_text = corpus_file.read()
    # Reuse words() rather than repeating its regex here.
    count_of_words_in_big_text = Counter(words(big_text))

    first_matched_words_from_big_text = known_words_from_big_text(first_combinations,count_of_words_in_big_text)

    # candidates() only falls back to the two-edit matches when the word is
    # unknown and there is no one-edit match, so skip the expensive second
    # expansion whenever it cannot affect the result.
    if word in count_of_words_in_big_text or first_matched_words_from_big_text:
        second_matched_words_from_big_text = set()
    else:
        second_combinations = scnd_possible_combinations(first_combinations)
        second_matched_words_from_big_text = known_words_from_big_text(second_combinations,count_of_words_in_big_text)

    possible_candidates = candidates(word,first_matched_words_from_big_text,second_matched_words_from_big_text,count_of_words_in_big_text)
    c = correction(possible_candidates,count_of_words_in_big_text)

    return {
        'statusCode': 200,
        'body': json.dumps(c),
    }
    