# Library Import

In [21]:
import numpy as np
import pandas as pd
from googletrans import Translator
import Levenshtein as L
from autocorrect import Speller
from spello.model import SpellCorrectionModel

# Read Data

In [2]:
# Load the English words/sentences corpus from disk and preview its first rows.
df = pd.read_csv(filepath_or_buffer='English.csv')
df.head(n=5)

Unnamed: 0,English words/sentences
0,Hi.
1,Run!
2,Run!
3,Who?
4,Wow!


In [3]:
# Summary statistics of the corpus; for the text column this reports
# count, unique, top (most frequent value) and freq.
df.describe()

Unnamed: 0,English words/sentences
count,175621
unique,123100
top,I can't tell you how happy I am that you've co...
freq,32


In [4]:
# Number of missing values in each column.
df.isna().sum()

English words/sentences    0
dtype: int64

# Task 1
----------
Translate each word or sentence from English to Spanish, French, and German.

In [10]:
# Take only 100 samples, as the current hardware doesn't have the capacity to
# process all 175621 rows. random_state pins the random draw so that a
# Restart & Run All reproduces the same sample (and therefore the same
# translations in the cells below).
df1 = df.sample(n=100, random_state=42)
df1

Unnamed: 0,English words/sentences
23559,How did you get in?
13200,You smell awful.
154161,I think he can get along with his neighbors.
110732,"Now, what else can I do for you?"
83359,I shouldn't have interfered.
...,...
118953,Don't think about stuff like that.
74647,He advised me not to smoke.
39089,You have been warned.
153881,"I arrived late, so I didn't hear everything."


In [11]:
# Translate the English column into every target language.
# A single loop over a {column name: language code} map replaces three
# copy-pasted statements, so supporting another language is a one-line change.
translator = Translator()

TARGET_LANGUAGES = {'Spanish': 'es', 'French': 'fr', 'German': 'de'}

for column, lang_code in TARGET_LANGUAGES.items():
    # translate() returns a result object; the translated string is its .text attribute.
    df1[column] = df1['English words/sentences'].apply(
        lambda sentence: translator.translate(sentence, src='en', dest=lang_code).text
    )

In [12]:
# Display the sample together with its Spanish, French and German columns.
df1

Unnamed: 0,English words/sentences,Spanish,French,German
23559,How did you get in?,¿Cómo entraste?,Comment êtes-vous entré?,Wie bist du reingekommen?
13200,You smell awful.,Hueles horrible.,Vous sentez mauvais.,Du riechst schrecklich.
154161,I think he can get along with his neighbors.,Creo que puede llevarse bien con sus vecinos.,Je pense qu'il peut s'entendre avec ses voisins.,"Ich denke, er kommt mit seinen Nachbarn zurecht."
110732,"Now, what else can I do for you?","Ahora, ¿qué más puedo hacer por ti?","Maintenant, que puis-je faire d'autre pour vous ?",Was kann ich sonst noch für Sie tun?
83359,I shouldn't have interfered.,No debería haber interferido.,Je n'aurais pas dû intervenir.,Ich hätte mich nicht einmischen sollen.
...,...,...,...,...
118953,Don't think about stuff like that.,No pienses en cosas así.,Ne pense pas à des trucs comme ça.,Denk nicht an solche Sachen.
74647,He advised me not to smoke.,Me aconsejó que no fumara.,Il m'a conseillé de ne pas fumer.,"Er riet mir, nicht zu rauchen."
39089,You have been warned.,Usted ha sido advertido.,Tu étais prévenu.,Du wurdest gewarnt.
153881,"I arrived late, so I didn't hear everything.","Llegué tarde, así que no escuché todo.","Je suis arrivé en retard, donc je n'ai pas tou...","Ich kam spät an, also habe ich nicht alles geh..."


# Task 2
--------
Program to correct the spelling of a given word or sentence.

In [19]:
# Reading 50 Misspelled words
df2 = pd.read_csv('train_csv/aspell.txt', delimiter=':', header=None)
df2 = df2.iloc[:50, :]
# Reading 50 Misspelled sentences
df3 = pd.read_csv('train_csv/aspell1.csv', delimiter=',', header=None)
df3 = df3.iloc[:50, :]
df2 = df2.append(df3, ignore_index=True)
df2.rename(columns={0:'Original', 1:'Misspelled'}, inplace=True)
df2

  df2 = df2.append(df3, ignore_index=True)


Unnamed: 0,Original,Misspelled
0,Nevada,nevade
1,Presbyterian,presbyterian
2,RSX,rsx
3,Stephen,Steffen
4,Susan,susan
...,...,...
95,moree attack prompts police warning,ormee attack prompts police warning
96,receding floodwater leaves layer of sludge,receding fKokdwater lSaCes layer of sludge
97,home construction figures fall abs statistics,home construction fiugrse flal abs statistics
98,nyiaparli people claim fortescue forced them to,nyia)Wrli people clA(m foDtesFue forced them to


## Using AutoCorrect - Speller

In [20]:
# Run the English autocorrect Speller over every misspelled entry and store
# the result next to its input for comparison.
spell = Speller(lang='en')
df2['Corrected_Speller'] = [spell(entry) for entry in df2['Misspelled']]
df2

Unnamed: 0,Original,Misspelled,Corrected_Speller
0,Nevada,nevade,evade
1,Presbyterian,presbyterian,presbyterian
2,RSX,rsx,rs
3,Stephen,Steffen,Stephen
4,Susan,susan,susan
...,...,...,...
95,moree attack prompts police warning,ormee attack prompts police warning,free attack prompts police warning
96,receding floodwater leaves layer of sludge,receding fKokdwater lSaCes layer of sludge,preceding fKokdwater leaves layer of sludge
97,home construction figures fall abs statistics,home construction fiugrse flal abs statistics,home construction figure flag abs statistics
98,nyiaparli people claim fortescue forced them to,nyia)Wrli people clA(m foDtesFue forced them to,nia)Wali people cl(m foDtesFue forced them to


### Levenshtein Similarity Ratio

In [34]:
# Score each Speller correction against the ground-truth text using
# Levenshtein.ratio (a similarity score rather than a raw edit distance).
df2['L_Corrected_Speller'] = [
    L.ratio(fixed, truth)
    for fixed, truth in zip(df2['Corrected_Speller'], df2['Original'])
]
df2

Unnamed: 0,Original,Misspelled,Corrected_Speller,L_Corrected_Speller
0,Nevada,nevade,evade,0.666667
1,Presbyterian,presbyterian,presbyterian,0.880000
2,RSX,rsx,rs,0.000000
3,Stephen,Steffen,Stephen,0.933333
4,Susan,susan,susan,0.727273
...,...,...,...,...
95,moree attack prompts police warning,ormee attack prompts police warning,free attack prompts police warning,0.956522
96,receding floodwater leaves layer of sludge,receding fKokdwater lSaCes layer of sludge,preceding fKokdwater leaves layer of sludge,0.941176
97,home construction figures fall abs statistics,home construction fiugrse flal abs statistics,home construction figure flag abs statistics,0.943820
98,nyiaparli people claim fortescue forced them to,nyia)Wrli people clA(m foDtesFue forced them to,nia)Wali people cl(m foDtesFue forced them to,0.869565


## Using Spello - SpellCorrectionModel