## Importing Libraries & Datasets

In [1]:

#For Implementing Gramformer Solution
from gramformer import Gramformer

#for implementing Bert Solution
from happytransformer import HappyTextToText, TTSettings

#For Implementing LanguageTool Solution
import language_tool_python

#For GingerIt Solution
from gingerit.gingerit import GingerIt

#For Symspellpy
import pkg_resources
from symspellpy import SymSpell

#For TextBlob
from textblob import TextBlob

#For Web Application Development
import gradio as gr

import pandas as pd
import sys 


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Show every sentence in full instead of truncating long text columns.
pd.set_option('display.max_colwidth', None)

df = pd.read_csv('./grammatical error detection/NLP Assignment/test_data.csv')

# Work on an independent copy of the first 40 rows: later cells add columns to
# test_df, and assigning into a bare df.head(40) slice triggers pandas'
# SettingWithCopyWarning.
test_df = df.head(40).copy()

# Preview the data; this has to be the cell's last expression to be displayed.
df.head(20)

## Gramformer Model 

In [3]:
# Create the Gramformer instance used by every Gramformer cell below (CPU only, use_gpu=False).
gf = Gramformer(models=1, use_gpu=False) #1=corrector, 2=detector

[Gramformer] Grammar error correct/highlight model loaded..


In [4]:
# Smoke test on a single sentence; the recorded output shows the result is a set of strings.
gf.correct('hello my dear childs')

{'hello my dear child.'}

    Function used to build a web application with Gradio

In [5]:
def correct(sentence):
    """Run the Gramformer corrector on ``sentence`` and return its result as-is.

    This is the callback referenced by the (currently commented-out) gradio
    interface below.
    """
    return gf.correct(sentence)
# app_inputs = gr.inputs.Textbox(lines=3, placeholder="Enter a grammatically incorrect sentence here...")

# interface = gr.Interface(fn=correct, 
#                         inputs=app_inputs,
#                          outputs='text', 
#                         title='Hi there, I\'m Gramformer')

#interface.launch()

    Gramformer highlighter

In [6]:
# gh = Gramformer(models=3, use_gpu=False) #1=corrector, 2=detector

In [7]:
# gh.highlight(orig='to tha store',cor='to the store')

### Trying Gramformer on Datasets

In [8]:
def gramformer_corrector(text):
    """Return whatever ``gf.correct`` produces for ``text``.

    Serves as the per-row callback when Gramformer is applied to the dataset.
    """
    return gf.correct(text)

    Calling Gramformer model for correction

In [9]:
# Correct every input sentence with Gramformer. The corrector already takes a
# single argument, so it is handed to apply() directly.
test_df['corrected_sentence'] = test_df['input'].apply(gramformer_corrector)
test_df.head(40)

KeyboardInterrupt: 

## Bert + Huggingface Model 

In [None]:
# Load the grammar-correction checkpoint through happytransformer.
# NOTE(review): the model requested here is a T5 checkpoint, although the
# section headings in this notebook call it "Bert".
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
# Generation settings for this one-off demo call (5 beams).
args = TTSettings(num_beams=5, min_length=1)
# The sentence to correct is sent with a "grammar: " prefix.
result = happy_tt.generate_text("grammar: This sentences has has bads grammar.", args=args)
print(result.text)

In [None]:
def huggingface_corrector(text):
    """Correct ``text`` with the ``happy_tt`` model and return the generated text.

    The sentence is sent with the ``grammar: `` prefix and generated with
    ``num_beams=1, min_length=1, max_length=100``.
    """
    prompt = 'grammar: ' + text
    settings = TTSettings(num_beams=1, min_length=1, max_length=100)
    generated = happy_tt.generate_text(prompt, args=settings)
    return generated.text
# app_hugginface_inputs = gr.inputs.Textbox(lines=3, placeholder="Enter a grammatically incorrect sentence here...")

# interface2 = gr.Interface(fn=huggingface_corrector, 
#                         inputs=app_hugginface_inputs,
#                         outputs='text', 
#                         title='Hi there, I\'m Huggingface')

#interface2.launch()

### Trying Bert + Hugging Face on Datasets

In [None]:
def huggingface_corrector(text):
    """Return the ``happy_tt`` model's corrected version of ``text``.

    Note: this cell re-defines the function from the previous section with the
    same behaviour (``grammar: `` prefix, one beam, max_length of 100).
    """
    prefixed = 'grammar: ' + text
    generation_args = TTSettings(num_beams=1, min_length=1, max_length=100)
    return happy_tt.generate_text(prefixed, args=generation_args).text

    Calling Bert model for correction

In [None]:
# Correct every input sentence with the happytransformer model; the corrector is
# passed to apply() directly instead of through a lambda wrapper.
test_df['corrected_sentence'] = test_df['input'].apply(huggingface_corrector)
test_df.head(40)

## Language Tool Model

In [None]:
# LanguageTool checker for US English, used by the demo below.
my_tool = language_tool_python.LanguageTool('en-US')  
# Sample paragraph containing deliberate spelling, grammar and style mistakes.
my_text = """LanguageTool provides utility to check grammar and spelling errors. We just have to paste the text here and click the 'Check Text' button. Click the colored phrases for for information on potential errors. or we can use this text too see an some of the issues that LanguageTool can dedect. Whot do someone thinks of grammar checkers? Please not that they are not perfect. Style problems get a blue marker: It is 7 P.M. in the evening. The weather was nice on Monday, 22 November 2021"""   

def english_text_corrector(tool, text):
    """Print a report of the mistakes ``tool`` finds in ``text``.

    Args:
        tool: object whose ``check(text)`` returns matches exposing ``offset``,
            ``errorLength`` and ``replacements`` (e.g. a LanguageTool instance).
        text: the text to check.

    Only matches that come with at least one suggested replacement are
    reported; the first replacement is shown as the recommendation.
    Nothing is returned: the report is written to stdout.
    """
    matches = tool.check(text)

    # Parallel lists: entry i of each list describes the same mistake.
    Mistakes = []
    Corrections = []
    StartPositions = []
    EndPositions = []

    for match in matches:
        if len(match.replacements) > 0:
            start = match.offset
            end = match.errorLength + match.offset
            StartPositions.append(start)
            EndPositions.append(end)
            # Slice the text that was actually checked. The global `my_text`
            # was used here before, which gave wrong excerpts for any other input.
            Mistakes.append(text[start:end])
            Corrections.append(match.replacements[0])

    print("Mistakes made")
    print (Mistakes)
    print ("\nRecommended Corrections")
    print(Corrections)
    print ("\nMistake Starting character number")
    print(StartPositions)
    print ("\nMistake EndPoint character number")
    print(EndPositions)

    mistakes_number = len (Mistakes)

    print( "\nNumber of mistakes made " + str(mistakes_number))
    #return mistakes_number

#english_text_corrector(my_tool, my_text)

### Language Tool Model on Datasets

In [None]:
# LanguageTool instance (US English) used by language_tool_corrector.
tool = language_tool_python.LanguageTool('en-US')


def language_tool_corrector(text: str):
    """Return the result of ``tool.correct`` for ``text``."""
    return tool.correct(text)

    Calling language tool model for correction

In [None]:
# Correct every input sentence with LanguageTool; the corrector is passed to
# apply() directly instead of through a lambda wrapper.
test_df['corrected_sentence'] = test_df['input'].apply(language_tool_corrector)
test_df.head(40)

## GingerIt Model

In [None]:
# Demo: run GingerIt on a single sentence with two deliberate mistakes.
text = 'The smelt of fliwers bring back memories.'

parser = GingerIt()
# Display only the 'result' entry of the parse output.
parser.parse(text)['result']

### Trying GingerIt Model on Datasets

In [None]:
def ginger_corrector(text: str):
    """Parse ``text`` with a fresh GingerIt parser and return its ``'result'`` entry."""
    return GingerIt().parse(text)['result']

        Calling GingerIt model for correction

In [None]:
# Correct every input sentence with GingerIt; the corrector is passed to
# apply() directly instead of through a lambda wrapper.
test_df['corrected_sentence'] = test_df['input'].apply(ginger_corrector)
test_df.head(40)

## Symspellpy Model

In [None]:
# Demo: build a SymSpell instance, load the dictionaries shipped inside the
# symspellpy package, and correct one badly spaced / misspelled phrase.
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
# Resolve the paths of the dictionary files bundled with symspellpy.
dictionary_path = pkg_resources.resource_filename(
    "symspellpy", "frequency_dictionary_en_82_765.txt"
)
bigram_path = pkg_resources.resource_filename(
    "symspellpy", "frequency_bigramdictionary_en_243_342.txt"
)
# term_index is the column of the term and count_index is the
# column of the term frequency
sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)
# The bigram file is loaded with count_index=2 (vs. 1 for the unigram file).
sym_spell.load_bigram_dictionary(bigram_path, term_index=0, count_index=2)
input_term = (
    "whereis th elove hehad dated forImuch of thepast who "
    "couqdn'tread in sixtgrade and ins pired him"
)
# max edit distance per lookup (per single word, not per whole input string)
suggestions = sym_spell.lookup_compound(input_term, max_edit_distance=2)

# Print each suggestion object returned for the phrase.
for suggestion in suggestions:
    print(suggestion)

In [None]:
# SymSpell instance built on first use and shared by every later call.
_SYMSPELL_INSTANCE = None


def _get_symspell():
    """Build the dictionary-loaded SymSpell instance once and return it."""
    global _SYMSPELL_INSTANCE
    if _SYMSPELL_INSTANCE is None:
        loaded = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
        dictionary_path = pkg_resources.resource_filename(
            "symspellpy", "frequency_dictionary_en_82_765.txt"
        )
        bigram_path = pkg_resources.resource_filename(
            "symspellpy", "frequency_bigramdictionary_en_243_342.txt"
        )
        loaded.load_dictionary(dictionary_path, term_index=0, count_index=1)
        loaded.load_bigram_dictionary(bigram_path, term_index=0, count_index=2)
        # Publish the instance only after both dictionaries are loaded.
        _SYMSPELL_INSTANCE = loaded
    return _SYMSPELL_INSTANCE


def symspelly_corrector(text):
    """Return SymSpell's first compound-lookup suggestion for ``text``.

    The SymSpell instance and its two dictionaries are loaded once and reused;
    previously they were rebuilt on every call, i.e. once per DataFrame row.

    Returns:
        The first suggestion object from ``lookup_compound`` (with
        ``max_edit_distance=2``), or ``None`` when there are no suggestions.
        NOTE(review): this is the suggestion object, not a plain string; the
        corrected text is presumably its ``term`` attribute - confirm.
    """
    suggestions = _get_symspell().lookup_compound(text, max_edit_distance=2)

    for suggestion in suggestions:
        return suggestion
    return None

In [None]:
# Run SymSpell on every input sentence; the corrector is passed to apply()
# directly instead of through a lambda wrapper.
test_df['corrected_sentence'] = test_df['input'].apply(symspelly_corrector)
test_df.head(40)

## Textblob

In [None]:
def textblob_corrector(text):
    """Wrap ``text`` in a TextBlob and return the outcome of its ``correct()`` call."""
    return TextBlob(text).correct()

In [None]:
# Store TextBlob's output under its own column name. It was previously written
# to 'ginger_corrected_sentence' even though GingerIt is not involved here.
test_df['Textblob_corrected_sentence'] = test_df['input'].apply(textblob_corrector)
test_df.head()

## Combining GingerIt with Bert

In [None]:
# Two-stage pipeline: GingerIt first, then the happytransformer model on
# GingerIt's output.
test_df['ginger_corrected_sentence'] = test_df['input'].apply(ginger_corrector)
test_df['combined_with_bert_corrected_sentence'] = test_df['ginger_corrected_sentence'].apply(huggingface_corrector)
test_df.head(40)

## Combining Gingerit with Gramformer

In [None]:
# Two-stage pipeline: GingerIt first, then Gramformer on GingerIt's output.
test_df['ginger_corrected_sentence'] = test_df['input'].apply(ginger_corrector)
test_df['combined_with_gramformer'] = test_df['ginger_corrected_sentence'].apply(gramformer_corrector)
test_df.head(40)

## Combining Textblob with Bert

In [None]:
# Two-stage pipeline: TextBlob first, then the happytransformer model.
# textblob_corrector returns TextBlob objects, so convert them to plain strings
# first: huggingface_corrector builds its prompt with 'grammar: ' + text, and
# str + TextBlob raises TypeError.
test_df['Textblob_corrected_sentence'] = test_df['input'].apply(textblob_corrector).map(str)
test_df['combined_bert_with_textblob'] = test_df['Textblob_corrected_sentence'].apply(huggingface_corrector)
test_df.head(40)