In [1]:

#For Implementing Gramformer Solution
from gramformer import Gramformer

#for implementing Bert Solution
from happytransformer import HappyTextToText, TTSettings

#For Implementing LanguageTool Solution
import language_tool_python

#For GingerIt Solution
from gingerit.gingerit import GingerIt

#For Web Application Development
import gradio as gr

import pandas as pd


  from .autonotebook import tqdm as notebook_tqdm


## Gramformer Model implemented in Gradio 

In [2]:
gf = Gramformer(models=1, use_gpu=False) #1=corrector, 2=detector

[Gramformer] Grammar error correct/highlight model loaded..


In [3]:
gf.correct('hello my dear childs')

{'hello my dear child.'}

    Function to build web application using gradio

In [4]:
def correct(sentence):
    res = gf.correct(sentence) 
    return res
app_inputs = gr.inputs.Textbox(lines=3, placeholder="Enter a grammatically incorrect sentence here...")

interface = gr.Interface(fn=correct, 
                        inputs=app_inputs,
                         outputs='text', 
                        title='Hi there, I\'m Gramformer')

#interface.launch()

  "Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components",


    Gramformer highlighter

In [5]:
gh = Gramformer(models=3, use_gpu=False) #1=corrector, 2=detector

In [6]:
gh.highlight(orig='to tha store',cor='to the store')

"to <c type='SPELL' edit='the'>tha</c> store"

## Bert + Huggingface Solution

In [7]:
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(num_beams=5, min_length=1)
result = happy_tt.generate_text("grammar: This sentences has has bads grammar.", args=args)
print(result.text)

This sentence has bad grammar.


In [8]:
def huggingface_corrector(text):
    result = happy_tt.generate_text('grammar: ' + text, args=TTSettings(num_beams=1, min_length=1, max_length=100))
    return result.text

app_hugginface_inputs = gr.inputs.Textbox(lines=3, placeholder="Enter a grammatically incorrect sentence here...")

interface2 = gr.Interface(fn=huggingface_corrector, 
                        inputs=app_hugginface_inputs,
                        outputs='text', 
                        title='Hi there, I\'m Huggingface')

#interface2.launch()

## Language Tool Solution

In [9]:
my_tool = language_tool_python.LanguageTool('en-US')  
my_text = """LanguageTool provides utility to check grammar and spelling errors. We just have to paste the text here and click the 'Check Text' button. Click the colored phrases for for information on potential errors. or we can use this text too see an some of the issues that LanguageTool can dedect. Whot do someone thinks of grammar checkers? Please not that they are not perfect. Style problems get a blue marker: It is 7 P.M. in the evening. The weather was nice on Monday, 22 November 2021"""   

def english_text_corrector(tool, text):
    
    matches = tool.check(text)

    #empty lists
    Mistakes = [] 
    Corrections = []  
    StartPositions = []  
    EndPositions = []  

    for rules in matches:
        if len(rules.replacements) > 0:  
            StartPositions.append(rules.offset)  
            EndPositions.append(rules.errorLength + rules.offset)  
            Mistakes.append(my_text[rules.offset : rules.errorLength + rules.offset])  
            Corrections.append(rules.replacements[0]) 

    print("Mistakes made")
    print (Mistakes)
    print ("\nRecommended Corrections")
    print(Corrections)
    print ("\nMistake Starting character number")
    print(StartPositions)
    print ("\nMistake EndPoint character number")
    print(EndPositions)

    mistakes_number = len (Mistakes)

    print( "\nNumber of mistakes made " + str(mistakes_number))
    #return mistakes_number

english_text_corrector(my_tool, my_text)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Mistakes made
['for for', 'or', 'too see', 'an', 'dedect', 'Whot', 'not', 'P.M. in the evening']

Recommended Corrections
['for', 'Or', 'to see', 'a', 'detect', 'Who', 'note', 'P.M.']

Mistake Starting character number
[165, 206, 230, 238, 282, 290, 341, 414]

Mistake EndPoint character number
[172, 208, 237, 240, 288, 294, 344, 433]

Number of mistakes made 8


## GingerIt Solution

In [10]:
text = 'The smelt of fliwers bring back memories.'

parser = GingerIt()
parser.parse(text)['result']

'The smell of flowers brings back memories.'

## Trying GingerIt Model on Datasets

In [11]:
df = pd.read_csv('./grammatical error detection/NLP Assignment/test_data.csv')

In [12]:
df.head(20)
new_df = df.head(40)

In [13]:
def ginger_correction(text: str):
    
    parser = GingerIt()
    correction = parser.parse(text)['result']
    return correction

In [14]:
new_df['corrected_sentence'] = new_df['input'].apply(lambda text: ginger_correction(text))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [15]:
pd.set_option('display.max_colwidth', None)
new_df.head(40)

Unnamed: 0,input,corrected_sentence
0,I believe they will master Japanese soon because they were selected as scholarship recipients .,I believe they will master Japanese soon because they were selected as scholarship recipients.
1,I am looking for it .,I am looking for it.
2,"Apple is a round fruit with smooth and colorful skin , red , green and sometimes gold .","Apple is a round fruit with smooth and colorful skin, red, green and sometimes gold."
3,Let It Will Be Push .,Let It Will Be Pushed.
4,I rode on this ship from Sendai .,I rode on this ship from Sendai.
5,I thought Korean is very interesting language when I was sutdying Korean .,I thought Korean is very interesting language when I was studying Korean.
6,Nobody is perfect .,Nobody is perfect.
7,Can you lend me some money ?,Can you lend me some money?
8,Does anyone correct my previous article ?XD,Does anyone correct my previous article? XD
9,"Similarly , I want to thank and for all their help and the nice time spent together , and wish to continue being together .","Similarly, I want to thank and for all their help and the nice time spent together, and wish to continue being together."


## Language Tool Model on Datasets

In [16]:
tool = language_tool_python.LanguageTool('en-US')
tool.correct(text)
def language_tool_corrector(text:str):
    correction = tool.correct(text)
    return correction

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [17]:
new_df['corrected_sentence'] = new_df['input'].apply(lambda text: language_tool_corrector(text))
new_df.head(40)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,input,corrected_sentence
0,I believe they will master Japanese soon because they were selected as scholarship recipients .,I believe they will master Japanese soon because they were selected as scholarship recipients.
1,I am looking for it .,I am looking for it.
2,"Apple is a round fruit with smooth and colorful skin , red , green and sometimes gold .","Apple is a round fruit with smooth and colorful skin, red, green and sometimes gold."
3,Let It Will Be Push .,Let It Will Be Push.
4,I rode on this ship from Sendai .,I rode on this ship from Sendai.
5,I thought Korean is very interesting language when I was sutdying Korean .,I thought Korean is very interesting language when I was studying Korean.
6,Nobody is perfect .,Nobody is perfect.
7,Can you lend me some money ?,Can you lend me some money ?
8,Does anyone correct my previous article ?XD,Does anyone correct my previous article ?XD
9,"Similarly , I want to thank and for all their help and the nice time spent together , and wish to continue being together .","Similarly, I want to thank and for all their help and the nice time spent together, and wish to continue being together."


## Trying Bert + Hugging Face on Datasets

In [18]:
def huggingface_corrector(text):
    result = happy_tt.generate_text('grammar: ' + text, args=TTSettings(num_beams=1, min_length=1, max_length=100))
    return result.text

new_df['corrected_sentence'] = new_df['input'].apply(lambda text: huggingface_corrector(text))
new_df.head(40)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,input,corrected_sentence
0,I believe they will master Japanese soon because they were selected as scholarship recipients .,I believe they will master Japanese soon because they were selected as scholarship recipients.
1,I am looking for it .,I am looking for it.
2,"Apple is a round fruit with smooth and colorful skin , red , green and sometimes gold .","Apple is a round fruit with smooth and colorful skin , red , green and sometimes gold."
3,Let It Will Be Push .,Let It Be Pushed.
4,I rode on this ship from Sendai .,I rode on this ship from Sendai.
5,I thought Korean is very interesting language when I was sutdying Korean .,I thought Korean is a very interesting language when I was studying Korean.
6,Nobody is perfect .,Nobody is perfect.
7,Can you lend me some money ?,Can you lend me some money?
8,Does anyone correct my previous article ?XD,Does anyone correct my previous article?XD
9,"Similarly , I want to thank and for all their help and the nice time spent together , and wish to continue being together .","Similarly, I want to thank and for all their help and the nice time spent together, and wish to continue being together."
