In [18]:
import os

import cohere
import pandas as pd
from cohere.classify import Example
from sklearn.metrics.pairwise import cosine_similarity
# Read the Cohere API key from the COHERE_API_KEY environment variable instead
# of pasting it into the notebook (keys in cells end up in version control).
# Falls back to the empty string, which is what was previously hard-coded.
apiKey = os.environ.get('COHERE_API_KEY', '')
co = cohere.Client(apiKey)

In [19]:
def gen_grammar_check(query, threshold=0.997):
    """Score a sentence by how much a generated "correction" differs from it.

    The lower-cased query is appended to a few-shot prompt and sent to
    ``co.generate``. The generated correction is cleaned up, lower-cased, and
    embedded together with the query via ``co.embed``; the two embeddings are
    then compared with cosine similarity.

    Side effects: prints the query, the cleaned correction and the similarity
    matrix.

    Args:
        query: Sentence to check. Must be a ``str`` (``.lower()`` is called).
        threshold: The function returns 1 only when the cosine similarity is
            strictly greater than this value. Defaults to 0.997, the value
            that used to be hard-coded.

    Returns:
        1 if the similarity exceeds ``threshold`` (the correction is nearly
        identical to the input), otherwise 0.
    """
    stop_marker = "--"
    query = query.lower()
    response = co.generate(
        model='xlarge',
        prompt=f'This is a spell check generator that checks for grammar and corrects it. This also capitalizes the first letter of the sentence.\n\nSample: I would like a peice of pie.\nCorrect: I would like a piece of the pie.\n\nSample: my coworker said he used a financial planner to help choose his stocks so he wouldn\'t loose money.\nCorrect: My coworker said he used a financial planner to help him choose his stocks so he wouldn\'t lose money.\n\nSample: I ordered pizza, I also ordered garlic knots.\nCorrect: I ordered pizza; I also ordered garlic knots.\n\nSample: i bought winning lottery ticket the corner store\nCorrect: I bought my winning lottery ticket at the corner store.\n\nSample: try to reread your work to ensure you haven\'t left out any small words\nCorrect: Try to reread your work to ensure you haven\'t left out any small words.\n\nSample: I went to the movies with my sister. We will see the new comedy about dancing dogs.\nCorrect: I went to the movies with my sister. We saw the new comedy about dancing dogs.\n\nSample: the boy took their turn on the field.\nCorrect: The boy took his turn on the field.\n--\nSample: I could of won the race if I trained more.\nCorrect: I could have won the race if I had trained more.\n--\nSample: I went to the office, than i started my meeting.\nCorrect: I went to the office, then I started my meeting.\n--\nSample: {query}\nCorrect:',
        max_tokens=100,
        temperature=1.2,
        k=0,
        p=0.75,
        frequency_penalty=0,
        presence_penalty=0,
        stop_sequences=[stop_marker],
        return_likelihoods='NONE')

    # Clean the generation. The previous rstrip("(\n)--") treated its argument
    # as a *set of characters*, so it also ate legitimate trailing "(", ")"
    # and "-" from the correction. Remove only the stop marker (when the text
    # ends with it) and surrounding whitespace.
    output = response.generations[0].text.strip()
    if output.endswith(stop_marker):
        output = output[:-len(stop_marker)].rstrip()
    # The query was lower-cased above, so compare case-insensitively.
    output = output.lower()

    embeds = co.embed(texts=[query, output],
                      model='large',
                      truncate='LEFT').embeddings
    similarity_matrix = cosine_similarity(X=[embeds[0]], Y=[embeds[1]], dense_output=True)
    print(f"input query: {query}")
    print(f"generated correction: {output}")
    print(similarity_matrix)

    # One row vs. one row gives a 1x1 result; compare its single entry as a
    # plain float rather than relying on the truthiness of a 1x1 array.
    similarity = float(similarity_matrix[0][0])
    return 1 if similarity > threshold else 0

In [20]:
# Smoke test: run the generation-based check on one hand-written example.
# gen_grammar_check prints the query, the generated correction and the
# similarity; its 0/1 return value is shown as the cell result.
query = "they are afraid of people. people are not good."
gen_grammar_check(query)

input query: they are afraid of people. people are not good.
generated correction: they are afraid of people. people are not good.
[[1.]]


1

In [21]:
# Load the evaluation set. header=None means the first line of the file is
# read as data, and the column names are supplied explicitly.
df = pd.read_csv(
    "grammar_test.csv",
    sep=',',
    names=['sentence', 'label'],
    header=None,
)
df.head()

Unnamed: 0,sentence,label
0,We ate at a really fancy restaurant.,1
1,John was unknown to be the murderer.,0
2,On which I consulted with the chairman of the ...,0
3,David constructed the house.,1
4,He'll bring me a hot dog if he sees one.,1


In [24]:
# Run the generation-based check on the first 100 sentences only; rows beyond
# that get NaN in 'preds' because the assignment aligns on the index.
df['preds'] = df['sentence'].head(100).apply(gen_grammar_check)

input query: we ate at a really fancy restaurant.
generated correction: we ate at a really fancy restaurant.
[[1.]]
input query: john was unknown to be the murderer.
generated correction: john was unknown to be the murderer.
[[1.]]
input query: on which i consulted with the chairman of the select committee was this matter.
generated correction: on which i consulted with the chairman of the select committee was this matter.
[[1.]]
input query: david constructed the house.
generated correction: david constructed the house.
[[1.]]
input query: he'll bring me a hot dog if he sees one.
generated correction: he'll bring me a hot dog if he sees one.
[[1.]]
input query: a boy did not laugh.
generated correction: a boy did not laugh.
[[1.]]
input query: i admired him a teacher.
generated correction: i admired him as a teacher.
[[0.9795603]]
input query: henry wanted to possibly marry fanny.
generated correction: henry wanted to possibly marry fanny.
[[1.]]
input query: the imposition of the gov

In [30]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Evaluate only the 100 rows that were actually scored above.
labels = df['label'].head(100).astype(float).tolist()
preds = df['preds'].head(100).tolist()

# pos_label=0 makes label 0 the positive class for precision and recall.
print(f'accuracy = {accuracy_score(labels, preds)}')
print(f'precision = {precision_score(labels, preds, pos_label=0)}')
print(f'recall = {recall_score(labels, preds, pos_label=0)}')

accuracy = 0.59
precision = 0.7368421052631579
recall = 0.4745762711864407


Next: grammar check using the Cohere classify endpoint

In [None]:
# Reload the evaluation set so the classifier run starts from a fresh frame.
# header=None means the first line of the file is read as data, and the
# column names are supplied explicitly.
df = pd.read_csv(
    "grammar_test.csv",
    sep=',',
    names=['sentence', 'label'],
    header=None,
)
df.head()

In [66]:
import time
def class_grammar_check(query):
    """Classify a single sentence with the Cohere classify endpoint.

    Sends ``query`` (formatted as a string) as the only input to
    ``co.classify``, pauses for one second, and returns the predicted label
    of the first classification converted to ``float``.
    """
    sentence = f"{query}"
    result = co.classify(
        model='cdb39157-6b82-4cb4-92c5-9e6037623d79-ft',
        inputs=[sentence],
    )
    # Throttle: the Cohere trial key is limited to 100 calls per minute.
    time.sleep(1)
    predicted_label = result.classifications[0].prediction
    return float(predicted_label)

In [67]:
# Smoke test: classify one hand-written example; the float prediction
# returned by class_grammar_check is shown as the cell result.
query = "they are afraid of people. people are not good."
class_grammar_check(query)

1.0

In [68]:
# Classify the first 1000 sentences only; rows beyond that get NaN in 'preds'
# because the assignment aligns on the index.
df['preds'] = df['sentence'].head(1000).apply(class_grammar_check)

In [71]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Evaluate only the 1000 rows that were actually classified above.
labels = df['label'].head(1000).astype(float).tolist()
preds = df['preds'].head(1000).tolist()

# pos_label=0 makes label 0 the positive class for precision and recall.
print(f'accuracy = {accuracy_score(labels, preds)}')
print(f'precision = {precision_score(labels, preds, pos_label=0)}')
print(f'recall = {recall_score(labels, preds, pos_label=0)}')

accuracy = 0.676
precision = 0.8928571428571429
recall = 0.4
