In [1]:
%run -i "../util/file_utils.ipynb"
%run -i "../util/lang_utils.ipynb"

# Basic method: spaCy lemma versus spaCy token

In [2]:
text = "I have five birds"
doc = small_model(text)
for token in doc:
    if (token.pos_ == "NOUN" and token.lemma_ != token.text):
        print(token.text, "plural")

birds plural


# Number using morph features

In [3]:
doc = small_model("I have five birds.")
print(doc[3].morph.get("Number"))

['Plur']


# Function to determine number using spaCy

In [4]:
class Noun_number(Enum):
    SINGULAR = 1
    PLURAL = 2

def get_nouns_number(text, model, method="lemma"):
    nouns = []
    doc = model(text)
    for token in doc:
        if (token.pos_ == "NOUN"):
            if method == "lemma":
                if token.lemma_ != token.text:
                    nouns.append((token.text, Noun_number.PLURAL))
                else:
                    nouns.append((token.text, Noun_number.SINGULAR))
            elif method == "morph":
                if token.morph.get("Number") == "Sing":
                    nouns.append((token.text, Noun_number.PLURAL))
                else:
                    nouns.append((token.text, Noun_number.SINGULAR))
    return nouns

# Irregular nouns using small model

In [5]:
text = "Three geese crossed the road"
nouns = get_nouns_number(text, small_model, "morph")
print(nouns)
nouns = get_nouns_number(text, small_model)
print(nouns)

[('geese', <Noun_number.SINGULAR: 1>), ('road', <Noun_number.SINGULAR: 1>)]
[('geese', <Noun_number.SINGULAR: 1>), ('road', <Noun_number.SINGULAR: 1>)]


# Irregular nouns using large model

In [6]:
#!python -m spacy download en_core_web_lg
large_model = spacy.load("en_core_web_lg")
nouns = get_nouns_number(text, large_model, "morph")
print(nouns)
nouns = get_nouns_number(text, large_model)
print(nouns)

[('geese', <Noun_number.SINGULAR: 1>), ('road', <Noun_number.SINGULAR: 1>)]
[('geese', <Noun_number.PLURAL: 2>), ('road', <Noun_number.SINGULAR: 1>)]


# Noun number using GPT-3

In [8]:
from openai import OpenAI
client = OpenAI(api_key=OPEN_AI_KEY)
prompt="""Decide whether each noun in the following text is singular or plural. 
Return the list in the format of a python tuple: (word, number). Do not provide any additional explanations.
Sentence: Three geese crossed the road."""
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    temperature=0,
    max_tokens=256,
    top_p=1.0,
    frequency_penalty=0,
    presence_penalty=0,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ], 
)
print(response.choices[0].message.content)

('geese', 'plural')
('road', 'singular')


# Converting from singular to plural and plural to singular

In [8]:
from textblob import TextBlob
texts = ["book", "goose", "pen", "point", "deer"]
blob_objs = [TextBlob(text) for text in texts]
plurals = [blob_obj.words.pluralize()[0] for blob_obj in blob_objs]
print(plurals)
blob_objs = [TextBlob(text) for text in plurals]
singulars = [blob_obj.words.singularize()[0] for blob_obj in blob_objs]
print(singulars)

['books', 'geese', 'pens', 'points', 'deer']
['book', 'goose', 'pen', 'point', 'deer']
