In [7]:
%run -i "../util/file_utils.ipynb"
%run -i "../util/lang_utils.ipynb"

# Basic method: spaCy lemma versus spaCy token

In [8]:
text = "I have five birds"
doc = small_model(text)
for token in doc:
    if (token.pos_ == "NOUN" and token.lemma_ != token.text):
        print(token.text, "plural")

birds plural


# Number using morph features

In [9]:
doc = small_model("I have five birds.")
print(doc[3].morph.get("Number"))

['Plur']


# Function to determine number using spaCy

In [10]:
class Noun_number(Enum):
    SINGULAR = 1
    PLURAL = 2

def get_nouns_number(text, model, method="lemma"):
    nouns = []
    doc = model(text)
    for token in doc:
        if (token.pos_ == "NOUN"):
            if method == "lemma":
                if token.lemma_ != token.text:
                    nouns.append((token.text, Noun_number.PLURAL))
                else:
                    nouns.append((token.text, Noun_number.SINGULAR))
            elif method == "morph":
                if token.morph.get("Number") == "Sing":
                    nouns.append((token.text, Noun_number.PLURAL))
                else:
                    nouns.append((token.text, Noun_number.SINGULAR))
    return nouns

# Irregular nouns using small model

In [11]:
text = "Three geese crossed the road"
nouns = get_nouns_number(text, small_model, "morph")
print(nouns)
nouns = get_nouns_number(text, small_model)
print(nouns)

[('geese', <Noun_number.SINGULAR: 1>), ('road', <Noun_number.SINGULAR: 1>)]
[('geese', <Noun_number.SINGULAR: 1>), ('road', <Noun_number.SINGULAR: 1>)]


# Irregular nouns using large model

In [15]:
#!python -m spacy download en_core_web_lg
large_model = spacy.load("en_core_web_lg")
nouns = get_nouns_number(text, large_model, "morph")
print(nouns)
nouns = get_nouns_number(text, large_model)
print(nouns)

[('geese', <Noun_number.SINGULAR: 1>), ('road', <Noun_number.SINGULAR: 1>)]
[('geese', <Noun_number.PLURAL: 2>), ('road', <Noun_number.SINGULAR: 1>)]


# Noun number using GPT-3

In [13]:
import openai
openai.api_key = OPEN_AI_KEY
prompt="""Decide whether each noun in the following text is singular or plural. 
Return the list in the format of a python tuple: (word, number). 
Sentence: Three geese crossed the road."""
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prompt,
    temperature=0,
    max_tokens=256,
    top_p=1.0,
    frequency_penalty=0,
    presence_penalty=0
)
print(response)

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\n\n('Three', 'plural'), ('geese', 'plural'), ('road', 'singular')"
    }
  ],
  "created": 1687769012,
  "id": "cmpl-7Vc6mKPiO7Y2Ib04XN89XnIsVOcSf",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 24,
    "prompt_tokens": 44,
    "total_tokens": 68
  }
}


# Converting from singular to plural and plural to singular

In [14]:
from textblob import TextBlob
texts = ["book", "goose", "pen", "point", "deer"]
blob_objs = [TextBlob(text) for text in texts]
plurals = [blob_obj.words.pluralize()[0] for blob_obj in blob_objs]
print(plurals)
blob_objs = [TextBlob(text) for text in plurals]
singulars = [blob_obj.words.singularize()[0] for blob_obj in blob_objs]
print(singulars)

['books', 'geese', 'pens', 'points', 'deer']
['book', 'goose', 'pen', 'point', 'deer']
