In [None]:
#nlp 2
#morphological

import pandas as pd

data={
    'base_word':['play','run','write','happy'],
    'added_morph':['-ed','-ing','-er','-ness'],
    'new_word':['played','running','writer','happiness'],
    'deleted_morph':['-','-','-','-'],
    'new_word(after_deletion)':['play','run','write','happy']
}

df= pd.DataFrame(data)
print(df)


def add_morph(row):
    return row['base_word'] + row['added_morph']

def delete_morph(row):
    if row['deleted_morph'] != '-':
        return row['base_word'].replace(row['deleted_morph'],'')
    return row['base_word']

df["New Word [After Addition]"]= df.apply(add_morph, axis=1)
df["New Word [After Deletion]"]= df.apply(delete_morph, axis=1)
    
print(df)

  base_word added_morph   new_word deleted_morph new_word(after_deletion)
0      play         -ed     played             -                     play
1       run        -ing    running             -                      run
2     write         -er     writer             -                    write
3     happy       -ness  happiness             -                    happy
  base_word added_morph   new_word deleted_morph new_word(after_deletion)  \
0      play         -ed     played             -                     play   
1       run        -ing    running             -                      run   
2     write         -er     writer             -                    write   
3     happy       -ness  happiness             -                    happy   

  New Word [After Addition] New Word [After Deletion]  
0                   play-ed                      play  
1                   run-ing                       run  
2                  write-er                     write  
3                hap

In [None]:
#nlp 3
#pos

import nltk
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")

from nltk import word_tokenize,pos_tag

sentence= "The quick brown fox jumps over the lazy dog."
tokens= word_tokenize(sentence)
pos= pos_tag(tokens)

print(pos)

[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [None]:
#nlp 7
#ner
import spacy

nlp= spacy.load("en_core_web_sm")
sentence= "Barack Obama was the 44th President of United States."
doc= nlp(sentence)

for entity in doc.ents:
    print(f"Entity: {entity.text}, Label: {entity.label_}")

from spacy import displacy
displacy.render(doc, style="ent")


Entity: Barack Obama, Label: PERSON
Entity: 44th, Label: ORDINAL
Entity: United States, Label: GPE


In [None]:
#nlp 6
#build n evaluate ner

import spacy
from sklearn.metrics import  classification_report

def predict_entities(text):
    nlp= spacy.load("en_core_web_sm")
    doc= nlp(text)
    return [(ent.text,ent.label_) for ent in doc.ents]

def evaluate_ner(texts_and_labels):
    true_labels=[]
    pred_labels=[]

    for text, true_ents in texts_and_labels:
        pred= predict_entities(text)
        true_labels.extend([label for _, label in true_ents])
        pred_labels.extend([label for _, label in pred])

    return classification_report(true_labels,pred_labels)

text= "Apple CEO Tim Cook announced new iphone model in California yesterday."
print("\nExample:")
for entity, label in predict_entities(text):
    print(f"{entity}:{label}")

test_data=[
    ("Google opened a new office in Paris.",
     [("Google","ORG"), ("Paris","GPE")])
]

print("\nEvaluation report:")
print(evaluate_ner(test_data))


Example:
Apple:ORG
Tim Cook:PERSON
California:GPE
yesterday:DATE

Evaluation report:
              precision    recall  f1-score   support

         GPE       1.00      1.00      1.00         1
         ORG       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [None]:
#nlp 4
#semantic relationship

import nltk
from nltk.corpus import wordnet as wn

def get(word1,word2):
    synsets1=wn.synsets(word1)
    synsets2=wn.synsets(word2)

    if not synsets1 or not synsets2:
        print(f"Semantic relationship cannot be determined for {word1} and {word2}")

    synset1= synsets1[0]
    synset2= synsets2[0]

    relationship={
        "Word 1":word1,
        "Word 2":word2,
        "Synset1 definition":synset1.definition(),
        "Synset2 definition":synset2.definition(),
        "Similarity":synset1.wup_similarity(synset2),
        "Synset1 hypernyms":synset1.hypernyms(),
        "Synset2 hypernyms":synset2.hypernyms(),
        "Synset1 hyponyms":synset1.hyponyms(),
        "Synset2 hyponyms":synset2.hyponyms(),
    }
    return relationship

word1= "dog"
word2= "cat"

put= get(word1,word2)

for key,value in put.items():
    print(f"{key}: {value}")

Word 1: dog
Word 2: cat
Synset1 definition: a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
Synset2 definition: feline mammal usually having thick soft fur and no ability to roar: domestic cats; wildcats
Similarity: 0.8571428571428571
Synset1 hypernyms: [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
Synset2 hypernyms: [Synset('feline.n.01')]
Synset1 hyponyms: [Synset('great_pyrenees.n.01'), Synset('working_dog.n.01'), Synset('hunting_dog.n.01'), Synset('poodle.n.01'), Synset('mexican_hairless.n.01'), Synset('puppy.n.01'), Synset('leonberg.n.01'), Synset('newfoundland.n.01'), Synset('corgi.n.01'), Synset('dalmatian.n.02'), Synset('cur.n.01'), Synset('lapdog.n.01'), Synset('pooch.n.01'), Synset('pug.n.01'), Synset('griffon.n.02'), Synset('spitz.n.01'), Synset('toy_dog.n.01'), Synset('basenji.n.01')]
Synset2 hyponyms: [Synset('wildcat.n.03'), Synset('domestic_cat.n.01')]


In [21]:
#nlp 5
#n gram model

import nltk
from nltk import trigrams
from nltk.corpus import reuters
from collections import defaultdict

nltk.download('reuters')
nltk.download('punkt')

words = nltk.word_tokenize(' '.join(reuters.words()))

tri_grams = list(trigrams(words))

model = defaultdict(lambda: defaultdict(lambda: 0))

for w1, w2, w3 in tri_grams:
 model[(w1, w2)][w3] += 1

for w1_w2 in model:
    total_count = float(sum(model[w1_w2].values()))
    for w3 in model[w1_w2]:
        model[w1_w2][w3] /= total_count

def predict_next_word(w1, w2):
    next_word = model[w1, w2]
    if next_word:
        predicted_word = max(next_word, key=next_word.get) # Choose the most 
        return predicted_word
    else:
        return "No prediction available"

print("Next Word:", predict_next_word('the', 'stock'))


[nltk_data] Downloading package reuters to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Next Word: of
