In [None]:
!pip install nltk
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

# **Tokenization**

In [None]:
corpus = """This is Random Paragraph. Really Excited for NLP. Transformers Excites me a lot."""
print(corpus)

This is Random Paragraph. Really Excited for NLP. Transformers Excites me a lot.


In [None]:
sentence = nltk.sent_tokenize(corpus)
print(sentence)

['This is Random Paragraph.', 'Really Excited for NLP.', 'Transformers Excites me a lot.']


In [None]:
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
for sent in sentence:
  tokens = nltk.word_tokenize(sent)
  tagged = nltk.pos_tag(tokens)
  print(tagged)

[('This', 'DT'), ('is', 'VBZ'), ('Random', 'NNP'), ('Paragraph', 'NNP'), ('.', '.')]
[('Really', 'RB'), ('Excited', 'VBN'), ('for', 'IN'), ('NLP', 'NNP'), ('.', '.')]
[('Transformers', 'NNS'), ('Excites', 'VBZ'), ('me', 'PRP'), ('a', 'DT'), ('lot', 'NN'), ('.', '.')]


In [None]:
for sent in sentence:
  tokens = nltk.wordpunct_tokenize(sent)
  tagged = nltk.pos_tag(tokens)
  print(tagged)

[('This', 'DT'), ('is', 'VBZ'), ('Random', 'NNP'), ('Paragraph', 'NNP'), ('.', '.')]
[('Really', 'RB'), ('Excited', 'VBN'), ('for', 'IN'), ('NLP', 'NNP'), ('.', '.')]
[('Transformers', 'NNS'), ('Excites', 'VBZ'), ('me', 'PRP'), ('a', 'DT'), ('lot', 'NN'), ('.', '.')]


In [None]:
from nltk.tokenize import TreebankWordTokenizer
tokenizer = TreebankWordTokenizer()
tokenizer.tokenize(corpus)

['This',
 'is',
 'Random',
 'Paragraph.',
 'Really',
 'Excited',
 'for',
 'NLP.',
 'Transformers',
 'Excites',
 'me',
 'a',
 'lot',
 '.']

# **Stemming**

In [None]:
words = ["program", "programs", "programmer", "programming", "programmers", "eat", "eating", "Eaten"]

In [None]:
# PorterStemmer
#  It is based on the idea that the suffixes in the English language are made up of a combination of smaller and simpler suffixes. This stemmer is known for its speed and simplicity.
from nltk.stem import PorterStemmer
ps = PorterStemmer()
for word in words:
  print(word, ":", ps.stem(word))

program : program
programs : program
programer : program
programing : program
programers : program
eat : eat
eating : eat
Eaten : eaten


In [None]:
print("eaten : ", ps.stem("eaten"))
print("jump : ", ps.stem("jump"))
print("cried : ", ps.stem("cried"))
print("laughed : ", ps.stem("laughed"))
print("fairly : ", ps.stem("fairly"))
print("sporty : ", ps.stem("sporty"))
print("goes : ", ps.stem("goes"))

eaten :  eaten
jump :  jump
cried :  cri
laughed :  laugh
fairly :  fairli
sporty :  sporti
goes :  goe


In [None]:
# RegexpStemmer class
# The Regexp Stemmer, or Regular Expression Stemmer, is a stemming algorithm that utilizes regular expressions to identify and remove suffixes from words. It allows users to define custom rules for stemming by specifying patterns to match and remove.
from nltk.stem import RegexpStemmer
rs = RegexpStemmer('ing$|s$|able$|ed$', min=4)
for word in words:
  print(word, ":", rs.stem(word))

program : program
programs : program
programer : programer
programing : program
programers : programer
eat : eat
eating : eat
Eaten : Eaten


In [None]:
print("eating : ", rs.stem("eating"))
print("jumped : ", rs.stem("jumped"))
print("cried : ", rs.stem("cried"))
print("laughable : ", rs.stem("laughable"))

eating :  eat
jumped :  jump
cried :  cri
laughable :  laugh


In [None]:
# Snowball Stemmer
# Performs better than PorterStemmer, Multingual, Porter2Stemmer, Improves Performance When Addes to PorterStemmer
from nltk.stem import SnowballStemmer
ss = SnowballStemmer(language='english')

for word in words:
  print(word, ":", ss.stem(word))

program : program
programs : program
programer : program
programing : program
programers : program
eat : eat
eating : eat
Eaten : eaten


In [None]:
print("eating : ", ss.stem("eating"))
print("jumped : ", ss.stem("jumped"))
print("cried : ", ss.stem("cried"))
print("laughed : ", ss.stem("laughed"))
print("fairly : ", ss.stem("fairly"))
print("sporty : ", ss.stem("sporty"))
print("goes : ", ss.stem("goes"))

eating :  eat
jumped :  jump
cried :  cri
laughed :  laugh
fairly :  fair
sporty :  sporti
goes :  goe


# **Lemmatization**

In [None]:
from nltk.stem import WordNetLemmatizer
lm = WordNetLemmatizer()
for word in words:
  print(word, ":", lm.lemmatize(word))

program : program
programs : program
programmer : programmer
programming : programming
programmers : programmer
eat : eat
eating : eating
Eaten : Eaten


In [None]:
print("eaten : ", lm.lemmatize("eaten"))
print("jumped : ", lm.lemmatize("jumped", 'v'))
print("cried : ", lm.lemmatize("cried", 'v'))
print("laughed : ", lm.lemmatize("laughed"))
print("fairly : ", lm.lemmatize("fairly", pos='v'))
print("sporty : ", lm.lemmatize("sporty"))
print("goes : ", lm.lemmatize("goes"))

print("rocks :", lm.lemmatize("rocks"))
print("corpora :", lm.lemmatize("corpora"))

# a denotes adjective in "pos"
print("better :", lm.lemmatize("better", pos="a"))

eaten :  eaten
jumped :  jump
cried :  cry
laughed :  laughed
fairly :  fairly
sporty :  sporty
goes :  go
rocks : rock
corpora : corpus
better : good


In [None]:
'''
Pos
Noun - n
Verb - v
adjective - a
adverb - r
'''

In [None]:
# import these modules
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

print("rocks :", lemmatizer.lemmatize("rocks"))
print("corpora :", lemmatizer.lemmatize("corpora"))

# a denotes adjective in "pos"
print("better :", lemmatizer.lemmatize("better", pos="a"))

rocks : rock
corpora : corpus
better : good


# Stopwords


In [None]:
paragraph = """The sun dipped below the horizon, casting a warm glow over the quiet town. Birds chirped softly as the last rays of light danced on the rippling surface of the river. Along the narrow streets, people bustled about, preparing for the evening ahead. Children laughed as they played under the dimming sky, their joy echoing through the alleys. Meanwhile, the smell of freshly baked bread wafted from a nearby bakery, filling the air with a comforting aroma. The world seemed to slow down in this moment, as if time itself was taking a breath before the night fully embraced the town."""

In [None]:
import nltk
nltk.download('stopwords')
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
stopwords.words('english')

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [None]:
sentences = nltk.sent_tokenize(paragraph)

In [None]:
ss = SnowballStemmer(language='english')

In [None]:
## Apply Stopwords and filter and the stemming
for i in range(len(sentences)):
  words = nltk.word_tokenize(sentences[i])
  words = [ss.stem(word) for word in words if word not in (stopwords.words('english'))]
  sentences[i] = ' '.join(words)

In [None]:
sentences

['the sun dip horizon , cast warm glow quiet town .',
 'bird chirp soft last ray light danc rippl surfac river .',
 'along narrow street , peopl bustl , prepar even ahead .',
 'children laugh play dim sky , joy echo alley .',
 'meanwhil , smell fresh bake bread waft nearbi bakeri , fill air comfort aroma .',
 'the world seem slow moment , time take breath night fulli embrac town .']

In [None]:
lemmaParagraph = """The sun dipped below the horizon, casting a warm glow over the quiet town. Birds chirped softly as the last rays of light danced on the rippling surface of the river. Along the narrow streets, people bustled about, preparing for the evening ahead. Children laughed as they played under the dimming sky, their joy echoing through the alleys. Meanwhile, the smell of freshly baked bread wafted from a nearby bakery, filling the air with a comforting aroma. The world seemed to slow down in this moment, as if time itself was taking a breath before the night fully embraced the town."""

In [None]:
lemmaSentence = nltk.sent_tokenize(lemmaParagraph)

In [None]:
from nltk.stem import WordNetLemmatizer
lm = WordNetLemmatizer()

In [None]:
# lemma Senetnce
## Apply Stopwords and filter and the stemming
for i in range(len(lemmaSentence)):
  words = nltk.word_tokenize(lemmaSentence[i])
  words = [ss.stem(word) for word in words if word not in (stopwords.words('english'))]
  lemmaSentence[i] = ' '.join(words)

In [None]:
lemmaSentence

['the sun dip horizon , cast warm glow quiet town .',
 'bird chirp soft last ray light danc rippl surfac river .',
 'along narrow street , peopl bustl , prepar even ahead .',
 'children laugh play dim sky , joy echo alley .',
 'meanwhil , smell fresh bake bread waft nearbi bakeri , fill air comfort aroma .',
 'the world seem slow moment , time take breath night fulli embrac town .']

# POS Tagging

In [None]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
words ={}
for sent in lemmaSentence:
  tokens = nltk.word_tokenize(sent)
  for word in tokens:
    words[word] = nltk.pos_tag([word])

In [None]:
words

{'the': [('the', 'DT')],
 'sun': [('sun', 'NN')],
 'dip': [('dip', 'NN')],
 'horizon': [('horizon', 'NN')],
 ',': [(',', ',')],
 'cast': [('cast', 'NN')],
 'warm': [('warm', 'NN')],
 'glow': [('glow', 'NN')],
 'quiet': [('quiet', 'JJ')],
 'town': [('town', 'NN')],
 '.': [('.', '.')],
 'bird': [('bird', 'NN')],
 'chirp': [('chirp', 'NN')],
 'soft': [('soft', 'JJ')],
 'last': [('last', 'JJ')],
 'ray': [('ray', 'NN')],
 'light': [('light', 'NN')],
 'danc': [('danc', 'NN')],
 'rippl': [('rippl', 'NN')],
 'surfac': [('surfac', 'NN')],
 'river': [('river', 'NN')],
 'along': [('along', 'IN')],
 'narrow': [('narrow', 'NN')],
 'street': [('street', 'NN')],
 'peopl': [('peopl', 'NN')],
 'bustl': [('bustl', 'NN')],
 'prepar': [('prepar', 'NN')],
 'even': [('even', 'RB')],
 'ahead': [('ahead', 'RB')],
 'children': [('children', 'NNS')],
 'laugh': [('laugh', 'NN')],
 'play': [('play', 'NN')],
 'dim': [('dim', 'NN')],
 'sky': [('sky', 'NN')],
 'joy': [('joy', 'NN')],
 'echo': [('echo', 'NN')],
 'all

# Named Entity Recognition (NER)

In [None]:
# Person, Place, Date, Time, Money, Oraganization, Percent

In [None]:
paragraph = """The sun dipped below the horizon, casting a warm glow over the quiet town. Birds chirped softly as the last rays of light danced on the rippling surface of the river. Along the narrow streets, people bustled about, preparing for the evening ahead. Children laughed as they played under the dimming sky, their joy echoing through the alleys. Meanwhile, the smell of freshly baked bread wafted from a nearby bakery, filling the air with a comforting aroma. The world seemed to slow down in this moment, as if time itself was taking a breath before the night fully embraced the town."""

In [None]:
words = nltk.word_tokenize(paragraph)

In [None]:
tagged = nltk.pos_tag(words)

In [None]:
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [None]:
nltk.ne_chunk(tagged)

ModuleNotFoundError: No module named 'svgling'

Tree('S', [('The', 'DT'), ('sun', 'NN'), ('dipped', 'VBD'), ('below', 'IN'), ('the', 'DT'), ('horizon', 'NN'), (',', ','), ('casting', 'VBG'), ('a', 'DT'), ('warm', 'JJ'), ('glow', 'NN'), ('over', 'IN'), ('the', 'DT'), ('quiet', 'JJ'), ('town', 'NN'), ('.', '.'), Tree('PERSON', [('Birds', 'NNP')]), ('chirped', 'VBD'), ('softly', 'RB'), ('as', 'IN'), ('the', 'DT'), ('last', 'JJ'), ('rays', 'NNS'), ('of', 'IN'), ('light', 'NN'), ('danced', 'VBN'), ('on', 'IN'), ('the', 'DT'), ('rippling', 'VBG'), ('surface', 'NN'), ('of', 'IN'), ('the', 'DT'), ('river', 'NN'), ('.', '.'), ('Along', 'IN'), ('the', 'DT'), ('narrow', 'JJ'), ('streets', 'NNS'), (',', ','), ('people', 'NNS'), ('bustled', 'VBD'), ('about', 'RB'), (',', ','), ('preparing', 'VBG'), ('for', 'IN'), ('the', 'DT'), ('evening', 'NN'), ('ahead', 'RB'), ('.', '.'), Tree('PERSON', [('Children', 'NNP')]), ('laughed', 'VBD'), ('as', 'IN'), ('they', 'PRP'), ('played', 'VBD'), ('under', 'IN'), ('the', 'DT'), ('dimming', 'NN'), ('sky', 'NN')

In [None]:
import matplotlib.pyplot as plt
from nltk.tree import Tree

# Convert to tree object and display with Matplotlib
chunked = nltk.ne_chunk(tagged)
tree = Tree.fromstring(str(chunked))
tree.pretty_print()  # Text representation

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     S                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
   ______________________________________________

# Encoding


In [None]:
print("Hello")

Hello


In [None]:
# One Hot Encoding
# One hot encoding is a technique that we use to represent categorical variables as numerical values in a machine learning model.

In [None]:
#one hot encoding using OneHotEncoder of Scikit-Learn

import pandas as pd
from sklearn.preprocessing import OneHotEncoder

#Building a dummy employee dataset for example
data = {'Employee id': [10, 20, 15, 25, 30],
        'Gender': ['M', 'F', 'F', 'M', 'F'],
        'Remarks': ['Good', 'Nice', 'Good', 'Great', 'Nice'],
        }
#Converting into a Pandas dataframe
df = pd.DataFrame(data)
#Print the dataframe:
print(f"Employee data : \n{df}")

#Extract categorical columns from the dataframe
#Here we extract the columns with object datatype as they are the categorical columns
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()

#Initialize OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)

# Apply one-hot encoding to the categorical columns
one_hot_encoded = encoder.fit_transform(df[categorical_columns])

#Create a DataFrame with the one-hot encoded columns
#We use get_feature_names_out() to get the column names for the encoded data
one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(categorical_columns))

# Concatenate the one-hot encoded dataframe with the original dataframe
df_encoded = pd.concat([df, one_hot_df], axis=1)

# Drop the original categorical columns
df_encoded = df_encoded.drop(categorical_columns, axis=1)

# Display the resulting dataframe
print(f"Encoded Employee data : \n{df_encoded}")

Employee data : 
   Employee id Gender Remarks
0           10      M    Good
1           20      F    Nice
2           15      F    Good
3           25      M   Great
4           30      F    Nice
Encoded Employee data : 
   Employee id  Gender_F  Gender_M  Remarks_Good  Remarks_Great  Remarks_Nice
0           10       0.0       1.0           1.0            0.0           0.0
1           20       1.0       0.0           0.0            0.0           1.0
2           15       1.0       0.0           1.0            0.0           0.0
3           25       0.0       1.0           0.0            1.0           0.0
4           30       1.0       0.0           0.0            0.0           1.0


In [None]:
# Program for demonstration of one hot encoding

# import libraries
import numpy as np
import pandas as pd

# import the data required
data = pd.DataFrame(data)
print(data.head())

   Employee id Gender Remarks
0           10      M    Good
1           20      F    Nice
2           15      F    Good
3           25      M   Great
4           30      F    Nice


In [None]:
data['Gender'].unique()
data['Remarks'].unique()

array(['Good', 'Nice', 'Great'], dtype=object)

In [None]:
data['Gender'].value_counts()
data['Remarks'].value_counts()

Unnamed: 0_level_0,count
Remarks,Unnamed: 1_level_1
Good,2
Nice,2
Great,1


In [None]:
oneHotEncodedData = pd.get_dummies(data, columns = ['Gender', 'Remarks'])
oneHotEncodedData

Unnamed: 0,Employee id,Gender_F,Gender_M,Remarks_Good,Remarks_Great,Remarks_Nice
0,10,False,True,True,False,False
1,20,True,False,False,False,True
2,15,True,False,True,False,False
3,25,False,True,False,True,False
4,30,True,False,False,False,True


In [None]:
# Creating the Bag of Words model
word2count = {}
for data in dataset:
    words = nltk.word_tokenize(data)
    for word in words:
        if word not in word2count.keys():
            word2count[word] = 1
        else:
            word2count[word] += 1

# Bag of Words
X = []
for data in dataset:
    vector = []
    for word in freq_words:
        if word in nltk.word_tokenize(data):
            vector.append(1)
        else:
            vector.append(0)
    X.append(vector)
X = np.asarray(X)

In [None]:
# TF - IDF
from sklearn.feature_extraction.text import TfidfVectorizer
d0 = 'good boy'
d1 = 'good girl'
d2 = 'good girl boy'

# merge documents into a single corpus
string = [d0, d1, d2]

tfidf = TfidfVectorizer()
result = tfidf.fit_transform(string)

# get indexing
print('\nWord indexes:')
print(tfidf.vocabulary_)

# display tf-idf values
print('\ntf-idf value:')
print(result)

# in matrix form
print('\ntf-idf values in matrix form:')
print(result.toarray())


Word indexes:
{'good': 2, 'boy': 0, 'girl': 1}

tf-idf value:
  (0, 2)	0.6133555370249717
  (0, 0)	0.7898069290660905
  (1, 2)	0.6133555370249717
  (1, 1)	0.7898069290660905
  (2, 2)	0.48133416873660545
  (2, 0)	0.6198053799406072
  (2, 1)	0.6198053799406072

tf-idf values in matrix form:
[[0.78980693 0.         0.61335554]
 [0.         0.78980693 0.61335554]
 [0.61980538 0.61980538 0.48133417]]


# Word2Vec

In [None]:
!pip install gensim



In [None]:
import gensim

In [None]:
from gensim.models import Word2Vec, KeyedVectors

In [None]:
import gensim.downloader as api
wv = api.load('word2vec-google-news-300')



In [None]:
vec_king = wv['king']
print(vec_king)
vec_ai = wv['ai']
print(vec_ai)
try:
  vec_machine = wv['machine learning']
  print(vec_machine)
except:
  print("Word not found")

[ 1.25976562e-01  2.97851562e-02  8.60595703e-03  1.39648438e-01
 -2.56347656e-02 -3.61328125e-02  1.11816406e-01 -1.98242188e-01
  5.12695312e-02  3.63281250e-01 -2.42187500e-01 -3.02734375e-01
 -1.77734375e-01 -2.49023438e-02 -1.67968750e-01 -1.69921875e-01
  3.46679688e-02  5.21850586e-03  4.63867188e-02  1.28906250e-01
  1.36718750e-01  1.12792969e-01  5.95703125e-02  1.36718750e-01
  1.01074219e-01 -1.76757812e-01 -2.51953125e-01  5.98144531e-02
  3.41796875e-01 -3.11279297e-02  1.04492188e-01  6.17675781e-02
  1.24511719e-01  4.00390625e-01 -3.22265625e-01  8.39843750e-02
  3.90625000e-02  5.85937500e-03  7.03125000e-02  1.72851562e-01
  1.38671875e-01 -2.31445312e-01  2.83203125e-01  1.42578125e-01
  3.41796875e-01 -2.39257812e-02 -1.09863281e-01  3.32031250e-02
 -5.46875000e-02  1.53198242e-02 -1.62109375e-01  1.58203125e-01
 -2.59765625e-01  2.01416016e-02 -1.63085938e-01  1.35803223e-03
 -1.44531250e-01 -5.68847656e-02  4.29687500e-02 -2.46582031e-02
  1.85546875e-01  4.47265

In [None]:
wv.most_similar('ai')

[('che', 0.6292661428451538),
 ('te', 0.6197360754013062),
 ('essere', 0.5948276519775391),
 ('é', 0.593923032283783),
 ('mai', 0.5895475149154663),
 ('voi', 0.5879174470901489),
 ('tutto', 0.5835935473442078),
 ('ti', 0.5791943073272705),
 ('tutti_i', 0.576695442199707),
 ('questo', 0.5765190124511719)]

In [None]:
wv.most_similar('google')

[('google.com', 0.6711485981941223),
 ('google_yahoo', 0.6488178968429565),
 ('wikipedia', 0.643608033657074),
 ('www.google.com', 0.6258559226989746),
 ('googled', 0.6166064143180847),
 ('googling', 0.6086059212684631),
 ('slashdot', 0.5964587330818176),
 ('lifehacker', 0.5948858857154846),
 ('gizmodo', 0.5884420275688171),
 ('inurl', 0.5882638692855835)]

In [None]:
print("Similarity Between crickrt and sports is ", wv.similarity('cricket', 'sports'))
print("Similarity Between shaharukh and king is ", wv.similarity('shahrukh', 'king'))
print("Similarity Between good and bad is ", wv.similarity('good', 'bad'))

Similarity Between crickrt and sports is  0.40087253
Similarity Between shaharukh and king is  0.2531487
Similarity Between good and bad is  0.7190051


In [None]:
vec = wv['king'] - wv['man'] + wv['woman']
vec
wv.most_similar([vec])

[('king', 0.8449392318725586),
 ('queen', 0.7300517559051514),
 ('monarch', 0.645466148853302),
 ('princess', 0.6156251430511475),
 ('crown_prince', 0.5818676352500916),
 ('prince', 0.5777117609977722),
 ('kings', 0.5613663792610168),
 ('sultan', 0.5376775860786438),
 ('Queen_Consort', 0.5344247817993164),
 ('queens', 0.5289887189865112)]