In [2]:
#★
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun for unknown cases

# Read the list of verbs from a file named '1998_verbs.txt'
with open('1998_verbs.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Tag each verb with its part of speech
tagged_verbs = nltk.pos_tag(verbs)

# Lemmatize each word (now including all parts of speech, not just verbs)
lemmatized_words = [lemmatizer.lemmatize(word, get_wordnet_pos(pos)) for word, pos in tagged_verbs]

# Count each lemmatized word's frequency
word_frequency = Counter(lemmatized_words)

# Convert the counter to a dataframe for better visualization
df_words = pd.DataFrame(word_frequency.items(), columns=['Word', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_words.sort_values('Count', ascending=False, inplace=True)
df_words.reset_index(drop=True, inplace=True)

# Display the total number of unique word types and the top 10 rows
print(f"Total unique word types: {len(df_words)}")
print(df_words.head(10))


Total unique word types: 114
       Word  Count
0   provide     16
1     serve      6
2   operate      6
3      make      6
4   deliver      5
5      help      4
6    enable      4
7    market      3
8     offer      3
9  maintain      3


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
#★
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun for unknown cases

# Read the list of verbs from a file named '1998_verbs.txt'
with open('2005', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Tag each verb with its part of speech
tagged_verbs = nltk.pos_tag(verbs)

# Lemmatize each word (now including all parts of speech, not just verbs)
lemmatized_words = [lemmatizer.lemmatize(word, get_wordnet_pos(pos)) for word, pos in tagged_verbs]

# Count each lemmatized word's frequency
word_frequency = Counter(lemmatized_words)

# Convert the counter to a dataframe for better visualization
df_words = pd.DataFrame(word_frequency.items(), columns=['Word', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_words.sort_values('Count', ascending=False, inplace=True)
df_words.reset_index(drop=True, inplace=True)

# Display the total number of unique word types and the top 10 rows
print(f"Total unique word types: {len(df_words)}")
print(df_words.head(10))


In [4]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun if unknown

# Read the list of verbs from a file named '1998_verbs.txt'
with open('2021_verbs.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Lemmatize each verb in the list
lemmatized_verbs = [lemmatizer.lemmatize(verb, get_wordnet_pos(pos)) for verb, pos in nltk.pos_tag(verbs)]

# Count each lemmatized verb's frequency
verb_frequency = Counter(lemmatized_verbs)

# Convert the counter to a dataframe for better visualization
df_verbs = pd.DataFrame(verb_frequency.most_common(), columns=['Verb', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_verbs.sort_values('Count', ascending=False, inplace=True)
df_verbs.reset_index(drop=True, inplace=True)

# Display the top 10 rows
print(df_verbs.head(10))


       Verb  Count
0   deliver      8
1        do      8
2    create      6
3   provide      6
4     serve      5
5      make      5
6  leverage      5
7   achieve      5
8      help      4
9   empower      4


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
#★
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun for unknown cases

# Read the list of verbs from a file named '1998_verbs.txt'
with open('2021_verbs.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Tag each verb with its part of speech
tagged_verbs = nltk.pos_tag(verbs)

# Lemmatize each word (now including all parts of speech, not just verbs)
lemmatized_words = [lemmatizer.lemmatize(word, get_wordnet_pos(pos)) for word, pos in tagged_verbs]

# Count each lemmatized word's frequency
word_frequency = Counter(lemmatized_words)

# Convert the counter to a dataframe for better visualization
df_words = pd.DataFrame(word_frequency.items(), columns=['Word', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_words.sort_values('Count', ascending=False, inplace=True)
df_words.reset_index(drop=True, inplace=True)

# Display the total number of unique word types and the top 10 rows
print(f"Total unique word types: {len(df_words)}")
print(df_words.head(10))


Total unique word types: 92
       Word  Count
0        do      8
1   deliver      8
2   provide      6
3    create      6
4      make      5
5     serve      5
6  leverage      5
7   achieve      5
8      help      4
9   empower      4


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [1]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun if unknown

# Read the list of verbs from a file named '1998_verbs.txt'
with open('1998_mentalverb.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Lemmatize each verb in the list
lemmatized_verbs = [lemmatizer.lemmatize(verb, get_wordnet_pos(pos)) for verb, pos in nltk.pos_tag(verbs)]

# Count each lemmatized verb's frequency
verb_frequency = Counter(lemmatized_verbs)

# Convert the counter to a dataframe for better visualization
df_verbs = pd.DataFrame(verb_frequency.most_common(), columns=['Verb', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_verbs.sort_values('Count', ascending=False, inplace=True)
df_verbs.reset_index(drop=True, inplace=True)

# Display the top 10 rows
print(df_verbs.head(10))


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


             Verb  Count
0         believe      2
1           focus      2
2       discovers      1
3       celebrate      1
4        discover      1
5   is considered      1
6  are focused on      1
7            find      1
8            want      1
9   is recognized      1


In [5]:
#★
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun for unknown cases

# Read the list of verbs from a file named '1998_verbs.txt'
with open('1998_mentalverb.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Tag each verb with its part of speech
tagged_verbs = nltk.pos_tag(verbs)

# Lemmatize each word (now including all parts of speech, not just verbs)
lemmatized_words = [lemmatizer.lemmatize(word, get_wordnet_pos(pos)) for word, pos in tagged_verbs]

# Count each lemmatized word's frequency
word_frequency = Counter(lemmatized_words)

# Convert the counter to a dataframe for better visualization
df_words = pd.DataFrame(word_frequency.items(), columns=['Word', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_words.sort_values('Count', ascending=False, inplace=True)
df_words.reset_index(drop=True, inplace=True)

# Display the total number of unique word types and the top 10 rows
print(f"Total unique word types: {len(df_words)}")
print(df_words.head(10))


Total unique word types: 18
             Word  Count
0           focus      2
1         believe      2
2          intend      1
3            want      1
4       discovers      1
5       celebrate      1
6        discover      1
7   is considered      1
8  are focused on      1
9            find      1


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
# Read the list of verbs from a file named '1998_verbs.txt'
with open('2005_mentalverb.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Lemmatize each verb in the list
lemmatized_verbs = [lemmatizer.lemmatize(verb, get_wordnet_pos(pos)) for verb, pos in nltk.pos_tag(verbs)]

# Count each lemmatized verb's frequency
verb_frequency = Counter(lemmatized_verbs)

# Convert the counter to a dataframe for better visualization
df_verbs = pd.DataFrame(verb_frequency.most_common(), columns=['Verb', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_verbs.sort_values('Count', ascending=False, inplace=True)
df_verbs.reset_index(drop=True, inplace=True)

# Display the top 10 rows
print(df_verbs.head(10))


                       Verb  Count
0                       see      3
1                  may know      2
2                    ensure      2
3                   believe      1
4              committed to      1
5                    expect      1
6                      find      1
7  are relentlessly focused      1
8                  envision      1
9      can easily determine      1


In [6]:
#★
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun for unknown cases

# Read the list of verbs from a file named '1998_verbs.txt'
with open('2005_mentalverb.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Tag each verb with its part of speech
tagged_verbs = nltk.pos_tag(verbs)

# Lemmatize each word (now including all parts of speech, not just verbs)
lemmatized_words = [lemmatizer.lemmatize(word, get_wordnet_pos(pos)) for word, pos in tagged_verbs]

# Count each lemmatized word's frequency
word_frequency = Counter(lemmatized_words)

# Convert the counter to a dataframe for better visualization
df_words = pd.DataFrame(word_frequency.items(), columns=['Word', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_words.sort_values('Count', ascending=False, inplace=True)
df_words.reset_index(drop=True, inplace=True)

# Display the total number of unique word types and the top 10 rows
print(f"Total unique word types: {len(df_words)}")
print(df_words.head(10))


Total unique word types: 15
                       Word  Count
0                       see      3
1                  may know      2
2                    ensure      2
3                   believe      1
4              committed to      1
5                    expect      1
6                      find      1
7  are relentlessly focused      1
8                  envision      1
9      can easily determine      1


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Read the list of verbs from a file named '1998_verbs.txt'
with open('2021_mentalverb.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Lemmatize each verb in the list
lemmatized_verbs = [lemmatizer.lemmatize(verb, get_wordnet_pos(pos)) for verb, pos in nltk.pos_tag(verbs)]

# Count each lemmatized verb's frequency
verb_frequency = Counter(lemmatized_verbs)

# Convert the counter to a dataframe for better visualization
df_verbs = pd.DataFrame(verb_frequency.most_common(), columns=['Verb', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_verbs.sort_values('Count', ascending=False, inplace=True)
df_verbs.reset_index(drop=True, inplace=True)

# Display the top 10 rows
print(df_verbs.head(10))


         Verb  Count
0       value      2
1  prioritize      2
2     embrace      2
3       focus      1
4        look      1
5    evaluate      1
6   challenge      1
7  anticipate      1
8    focus on      1
9    discover      1


In [7]:
#★
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from collections import Counter
import pandas as pd

# Ensure you have the necessary NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize the WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# A helper function to convert NLTK's part of speech tags to wordnet tags
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to noun for unknown cases

# Read the list of verbs from a file named '1998_verbs.txt'
with open('2021_mentalverb.txt', 'r') as file:
    verbs = file.read().strip().lower().split('\n')  # Convert to lowercase and split by new lines

# Tag each verb with its part of speech
tagged_verbs = nltk.pos_tag(verbs)

# Lemmatize each word (now including all parts of speech, not just verbs)
lemmatized_words = [lemmatizer.lemmatize(word, get_wordnet_pos(pos)) for word, pos in tagged_verbs]

# Count each lemmatized word's frequency
word_frequency = Counter(lemmatized_words)

# Convert the counter to a dataframe for better visualization
df_words = pd.DataFrame(word_frequency.items(), columns=['Word', 'Count'])

# Sort the dataframe by count in descending order to get the ranking
df_words.sort_values('Count', ascending=False, inplace=True)
df_words.reset_index(drop=True, inplace=True)

# Display the total number of unique word types and the top 10 rows
print(f"Total unique word types: {len(df_words)}")
print(df_words.head(10))


Total unique word types: 25
         Word  Count
0       value      2
1  prioritize      2
2     embrace      2
3       focus      1
4        look      1
5    evaluate      1
6   challenge      1
7  anticipate      1
8    focus on      1
9    discover      1


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kitaz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
