In [5]:
import nltk
from nltk.corpus import wordnet as wn
import csv
import pandas as pd

# Function to map short POS tags to full names
def get_full_pos(pos_tag):
    """Expand a one-letter part-of-speech tag into its full English name.

    Args:
        pos_tag: Short tag, one of ``'n'``, ``'v'``, ``'a'``, ``'s'`` or
            ``'r'``.  Both ``'a'`` and ``'s'`` expand to ``'adjective'``.

    Returns:
        The full part-of-speech name, or ``'unknown'`` when the tag is not
        one of the recognised letters.
    """
    full_names = dict(
        n='noun',
        v='verb',
        a='adjective',
        s='adjective',
        r='adverb',
    )
    try:
        return full_names[pos_tag]
    except KeyError:
        # Anything outside the table falls back to a fixed placeholder.
        return 'unknown'

# Define a function to get the most common meaning and part of speech
def get_definition_and_pos(word):
    """Look up *word* in WordNet and describe its first listed sense.

    The first synset returned by ``wn.synsets`` is taken as the word's
    primary meaning (an assumption about WordNet's ordering, not something
    this function verifies).

    Args:
        word: The word to look up.

    Returns:
        A ``(definition, pos)`` tuple, where ``pos`` is the full
        part-of-speech name produced by ``get_full_pos``.  When WordNet has
        no synsets for the word, ``(None, None)`` is returned.
    """
    senses = wn.synsets(word)
    if senses:
        primary = senses[0]
        return primary.definition(), get_full_pos(primary.pos())

    # Nothing found for this word.
    return None, None

# Fetch the NLTK resources used below.  nltk.download() is called
# unconditionally for each package; it reports packages that are already
# present as up-to-date instead of fetching them again.
# 'words' is no longer read in this cell but is still fetched so the set of
# downloaded packages is a superset of what this cell fetched before.
for resource in ('wordnet', 'words', 'omw-1.4', 'brown'):
    nltk.download(resource)

# Build the list of most common words.
# nltk.corpus.words is an alphabetical dictionary list, so slicing its first
# 500 entries yields "A", "a", "aa", "aal", ... rather than frequent words.
# Counting alphabetic tokens (case-folded) in the Brown corpus gives an
# actual frequency ranking to take the top entries from.
NUM_WORDS = 500
word_counts = nltk.FreqDist(
    token.lower() for token in nltk.corpus.brown.words() if token.isalpha()
)
freq_words = [word for word, _ in word_counts.most_common(NUM_WORDS)]

# Prepare rows for the CSV.  Words with no WordNet definition are skipped,
# so the output may contain fewer than NUM_WORDS rows.
data = []
for word in freq_words:
    definition, pos = get_definition_and_pos(word)
    if definition:
        data.append([word, definition, pos])

# Create a DataFrame and save it to CSV (without the index column).
df = pd.DataFrame(data, columns=['words', 'def', 'POS'])
csv_file_path = 'english_frequent_words.csv'
df.to_csv(csv_file_path, index=False)


[nltk_data] Downloading package wordnet to /home/parsa/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package words to /home/parsa/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/parsa/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
