In [47]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from collections import defaultdict

# Fetch the NLTK data packages used below; NLTK reports packages that are
# already up to date instead of downloading them again.
for _resource in ('punkt', 'averaged_perceptron_tagger', 'wordnet'):
  nltk.download(_resource)

# Shared lemmatizer used by get_word_level
lemma = WordNetLemmatizer()

def get_word_level(word):
  """
  Assign a heuristic score to a word from its WordNet part of speech.

  The word is lemmatized with the module-level ``lemma`` instance and the
  lemma is looked up in WordNet. Only the first synset returned is consulted.

  Args:
    word: The token to score.

  Returns:
    0.2 when the first synset is a noun, 0.8 when it is a verb, 0.5 for any
    other part of speech, or None when WordNet has no synset for the lemma.
  """
  lemma_word = lemma.lemmatize(word)

  # Query WordNet once and reuse the result for both the emptiness check
  # and the POS lookup.
  synsets = wn.synsets(lemma_word)
  if not synsets:
    return None

  # Synset.pos() reports WordNet's single-letter codes ('n', 'v', 'a', 's',
  # 'r'), so only those need to be compared against.
  pos = synsets[0].pos()
  if pos == 'n':  # Nouns (lower score)
    return 0.2
  if pos == 'v':  # Verbs (higher score)
    return 0.8
  return 0.5  # Other parts of speech (medium score)

def classify_words(text, *, high_threshold=0.8, low_threshold=0.4):
  """
  Group the tokens of a text into coarse CEFR bands by their word score.

  The text is tokenized with ``nltk.word_tokenize`` and every token is scored
  with ``get_word_level``. Tokens for which no score is available (None) are
  left out of the result.

  Args:
    text: The raw text to classify.
    high_threshold: Scores greater than or equal to this value are placed in
      the 'C1/C2' band. Checked before ``low_threshold``.
    low_threshold: Scores less than or equal to this value are placed in the
      'A1/A2' band.

  Returns:
    A ``defaultdict(list)`` mapping the band labels 'C1/C2', 'A1/A2' and
    'B1/B2' to the tokens assigned to them, in order of appearance and with
    repeats kept. A band that received no token has no key.
  """
  classified_words = defaultdict(list)

  # No POS tagging here: get_word_level takes only the token itself, so
  # tagging the tokens would be work whose result is never read.
  for word in nltk.word_tokenize(text):
    level = get_word_level(word)
    if level is None:
      continue

    if level >= high_threshold:
      band = 'C1/C2'
    elif level <= low_threshold:
      band = 'A1/A2'
    else:
      band = 'B1/B2'
    classified_words[band].append(word)

  return classified_words

def read_text_file(filename, encoding="utf-8"):
  """
  Read a text file and return its entire contents as a string.

  Args:
    filename: Path of the file to read.
    encoding: Text encoding used to decode the file. Defaults to UTF-8 so the
      result does not depend on the platform's locale encoding.

  Returns:
    The full contents of the file.

  Raises:
    OSError: If the file cannot be opened.
    UnicodeDecodeError: If the file is not valid in the given encoding.
  """
  with open(filename, 'r', encoding=encoding) as f:
    return f.read()

def write_results_to_file(results, filename, encoding="utf-8"):
  """
  Write classification results to a text file, one level per line.

  The file is created or overwritten. It starts with a fixed header line,
  followed by one ``<level>: <words>`` line per entry of ``results``, where
  the words are rendered with their default string representation.

  Args:
    results: Mapping of level label to the words assigned to that level.
    filename: Path of the file to write.
    encoding: Text encoding for the output. Defaults to UTF-8 so that words
      outside the platform's locale encoding can still be written.

  Raises:
    OSError: If the file cannot be opened for writing.
  """
  with open(filename, 'w', encoding=encoding) as f:
    f.write("CEFR Level Classification (score-based thresholds):\n")
    for level, words in results.items():
      f.write(f"{level}: {words}\n")

# Input text to analyse and destination of the classification report
text_file = "input_text.txt"
output_file = "cefr_classification_results.txt"  # Change to write the report elsewhere

# Pipeline: load the text, group its words by level, then save the groups
text = read_text_file(text_file)
word_lists = classify_words(text)
write_results_to_file(word_lists, output_file)

print(f"CEFR Level Classification results written to: {output_file}")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


CEFR Level Classification results written to: cefr_classification_results.txt
