In [None]:
import nltk
import spacy
from pathlib import Path
import os

# Load spaCy's small English pipeline once for the whole notebook, then lift
# its `max_length` limit to two million so that long texts are not rejected.
nlp = spacy.load("en_core_web_sm")
nlp.max_length = 2_000_000

In [None]:
# Import dependencies
from nltk.corpus import cmudict

# Download the required NLTK data *before* anything reads it: the corpus
# loader is lazy, so `cmudict.dict()` raises LookupError on a machine where
# the corpus has not been fetched yet.
nltk.download('punkt')
# Newer NLTK releases look for 'punkt_tab' instead of 'punkt' when sentence
# tokenising; on older releases this call just reports a failed download.
nltk.download('punkt_tab')
nltk.download('cmudict')

# CMU Pronouncing Dictionary: maps a lower-case word to its list of
# pronunciations (each pronunciation is a list of phoneme strings).
d = cmudict.dict()

def fk_level(text, d):
    """Compute the Flesch-Kincaid Grade Level of ``text``.

    Sentences and words come from NLTK's tokenisers; only purely alphabetic
    tokens, lower-cased, are counted as words. A word found in ``d`` gets its
    syllable count from the first pronunciation listed there; any other word
    gets a vowel-cluster estimate.

    Args:
        text (str): The text to analyse.
        d (dict): Maps a lower-case word to a list of pronunciations, each a
            sequence of phoneme strings. Phonemes ending in a digit (the
            stress marker) are counted as syllables.

    Returns:
        float: The grade level (a higher grade means a harder text), or 0.0
        when the text yields no sentences or no words.
    """
    vowel_chars = "aeiouy"

    def syllables_in(token):
        # Known word: one syllable per stress-marked phoneme of the first
        # pronunciation variant.
        if token in d:
            first_variant = d[token][0]
            return sum(1 for ph in first_variant if ph[-1].isdigit())

        # Unknown word: count the starts of vowel runs, i.e. every vowel
        # whose predecessor is not a vowel. The leading space stands in for
        # "no previous character" at position 0.
        shifted = " " + token
        clusters = sum(
            1
            for before, current in zip(shifted, token)
            if current in vowel_chars and before not in vowel_chars
        )

        # A trailing 'e' is treated as silent unless it is the only cluster.
        if clusters > 1 and token.endswith('e'):
            clusters -= 1

        # Every word has at least one syllable.
        return max(1, clusters)

    sentence_list = nltk.sent_tokenize(text)
    tokens = [tok.lower() for tok in nltk.word_tokenize(text) if tok.isalpha()]

    syllable_total = sum(map(syllables_in, tokens))
    n_sentences, n_words = len(sentence_list), len(tokens)

    # Nothing to average over: report 0.0 instead of dividing by zero.
    if not (n_sentences and n_words):
        return 0.0

    words_per_sentence = n_words / n_sentences
    syllables_per_word = syllable_total / n_words

    # Flesch-Kincaid Grade Level formula.
    return (0.39 * words_per_sentence) + (11.8 * syllables_per_word) - 15.59
