In [None]:
import textstat                                                                                                                                     # type: ignore

def assign_complexity(text):
    """Bucket ``text`` into a reading level based on its Flesch Reading Ease score.

    Args:
        text: The passage to score.

    Returns:
        "Beginner" when the score is above 60, "Intermediate" when it is
        above 30 (up to and including 60), and "Advanced" otherwise.
    """
    readability = textstat.flesch_reading_ease(text)

    # Bands are listed from easiest to hardest; the first lower bound that the
    # score strictly exceeds decides the label.
    for lower_bound, level in ((60, "Beginner"), (30, "Intermediate")):
        if readability > lower_bound:
            return level
    return "Advanced"

# Keep only training passages longer than 100 characters, then tag each one
# with the complexity level derived from its readability score.
long_passages = (row["text"] for row in dataset["train"] if len(row["text"]) > 100)  # type: ignore
labeled_data = [
    {"text": passage, "complexity": assign_complexity(passage)}
    for passage in long_passages
]

In [None]:
import torch  # type: ignore
from datasets import Dataset  # type: ignore
from transformers import (
    BertForSequenceClassification,
    BertTokenizerFast,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

# Tokenizer for the same checkpoint that is fine-tuned below
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

# String complexity label -> integer class id expected by the classifier head
label_mapping = {"Beginner": 0, "Intermediate": 1, "Advanced": 2}


def preprocess_function(examples):
    """Tokenize a batch of examples.

    Args:
        examples: A batch as passed by ``Dataset.map(..., batched=True)``;
            must contain a "text" column.

    Returns:
        The tokenizer output for the batch, truncated but NOT padded.
        Padding is deferred to the data collator so that every training
        batch is padded to its own longest sequence; padding here would pad
        per map-batch and leave rows of different lengths in the dataset.
    """
    return tokenizer(examples["text"], truncation=True)


# Build the Hugging Face dataset under its own name. Rebinding `dataset` here
# would clobber the raw dataset that the labeling step reads via
# `dataset["train"]`, so that step could not be re-run afterwards.
labeled_dataset = Dataset.from_list(labeled_data)

# Tokenize, then attach the numeric label column the model trains on
encoded_dataset = (
    labeled_dataset
    .map(preprocess_function, batched=True)
    .map(lambda example: {"labels": label_mapping[example["complexity"]]})
)

# Per-epoch evaluation needs held-out data; keep 10% aside (fixed seed so the
# split is reproducible across runs).
split_dataset = encoded_dataset.train_test_split(test_size=0.1, seed=42)

# Load model; the size of the classification head follows the label set
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(label_mapping),
)

# Training setup
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` -- switch the keyword if the installed version rejects it.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=split_dataset["train"],
    # Required because evaluation runs at the end of every epoch
    eval_dataset=split_dataset["test"],
    # Pads each batch dynamically to the longest sequence it contains
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)

# Train model
trainer.train()

In [None]:
def predict_complexity(text):
    """Predict the complexity label of a single text with the fine-tuned model.

    Args:
        text: The passage to classify (a single string).

    Returns:
        One of "Beginner", "Intermediate" or "Advanced", chosen as the class
        with the highest logit.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # The model may live on an accelerator after training; the inputs must be
    # on the same device or the forward pass fails with a device mismatch.
    device = model.device
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Run inference in eval mode (dropout disabled) and without building the
    # autograd graph, then restore whatever mode the model was in before.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(**inputs)
    finally:
        model.train(was_training)

    predicted_class = torch.argmax(outputs.logits, dim=1).item()

    return ["Beginner", "Intermediate", "Advanced"][predicted_class]

# Example: classify a short piece of study material with the fine-tuned model
# and print the predicted complexity label.
text_sample = "Quantum entanglement is a physical phenomenon where..."
complexity_label = predict_complexity(text_sample)
print("Predicted Complexity:", complexity_label)