# StoryOracle — Notebook 01: Data Preparation
Purpose: Clean story paragraphs, extract readability + lexical features, prepare dataset for modeling.


In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import textstat
from textblob import TextBlob

# Download the 'punkt' resource at import time; presumably this is the model
# data that sent_tokenize / word_tokenize (imported above) need at runtime.
# NOTE(review): recent NLTK releases may look for 'punkt_tab' instead — confirm
# against the installed NLTK version.
nltk.download('punkt')
class TextAnalyzer:
    """Compute size, readability and sentiment features for a single text.

    The text is tokenized once, at construction time, into sentences and word
    tokens; the feature methods then work from those stored tokens or from the
    raw text.

    NOTE(review): NLTK's ``word_tokenize`` normally emits punctuation marks as
    separate tokens, so the word-based features below presumably count them as
    "words" — confirm that this is intended for the dataset.
    """

    def __init__(self, text: str) -> None:
        """Store *text* and split it into sentences and word tokens."""
        self.text = text
        self.sentences = sent_tokenize(text)
        self.words = word_tokenize(text)

    def word_count(self) -> int:
        """Return the number of word tokens found in the text."""
        return len(self.words)

    def sentence_count(self) -> int:
        """Return the number of sentences found in the text."""
        return len(self.sentences)

    def average_word_length(self) -> float:
        """Return the mean token length in characters, or 0.0 if no tokens."""
        if not self.words:
            # Guard against division by zero for empty input.
            return 0.0
        return sum(len(word) for word in self.words) / len(self.words)

    def flesch_reading_ease(self) -> float:
        """Return the value of ``textstat.flesch_reading_ease`` for the text."""
        return textstat.flesch_reading_ease(self.text)

    def sentiment_analysis(self) -> tuple:
        """Return ``(polarity, subjectivity)`` as reported by TextBlob."""
        sentiment = TextBlob(self.text).sentiment
        return sentiment.polarity, sentiment.subjectivity

    def get_analysis(self) -> dict:
        """Return every feature of the text in a single dictionary.

        Keys: ``word_count``, ``sentence_count``, ``average_word_length``,
        ``flesch_reading_ease``, ``sentiment_polarity`` and
        ``sentiment_subjectivity``.
        """
        # Run the sentiment model once and unpack both scores, rather than
        # building and scoring a TextBlob separately for each of them.
        polarity, subjectivity = self.sentiment_analysis()
        return {
            "word_count": self.word_count(),
            "sentence_count": self.sentence_count(),
            "average_word_length": self.average_word_length(),
            "flesch_reading_ease": self.flesch_reading_ease(),
            "sentiment_polarity": polarity,
            "sentiment_subjectivity": subjectivity,
        }

: 