In [1]:
from docx import Document
from textstat import flesch_reading_ease
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob
from gensim.models import LdaModel
from gensim.corpora import Dictionary
from gensim.models import CoherenceModel

## Prerequisites


In [2]:
# Open the full text and its summary as python-docx documents.
full_doc = Document('Alexander.docx')
summarized_doc = Document('Alexander_summerized.docx')

# Collect the text of every paragraph, in document order.
full_text_content = [para.text for para in full_doc.paragraphs]
summarized_text_content = [para.text for para in summarized_doc.paragraphs]

# Flatten each document into one newline-separated string for the metrics below.
full_doc_text = '\n'.join(full_text_content)
summarized_doc_text = '\n'.join(summarized_text_content)



## Information Retention
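Evaluate how much of the original content is retained in the summary. This can be measured by comparing the words or sentences in the summary to those in the original text; here it is computed as the fraction of the original's unique (lower-cased, whitespace-separated) words that also appear in the summary.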

In [3]:
def calculate_information_retention(original_text, summary):
    """Return the fraction of the original's unique words that also occur in the summary.

    Both texts are lower-cased and split on whitespace, so punctuation stays
    attached to the word it touches (``"king."`` and ``"king"`` are different
    words for this metric).

    Args:
        original_text: The full source text.
        summary: The summary being evaluated.

    Returns:
        A float in ``[0.0, 1.0]``: shared unique words divided by the number of
        unique words in ``original_text``. Returns ``0.0`` when
        ``original_text`` contains no words, instead of dividing by zero.
    """
    original_words = set(original_text.lower().split())
    if not original_words:
        # Nothing to retain; avoids ZeroDivisionError on empty/blank input.
        return 0.0

    summary_words = set(summary.lower().split())
    return len(summary_words & original_words) / len(original_words)


# Share of the full document's unique words that survive in the summary.
information_retention = calculate_information_retention(
    full_doc_text,
    summarized_doc_text,
)
print(f"Information Retention: {information_retention:.2%}")


Information Retention: 43.22%


## Readability
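Measure how easy the summary is to read. Shorter sentences are often more readable; here readability is scored with the Flesch Reading Ease formula (via `textstat`) rather than average sentence length alone.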

In [4]:
def calculate_readability(summary):
    """Return the Flesch Reading Ease score textstat computes for ``summary``."""
    score = flesch_reading_ease(summary)
    return score

# Score the summary text loaded from the .docx file.
readability_score = calculate_readability(
    summarized_doc_text
)
print(f"Readability Score: {readability_score:.2f}")


Readability Score: 63.49


## Clarity and Conciseness
Ensure that the language used in the summary is clear and concise, avoiding jargon or overly complex terms.


In [5]:
def calculate_clarity_and_conciseness(text):
    """Return the Flesch Reading Ease score of ``text`` as a clarity/conciseness proxy.

    This is the same textstat metric that ``calculate_readability`` uses.
    """
    return flesch_reading_ease(text)

# Example usage on a fixed demo sentence.
# NOTE(review): unlike the other metrics in this notebook, this scores the
# hard-coded sentence below, not summarized_doc_text.
text = (
    "This is an example sentence for measuring clarity and conciseness."
)
clarity_score = calculate_clarity_and_conciseness(text)
print(f"Clarity and Conciseness Score: {clarity_score:.2f}")


Clarity and Conciseness Score: 35.95


## Relevance
Assess whether the summary aligns with its intended purpose. For example, if the summary is meant to provide an overview, it should focus on key points.

In [6]:
def calculate_relevance(original_text, summary):
    """Return the cosine similarity between the word-count vectors of two texts.

    Args:
        original_text: The full source text.
        summary: The summary being evaluated.

    Returns:
        The cosine similarity of the two bag-of-words count vectors.
    """
    # Fit a single vocabulary over both texts so the two rows share columns.
    count_matrix = CountVectorizer().fit_transform([original_text, summary])
    dense_counts = count_matrix.toarray()
    original_row, summary_row = dense_counts[0], dense_counts[1]

    # cosine_similarity expects 2-D inputs, so wrap each row in a list.
    similarity = cosine_similarity([original_row], [summary_row])
    return similarity[0][0]

# Compare the summary's word counts against the full document's.
relevance_score = calculate_relevance(
    full_doc_text,
    summarized_doc_text,
)
print(f"Relevance Score: {relevance_score:.2f}")

Relevance Score: 0.96


## Engagement
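If applicable, track user engagement with the summary, such as click-through rates or time spent reading. Higher engagement may indicate better quality. No usage data is available in this notebook, so the summary's sentiment polarity (via `TextBlob`) is used as a rough stand-in.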

In [7]:
def calculate_engagement(text):
    """Return TextBlob's sentiment polarity for ``text``, used here as an engagement proxy."""
    blob = TextBlob(text)
    return blob.sentiment.polarity

# Sentiment polarity of the summary text.
engagement_score = calculate_engagement(
    summarized_doc_text
)
print(f"Engagement Score: {engagement_score:.2f}")


Engagement Score: 0.17


## Coherence
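Evaluate the use of transition words and phrases to ensure that the summary flows smoothly between sentences and paragraphs. Here coherence is approximated by the `c_v` topic coherence of an LDA model fitted on the summary's sentences.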

In [8]:
def calculate_coherence_score(summarized_text, num_topics=2, random_state=42):
    """Return the ``c_v`` topic-coherence score of an LDA model fitted on the text.

    The text is split into sentences on ``'.'``, each sentence is split into
    whitespace-separated tokens, and an LDA model with ``num_topics`` topics is
    trained with each sentence as one document.

    Args:
        summarized_text: The text to score.
        num_topics: Number of LDA topics to fit.
        random_state: Seed passed to ``LdaModel`` so repeated runs give the
            same score. Pass ``None`` to get gensim's unseeded behaviour.

    Returns:
        The coherence value reported by ``CoherenceModel.get_coherence()``.
    """
    # Split on '.' and keep only fragments that still have content after
    # stripping; whitespace-only fragments (e.g. the newline after a final
    # period) would otherwise become empty documents in the corpus.
    sentences = [
        fragment.strip()
        for fragment in summarized_text.split('.')
        if fragment.strip()
    ]

    # Tokenize sentences into words
    tokenized_sentences = [sentence.split() for sentence in sentences]

    # Create a dictionary and a bag-of-words corpus (one document per sentence)
    dictionary = Dictionary(tokenized_sentences)
    corpus = [dictionary.doc2bow(tokens) for tokens in tokenized_sentences]

    # Build an LDA model on the corpus; seeded so the score is reproducible.
    lda_model = LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        random_state=random_state,
    )

    # Calculate coherence score
    coherence_model_lda = CoherenceModel(
        model=lda_model,
        texts=tokenized_sentences,
        dictionary=dictionary,
        coherence='c_v',
    )
    return coherence_model_lda.get_coherence()

# Topic coherence of the summary, using the default number of topics.
coherence_score = calculate_coherence_score(
    summarized_doc_text
)
print(f"Coherence Score: {coherence_score:.2f}")


Coherence Score: 0.35
