In [3]:
pip install spacy pandas numpy matplotlib seaborn scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [10]:
import spacy 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from collections import Counter

nlp = spacy.load('en_core_web_sm')

print("Model loaded successfully")

Model loaded successfully


In [11]:
topic = "Artificial Intelligence in Healthcare"

draft_text = """
Artificial Intelligence is revolutionizing healthcare. 
It is always beneficial and will completely replace doctors in the future. 
AI systems are used in diagnosis and treatment planning. 
Many hospitals are adopting machine learning rapidly.
"""

doc = nlp(draft_text)

doc


Artificial Intelligence is revolutionizing healthcare. 
It is always beneficial and will completely replace doctors in the future. 
AI systems are used in diagnosis and treatment planning. 
Many hospitals are adopting machine learning rapidly.

In [14]:
sentences = list(doc.sents)
tokens = [token for token in doc if not token.is_punct]

num_sentences = len(sentences)
num_words = len(tokens)
avg_sentence_length = num_words / num_sentences

unique_words = len(set([token.lemma_.lower() for token in tokens]))
vocab_richness = unique_words / num_words

print("Sentences:", num_sentences)
print("Words:", num_words)
print("Avg sentence length:", round(avg_sentence_length,2))
print("Vocabulary richness:", round(vocab_richness,2))


Sentences: 4
Words: 38
Avg sentence length: 9.5
Vocabulary richness: 0.76


In [17]:
clean_token = [
    token.lemma_.lower()
    for token in doc
    if not token.is_stop and not token.is_punct
]

word_freq = Counter(clean_token)

print("Top 10 words")
print(word_freq.most_common(10))

Top 10 words
[('\n', 5), ('artificial', 1), ('intelligence', 1), ('revolutionize', 1), ('healthcare', 1), ('beneficial', 1), ('completely', 1), ('replace', 1), ('doctor', 1), ('future', 1)]


In [18]:
pos_counts = Counter([token.pos_ for token in doc])
print("POS Distribution:")
print(pos_counts)

POS Distribution:
Counter({'NOUN': 10, 'SPACE': 5, 'AUX': 5, 'VERB': 4, 'PUNCT': 4, 'PROPN': 3, 'ADV': 3, 'ADJ': 2, 'CCONJ': 2, 'ADP': 2, 'PRON': 1, 'DET': 1})


In [20]:
entities = [(ent.text, ent.label_) for ent in doc.ents]

print('Entities:')
print(entities)

Entities:
[('Artificial Intelligence', 'PERSON')]


In [23]:
noun_chunks = [chunk.text for chunk in doc.noun_chunks]
print("Key Phrases:")
print(noun_chunks)

Key Phrases:
['\nArtificial Intelligence', 'healthcare', 'It', 'doctors', 'the future', 'AI systems', 'diagnosis and treatment planning', 'Many hospitals', 'machine learning']


In [24]:
passive_sentences = []

for token in doc:
    if token.dep_ == "nsubjpass":
        passive_sentences.append(token.sent.text)

print("Passive Sentences:")
print(passive_sentences)

Passive Sentences:
['AI systems are used in diagnosis and treatment planning. \n']


In [25]:
topic_doc = nlp(topic)

similarity_score = topic_doc.similarity(doc)
print("Topic Relevance Score:", round(similarity_score,2))

Topic Relevance Score: 0.23


  similarity_score = topic_doc.similarity(doc)


In [26]:
#Bias Detection
bias_words = ["always", "never", "completely", "disaster", "amazing"]

found_bias = [
    token.text for token in doc
    if token.text.lower() in bias_words
]

print("Potential Bias Words:")
print(found_bias)

Potential Bias Words:
['always', 'completely']


In [27]:
score = 100

if avg_sentence_length > 25:
    score -= 10

if len(passive_sentences) > 0:
    score -= 10

if len(found_bias) > 0:
    score -= 10

print("Writing Quality Score:", score)

Writing Quality Score: 80
