In [6]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import nltk
from textblob import TextBlob
import syllables

In [7]:
# Fetch the NLTK data packages used by this notebook. 'punkt' presumably backs
# the sent_tokenize/word_tokenize calls and 'averaged_perceptron_tagger' the
# pos_tag calls further down — confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ron68\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\ron68\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [11]:
# Download the article page and parse its HTML.
url = "https://insights.blackcoffer.com/ai-in-healthcare-to-improve-patient-outcomes/"
# Without a timeout a stalled connection would block this cell indefinitely.
page = requests.get(url, timeout=30)
# Stop on 4xx/5xx responses instead of silently analysing an error page.
page.raise_for_status()
# Raw bytes are handed to BeautifulSoup so it can work out the encoding itself.
htmlcontent = page.content
soup = BeautifulSoup(htmlcontent, 'html.parser')

In [12]:
# Pull the page title and the article body text out of the parsed HTML.
# A page without a <title> element yields an empty title rather than an
# AttributeError.
title = soup.title.text if soup.title is not None else ""
# To analyse a different page, inspect it in the browser and set the class of
# its content container here accordingly.
content = soup.find(attrs={'class': 'vc_row tdi_118 td-ss-row wpb_row td-pb-row'})
if content is None:
    # find() returns None when nothing matches; fail with an actionable message
    # instead of "'NoneType' object has no attribute 'text'".
    raise ValueError(
        "Article container not found: update the class passed to soup.find() "
        "to match the page being analysed."
    )
# Collapse each run of whitespace (newlines included) into a single space.
# Deleting newlines outright would glue together the last word of one line and
# the first word of the next, distorting the word and sentence statistics.
content_text = " ".join(content.text.split())

In [13]:
# Wrap the article text in a TextBlob; the sentiment scores, sentence list and
# word list used by the following cells are all read from this object.
blob = TextBlob(content_text)

In [14]:
# Sentence-level tallies: how many sentences lean positive / negative.
sentence_polarities = [sent.sentiment.polarity for sent in blob.sentences]
positive_score = sum(1 for polarity in sentence_polarities if polarity > 0)
negative_score = sum(1 for polarity in sentence_polarities if polarity < 0)

# Document-level sentiment of the whole article.
document_sentiment = blob.sentiment
polarity_score = document_sentiment.polarity
subjectivity_score = document_sentiment.subjectivity

In [15]:
# Split the article into sentences with NLTK, then average the number of
# word_tokenize tokens found in each sentence.
sentences = nltk.sent_tokenize(content_text)
tokens_per_sentence = [len(nltk.word_tokenize(sent)) for sent in sentences]
avg_sentence_length = sum(tokens_per_sentence) / len(sentences)

In [16]:
# Total number of words TextBlob extracted from the article.
word_count = len(blob.words)
# A complex word is one with more than two syllables, which is the definition
# the FOG index computed later in this notebook expects. The previous test
# (length of the word's POS tag >= 2) was satisfied by virtually every word, so
# it effectively counted all words longer than two characters, and it invoked
# the POS tagger once per word.
complex_word_count = sum(1 for word in blob.words if syllables.estimate(word) > 2)
# Share of complex words, expressed as a percentage of all words.
percentage_complex_words = (complex_word_count / word_count) * 100

In [17]:
# Mean number of TextBlob words per NLTK-detected sentence.
sentence_total = len(sentences)
avg_words_per_sentence = word_count / sentence_total

In [18]:
# Add up the estimated syllables of every word, then average per word.
syllable_count = sum(map(syllables.estimate, blob.words))
syllables_per_word = syllable_count / word_count

In [19]:
# Tag the whole word list once and count the tokens labelled 'PRP'.
tagged_words = nltk.pos_tag(blob.words)
personal_pronouns = sum(1 for _word, tag in tagged_words if tag == 'PRP')

In [20]:
# Mean number of characters per word.
total_characters = sum(map(len, blob.words))
avg_word_length = total_characters / word_count

In [21]:
# FOG index = 0.4 * (average words per sentence + percentage of complex words).
# `complex_words` collects the matching tokens themselves; the formula below
# reads only the averages computed in earlier cells.
# NOTE(review): the tag-length test looks like it accepts nearly every token
# longer than two characters — confirm this is the intended definition.
complex_words = []
for token in blob.words:
    if len(token) <= 2:
        continue
    pos_label = nltk.pos_tag([token])[0][1]
    if len(pos_label) >= 2:
        complex_words.append(token)

fog_index = (avg_words_per_sentence + percentage_complex_words) * 0.4

In [24]:
# Ask where to store the report, then write the title, the article text and
# every computed metric to that file as UTF-8 text.
file_name = input("Enter the file name to save analysis results: ")

# (label, value) pairs in the order they appear in the report, one per line.
metric_rows = [
    ("Positive Score", positive_score),
    ("Negative Score", negative_score),
    ("Polarity Score", polarity_score),
    ("Subjectivity Score", subjectivity_score),
    ("Avg Sentence Length", avg_sentence_length),
    ("Percentage of Complex Words", percentage_complex_words),
    ("Avg Number of Words per Sentence", avg_words_per_sentence),
    ("Complex Word Count", complex_word_count),
    ("FOG Index", fog_index),
    ("Word Count", word_count),
    ("Syllables per Word", syllables_per_word),
    ("Personal Pronouns", personal_pronouns),
    ("Avg Word Length", avg_word_length),
]

# Title and content are each followed by a blank line; metrics are one per line.
report_parts = [f"Title: {title}\n\n", f"Content: {content_text}\n\n"]
report_parts.extend(f"{label}: {value}\n" for label, value in metric_rows)

with open(file_name, "w", encoding="utf-8") as file:
    file.writelines(report_parts)

print("Textual analysis completed. Results saved to file", file_name)

Enter the file name to save analysis results: rough
Textual analysis completed. Results saved to file rough
