<a href="https://colab.research.google.com/github/Ranjan4Kumar/WebScrapping-Using-BS4/blob/main/URL_ID_41.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bs4
!pip install html5lib
!pip install requests

In [None]:
import requests
from bs4 import BeautifulSoup

url = "https://insights.blackcoffer.com/will-ai-replace-us-or-work-with-us/"

# Send a GET request to the URL
response = requests.get(url)

# Create BeautifulSoup object
soup = BeautifulSoup(response.content, 'html.parser')

# Find the article text
content = soup.find("div", class_="td-post-content")

# Extract the text
text = content.get_text().strip()

# Split the text into paragraphs
paragraphs = text.split("\n\n")

# Print each paragraph separately
for paragraph in paragraphs:
    print(paragraph)
    print()


“Machine intelligence is the last invention that humanity will ever need to make”
Nick Bostrom

To put it frankly, Artificial Intelligence will eventually replace jobs. Workers in a variety of industries, from healthcare to agriculture and manufacturing, should expect to witness hiring disruptions as a result of Artificial Intelligence.
If history has taught us anything, it is that disruptive paradigm-shifting business ideas not only make a fortune for the innovators, but they also build the groundwork for new business models, market entrants, and job opportunities which will inevitably follow. It is true that robots today or in future will eventually replace humans for many jobs, but so did innovative farming equipment for humans and horses during the industrial revolution. But that does not mean that our jobs as humans will end here. We, on the other hand, will be required to generate and provide value in whole new ways for entirely new business models as a result of these changes.
A

In [None]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from collections import defaultdict
from textblob import TextBlob

nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


# Stop words files
stopword_files = [
    "/content/StopWords_Auditor.txt",
    "/content/StopWords_Currencies.txt",
    "/content/StopWords_DatesandNumbers.txt",
    "/content/StopWords_Generic.txt",
    "/content/StopWords_GenericLong.txt",
    "/content/StopWords_Geographic.txt",
    "/content/StopWords_Names.txt"
]

# Load stop words from multiple files
stop_words = set()
for file in stopword_files:
    with open(file, "r", errors="replace") as f:
        encoding = "utf-8"  # Start with UTF-8 encoding
        try:
            stop_words.update(f.read().splitlines())
        except UnicodeDecodeError:
            # If UTF-8 fails, try different encodings
            encodings_to_try = ["latin-1", "iso-8859-1"]
            for encoding in encodings_to_try:
                try:
                    with open(file, "r", encoding=encoding) as f_retry:
                        stop_words.update(f_retry.read().splitlines())
                        break
                except UnicodeDecodeError:
                    pass

# Positive and negative words files
positive_words_file = '/content/positive-words.txt'
negative_words_file = '/content/negative-words.txt'

# Load the positive words
with open(positive_words_file, "r", encoding="utf-8") as file:
    positive_words = [word.strip() for word in file.readlines()]

# Load the negative words
with open(negative_words_file, "r", encoding="latin-1") as file:
    negative_words = [word.strip() for word in file.readlines()]

# Initialize the SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

# Remove stop words from the text data
words = word_tokenize(paragraph.lower())
filtered_words = [word for word in words if word not in stop_words]

# Create a dictionary of positive and negative words
word_sentiment = defaultdict(int)
for word in filtered_words:
    if word in positive_words:
        word_sentiment[word] = 1
    elif word in negative_words:
        word_sentiment[word] = -1

#Applying model on the data
sentiment_scores = sia.polarity_scores(paragraph)
positive_word_count = sum(value == 1 for value in word_sentiment.values())
negative_word_count = sum(value == -1 for value in word_sentiment.values())
# TextBlob
blob = TextBlob(paragraph)

polarity_score = sentiment_scores['compound']
subjectivity_score = blob.sentiment.subjectivity
avg_sentence_length = len(words) / len(blob.sentences)
percentage_complex_words = (len([word for word in filtered_words if len(word) > 2]) / len(filtered_words)) * 100
fog_index = 0.4 * (avg_sentence_length + percentage_complex_words)
avg_words_per_sentence = len(words) / len(blob.sentences)
complex_word_count = len([word for word in filtered_words if len(word) > 2])
word_count = len(filtered_words)
syllables_per_word = blob.sentiment_assessments.assessments[0][1]
personal_pronouns = sum(1 for word in blob.tags if word[1] == 'PRP')
avg_word_length = sum(len(word) for word in filtered_words) / len(filtered_words)

# Print
print("positive_word_count:",positive_word_count)
print("negative_word_count:",negative_word_count)
print("polarity_score:",polarity_score)
print("subjectivity_score:",subjectivity_score)
print("avg_sentence_length :",avg_sentence_length )
print("percentage_complex_words:",percentage_complex_words)
print("fog_index:",fog_index)
print("avg_words_per_sentence:",avg_words_per_sentence)
print("complex_word_count:",complex_word_count)
print("word_count:",word_count)
print("syllables_per_word",syllables_per_word)
print("personal_pronouns:",personal_pronouns)
print("avg_word_length",avg_word_length)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


positive_word_count: 18
negative_word_count: 15
polarity_score: 0.9972
subjectivity_score: 0.5048980717863696
avg_sentence_length : 27.3
percentage_complex_words: 76.39123102866779
fog_index: 41.47649241146712
avg_words_per_sentence: 27.3
complex_word_count: 453
word_count: 593
syllables_per_word -0.1
personal_pronouns: 27
avg_word_length 5.780775716694772
