In [1]:
import pandas as pd
import string
import nltk
import requests
import pyphen
from bs4 import BeautifulSoup
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
# Function to extract text content from a given URL
def extract_text_from_url(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    #soup = BeautifulSoup(response.content, "html.parser")
    #return soup(get_text)
    text = ' '.join([p.get_text() for p in soup.find_all('p')])
    return text

In [3]:
def remove_stopwords(text):
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    filtered_text = ' '.join(filtered_tokens)
    return filtered_text

In [4]:
def calculate_avg_sentence_length(text):
    sentences = TextBlob(text).sentences
    total_words = sum([len(sentence.words) for sentence in sentences])
    num_sentences = len(sentences)
    avg_sentence_length = total_words / num_sentences
    return avg_sentence_length

In [5]:
def calculate_complex_word_percentage(text):
    words = TextBlob(text).words
    dic = pyphen.Pyphen(lang='en')
    num_complex_words = sum([1 for word in words if len(dic.inserted(word).split('-')) > 2])
    total_words = len(words)
    complex_word_percentage = (num_complex_words / total_words) * 100
    return complex_word_percentage


In [6]:
def calculate_fog_index(avg_sentence_length, complex_word_percentage):
    fog_index = 0.4 * (avg_sentence_length + complex_word_percentage)
    return fog_index


In [7]:
def calculate_avg_words_per_sentence(text):
    sentences = TextBlob(text).sentences
    total_words = sum([len(sentence.words) for sentence in sentences])
    num_sentences = len(sentences)
    avg_words_per_sentence = total_words / num_sentences
    return avg_words_per_sentence


In [8]:
def calculate_complex_word_count(text):
    words = TextBlob(text).words
    dic = pyphen.Pyphen(lang='en')
    complex_words = [word for word in words if len(dic.inserted(word).split('-')) > 2]
    complex_word_count = len(complex_words)
    return complex_word_count

In [9]:
def calculate_word_count(text):
    words = TextBlob(text).words
    word_count = len(words)
    return word_count

In [10]:
def calculate_avg_word_length(text):
    words = TextBlob(text).words
    total_word_length = sum([len(word) for word in words])
    num_words = len(words)
    avg_word_length = total_word_length / num_words
    return avg_word_length

In [11]:
def calculate_syllables_per_word(text):
    words = TextBlob(text).words
    dic = pyphen.Pyphen(lang='en')
    syllables_per_word = [len(dic.inserted(word).split('-')) for word in words]
    return syllables_per_word

In [12]:
def calculate_avg_syllable_count(text):
    words = TextBlob(text).words
    dic = pyphen.Pyphen(lang='en')
    total_syllables = sum([len(dic.inserted(word).split('-')) for word in words])
    num_words = len(words)
    avg_syllable_count = total_syllables / num_words
    return avg_syllable_count

In [13]:
def calculate_personal_pronouns_count(text):
    words = TextBlob(text).words
    tagged_words = nltk.pos_tag(words)
    personal_pronouns = [word for word, pos in tagged_words if pos == 'PRP']
    personal_pronouns_count = len(personal_pronouns)
    return personal_pronouns_count

In [14]:
url = 'https://insights.blackcoffer.com/how-google-fit-measure-heart-and-respiratory-rates-using-a-phone/'  # Replace with the desired URL
text = extract_text_from_url(url)


In [15]:
lower_case=text.lower()
#lower_case


In [16]:
clean_text=lower_case.translate(str.maketrans("","",string.punctuation))
#clean_text

In [17]:
tokenized_words=clean_text.split()
#tokenized_words

In [18]:
stop_words = set(stopwords.words('english'))

In [19]:
final_words=[]
for i in tokenized_words:
    if i not in stop_words:
        final_words.append(i)
final_words

['ranking',
 'customer',
 'behaviours',
 'business',
 'strategy',
 'algorithmic',
 'trading',
 'multiple',
 'commodities',
 'markets',
 'like',
 'forex',
 'metals',
 'energy',
 'etc',
 'trading',
 'bot',
 'forex',
 'python',
 'model',
 'analysis',
 'sectorspecific',
 'stock',
 'etfs',
 'investment',
 'purposes',
 'autogpt',
 'setup',
 'playstore',
 'appstore',
 'google',
 'analytics',
 'ga',
 'firebase',
 'google',
 'data',
 'studio',
 'mobile',
 'app',
 'kpi',
 'dashboard',
 'google',
 'local',
 'service',
 'ads',
 'lsa',
 'api',
 'google',
 'bigquery',
 'google',
 'data',
 'studio',
 'ai',
 'conversational',
 'bot',
 'using',
 'rasa',
 'rise',
 'telemedicine',
 'impact',
 'livelihood',
 '2040',
 'rise',
 'ehealth',
 'impact',
 'humans',
 'year',
 '2030',
 'rise',
 'ehealth',
 'impact',
 'humans',
 'year',
 '2030',
 'rise',
 'telemedicine',
 'impact',
 'livelihood',
 '2040',
 'aiml',
 'predictive',
 'modeling',
 'solution',
 'contact',
 'centre',
 'problems',
 'setup',
 'custom',
 'do

In [20]:
emotions_list=[]
with open('emotions.txt') as file:
    for line in file:
        clear_line=line.replace("\n",'').replace(",","").replace("'","").strip()
        word,emotion=clear_line.split(":")
        if word in final_words:
            emotions_list.append(emotion)
            
print(emotions_list)

[]


In [21]:
from collections import Counter
w=Counter(emotions_list)
w

Counter()

In [22]:
filtered=remove_stopwords(clean_text)

In [23]:
avg_sentence_length = calculate_avg_sentence_length(text)

complex_word_percentage = calculate_complex_word_percentage(text)

fog_index = calculate_fog_index(avg_sentence_length, complex_word_percentage)

avg_words_per_sentence = calculate_avg_words_per_sentence(text)

complex_word_count = calculate_complex_word_count(text)

word_count = calculate_word_count(filtered)

avg_word_length = calculate_avg_word_length(text)

syllables_per_word = calculate_syllables_per_word(filtered)

avg_syllable_count = calculate_avg_syllable_count(filtered)

personal_pronouns_count = calculate_personal_pronouns_count(text)

In [24]:
def analyze(senti_text):
    score=SentimentIntensityAnalyzer().polarity_scores(clean_text)
    neg=score['neg']
    pos=score['pos']
    blob = TextBlob(text)
    sentiment_scores = blob.sentiment
    polarity = sentiment_scores.polarity
    subjectivity = sentiment_scores.subjectivity
    print("positive score:", pos)
    print("negetive score:", neg)
    print('Polarity Score:', polarity)
    print('Subjectivity Score:', subjectivity)
    print('Avg sentence length:', avg_sentence_length)
    print('Complex percentage:', complex_word_percentage)
    print('fog index:', fog_index)
    print('avg words per sentence:', avg_words_per_sentence)
    print('Complex count:', complex_word_count)
    print('Word count:', word_count)
    #print('Syllable per word:', syllables_per_word)
    print('Avg syllable per word:', avg_syllable_count)
    print('personal pronouns:', personal_pronouns_count)
    print('Avg word length:', avg_word_length)

In [25]:
analyze(text)

positive score: 0.103
negetive score: 0.003
Polarity Score: 0.12757936507936507
Subjectivity Score: 0.41633597883597884
Avg sentence length: 26.714285714285715
Complex percentage: 12.121212121212121
fog index: 15.534199134199135
avg words per sentence: 26.714285714285715
Complex count: 68
Word count: 355
Avg syllable per word: 1.7352112676056337
personal pronouns: 6
Avg word length: 5.1319073083778965


In [26]:
def analyze_urls_from_excel(file_path):
    df = pd.read_excel(file_path)  # Read the Excel sheet with URLs
    
    # Create an empty DataFrame to store the results
    results_df = pd.DataFrame(columns=['URL', 'Positive Score', 'Negetive Score', 'Polarity Score', 'Subjectivity Score',
                                       'Avg Sentence Length', 'Complex Word Percentage',
                                       'FOG Index', 'Avg Words per Sentence', 'Complex Word Count',
                                       'Word Count', 'Avg Word Length', 'Avg Syllable per Word',
                                       'Personal Pronouns Count'])
    for url in df['URL']:  # Iterate over the URLs
        text = extract_text_from_url(url)
        lower_case=text.lower()
        clean_text=lower_case.translate(str.maketrans("","",string.punctuation))
        #clean_text = remove_stopwords(clean_text)
        filtered=remove_stopwords(clean_text)
        
        score=SentimentIntensityAnalyzer().polarity_scores(clean_text)
        neg=score['neg']
        pos=score['pos']
        blob = TextBlob(text)
        sentiment_scores = blob.sentiment
        polarity = sentiment_scores.polarity
        subjectivity = sentiment_scores.subjectivity
        
        avg_sentence_length = calculate_avg_sentence_length(text)
        complex_word_percentage = calculate_complex_word_percentage(text)
        fog_index = calculate_fog_index(avg_sentence_length, complex_word_percentage)
        avg_words_per_sentence = calculate_avg_words_per_sentence(text)
        complex_word_count = calculate_complex_word_count(text)
        word_count = calculate_word_count(filtered)
        avg_word_length = calculate_avg_word_length(text)
        avg_syllable_count = calculate_avg_syllable_count(filtered)
        personal_pronouns_count = calculate_personal_pronouns_count(text)

        # Append the results to the DataFrame
        results_df = results_df.append({
            'URL': url,
            'Positive Score': pos,
            'Negetive score': neg,
            'Polarity Score': polarity,
            'Subjectivity Score': subjectivity,
            'Avg Sentence Length': avg_sentence_length,
            'Complex Word Percentage': complex_word_percentage,
            'FOG Index': fog_index,
            'Avg Words per Sentence': avg_words_per_sentence,
            'Complex Word Count': complex_word_count,
            'Word Count': word_count,
            'Avg Word Length': avg_word_length,
            'Avg Syllable per Word': avg_syllable_count,
            'Personal Pronouns Count': personal_pronouns_count
        }, ignore_index=True)
        
    results_file_path = 'results.xlsx'
    results_df.to_excel(results_file_path, index=False)
    print(f"Results saved to '{results_file_path}'.")

In [27]:
file_path = 'Input.xlsx'  # Replace with the actual file path
analyze_urls_from_excel(file_path)

  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = resul

  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = resul

  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = resul

  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({


Results saved to 'results.xlsx'.
