In [1]:
pip install pandas nltk textblob gensim scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
df = pd.read_excel(r'C:\Users\HP\Desktop\Assignment\Assignment.xlsx')

print(df.head())


                                             Article
0  Retailers, the makers of foods marketed for we...
1  Move over, Ozempic — there’s a new drug in tow...
2  Sept 14 (Reuters) - Bristol Myers Squibb (BMY....
3  Austin Wolcott was 18 years old and pretty sur...
4  Cancer, often referred to as the “emperor of a...


In [7]:
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from textblob import TextBlob
from gensim import corpora, models
import nltk

In [5]:
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [8]:
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = re.sub(r'[^\w\s]', '', text)
    words = word_tokenize(text)
    words = [word for word in words if word.lower() not in stop_words]
    cleaned_text = ' '.join(words)
    return cleaned_text

df['cleaned_text'] = df['Article'].apply(clean_text)

print(df['cleaned_text'].head())


0    Retailers makers foods marketed weight loss ty...
1    Move Ozempic theres new drug town Eli Lillys Z...
2    Sept 14 Reuters Bristol Myers Squibb BMYN said...
3    Austin Wolcott 18 years old pretty sure wouldn...
4    Cancer often referred emperor maladies unyield...
Name: cleaned_text, dtype: object


In [9]:
from textblob import TextBlob

def get_sentiment(text):
    blob = TextBlob(text)
    if blob.sentiment.polarity > 0:
        return 'Positive'
    elif blob.sentiment.polarity < 0:
        return 'Negative'
    else:
        return 'Neutral'

df['mood'] = df['cleaned_text'].apply(get_sentiment)
print(df[['cleaned_text', 'mood']].head())


                                        cleaned_text      mood
0  Retailers makers foods marketed weight loss ty...  Positive
1  Move Ozempic theres new drug town Eli Lillys Z...  Negative
2  Sept 14 Reuters Bristol Myers Squibb BMYN said...  Positive
3  Austin Wolcott 18 years old pretty sure wouldn...  Positive
4  Cancer often referred emperor maladies unyield...  Positive


In [10]:
from gensim import corpora, models
from nltk.tokenize import word_tokenize

tokenized_texts = [word_tokenize(text) for text in df['cleaned_text']]
dictionary = corpora.Dictionary(tokenized_texts)
corpus = [dictionary.doc2bow(text) for text in tokenized_texts]
lda_model = models.LdaModel(corpus, num_topics=3, id2word=dictionary, passes=10)

for idx, topic in lda_model.print_topics(-1):
    print(f'Topic {idx}: {topic}')


Topic 0: 0.014*"account" + 0.008*"BRUKINSA" + 0.007*"Wells" + 0.006*"patients" + 0.006*"opening" + 0.006*"Fargo" + 0.005*"bonus" + 0.004*"FL" + 0.004*"approved" + 0.004*"obinutuzumab"
Topic 1: 0.009*"CART" + 0.007*"cancer" + 0.007*"Nike" + 0.006*"therapy" + 0.006*"Strava" + 0.006*"Taco" + 0.006*"says" + 0.005*"cells" + 0.004*"new" + 0.004*"patients"
Topic 2: 0.012*"Nike" + 0.008*"firm" + 0.005*"said" + 0.005*"also" + 0.005*"market" + 0.005*"believe" + 0.005*"products" + 0.004*"company" + 0.004*"sales" + 0.003*"Nikes"


In [12]:
def aspect_sentiment(text, aspect):
    blob = TextBlob(text)
    sentences = blob.sentences
    aspect_sentences = [sentence for sentence in sentences if aspect in sentence.lower()]
    if len(aspect_sentences) == 0:
        return 'Neutral' 
    aspect_polarity = sum(sentence.sentiment.polarity for sentence in aspect_sentences) / len(aspect_sentences)
    return 'Positive' if aspect_polarity > 0 else 'Negative' if aspect_polarity < 0 else 'Neutral'

df['cost_sentiment'] = df['cleaned_text'].apply(lambda x: aspect_sentiment(x, 'cost'))
print(df[['cleaned_text', 'cost_sentiment']].head())


                                        cleaned_text cost_sentiment
0  Retailers makers foods marketed weight loss ty...       Positive
1  Move Ozempic theres new drug town Eli Lillys Z...       Negative
2  Sept 14 Reuters Bristol Myers Squibb BMYN said...        Neutral
3  Austin Wolcott 18 years old pretty sure wouldn...       Positive
4  Cancer often referred emperor maladies unyield...       Positive


In [14]:
df['summary'] = df['cleaned_text'].apply(lambda x: ' '.join(word_tokenize(x)[:10]))

df.to_excel(r'C:\Users\HP\Desktop\Assignment\news_analysis_output.xlsx', index=False)

print(df.head())


                                             Article  \
0  Retailers, the makers of foods marketed for we...   
1  Move over, Ozempic — there’s a new drug in tow...   
2  Sept 14 (Reuters) - Bristol Myers Squibb (BMY....   
3  Austin Wolcott was 18 years old and pretty sur...   
4  Cancer, often referred to as the “emperor of a...   

                                        cleaned_text      mood cost_sentiment  \
0  Retailers makers foods marketed weight loss ty...  Positive       Positive   
1  Move Ozempic theres new drug town Eli Lillys Z...  Negative       Negative   
2  Sept 14 Reuters Bristol Myers Squibb BMYN said...  Positive        Neutral   
3  Austin Wolcott 18 years old pretty sure wouldn...  Positive       Positive   
4  Cancer often referred emperor maladies unyield...  Positive       Positive   

                                             summary  
0  Retailers makers foods marketed weight loss ty...  
1  Move Ozempic theres new drug town Eli Lillys Z...  
2  Sept 14 