## Sentiment Analysis on News Channels

In [13]:
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Dropout, SpatialDropout1D
from tensorflow.keras.layers import Embedding

df = pd.read_csv("./Tweets.csv")

In [14]:
df.head()

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,570306133677760513,neutral,1.0,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2015-02-24 11:35:52 -0800,,Eastern Time (US & Canada)
1,570301130888122368,positive,0.3486,,0.0,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2015-02-24 11:15:59 -0800,,Pacific Time (US & Canada)
2,570301083672813571,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2015-02-24 11:15:48 -0800,Lets Play,Central Time (US & Canada)
3,570301031407624196,negative,1.0,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2015-02-24 11:15:36 -0800,,Pacific Time (US & Canada)
4,570300817074462722,negative,1.0,Can't Tell,1.0,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2015-02-24 11:14:45 -0800,,Pacific Time (US & Canada)


In [15]:
df.columns

Index(['tweet_id', 'airline_sentiment', 'airline_sentiment_confidence',
       'negativereason', 'negativereason_confidence', 'airline',
       'airline_sentiment_gold', 'name', 'negativereason_gold',
       'retweet_count', 'text', 'tweet_coord', 'tweet_created',
       'tweet_location', 'user_timezone'],
      dtype='object')

In [16]:
tweet_df = df[['text','airline_sentiment']]
print(tweet_df.shape)
tweet_df.head(5)

(14640, 2)


Unnamed: 0,text,airline_sentiment
0,@VirginAmerica What @dhepburn said.,neutral
1,@VirginAmerica plus you've added commercials t...,positive
2,@VirginAmerica I didn't today... Must mean I n...,neutral
3,@VirginAmerica it's really aggressive to blast...,negative
4,@VirginAmerica and it's a really big bad thing...,negative


In [17]:
tweet_df = tweet_df[tweet_df['airline_sentiment'] != 'neutral']
print(tweet_df.shape)
tweet_df.head(5)

(11541, 2)


Unnamed: 0,text,airline_sentiment
1,@VirginAmerica plus you've added commercials t...,positive
3,@VirginAmerica it's really aggressive to blast...,negative
4,@VirginAmerica and it's a really big bad thing...,negative
5,@VirginAmerica seriously would pay $30 a fligh...,negative
6,"@VirginAmerica yes, nearly every time I fly VX...",positive


In [18]:
tweet_df["airline_sentiment"].value_counts()

negative    9178
positive    2363
Name: airline_sentiment, dtype: int64

In [19]:
sentiment_label = tweet_df.airline_sentiment.factorize()
sentiment_label

(array([0, 1, 1, ..., 0, 1, 1], dtype=int64),
 Index(['positive', 'negative'], dtype='object'))

In [20]:
tweet = tweet_df.text.values
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(tweet)
vocab_size = len(tokenizer.word_index) + 1
encoded_docs = tokenizer.texts_to_sequences(tweet)
padded_sequence = pad_sequences(encoded_docs, maxlen=200)

In [21]:
# print(tokenizer.word_index)

In [22]:
print(tweet[0])
print(encoded_docs[0])

@VirginAmerica plus you've added commercials to the experience... tacky.
[103, 575, 530, 1287, 2416, 1, 2, 177]


In [23]:
print(padded_sequence[0])

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0  103  575  530 1287
 2416 

In [24]:
embedding_vector_length = 32
model = Sequential() 
model.add(Embedding(vocab_size, embedding_vector_length, input_length=200) )
model.add(SpatialDropout1D(0.25))
model.add(LSTM(50, dropout=0.5, recurrent_dropout=0.5))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid')) 
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])  
print(model.summary()) 

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 32)           423488    
                                                                 
 spatial_dropout1d (SpatialD  (None, 200, 32)          0         
 ropout1D)                                                       
                                                                 
 lstm (LSTM)                 (None, 50)                16600     
                                                                 
 dropout (Dropout)           (None, 50)                0         
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 440,139
Trainable params: 440,139
Non-trainable params: 0
__________________________________________________

In [None]:
# training

history = model.fit(padded_sequence,sentiment_label[0],validation_split=0.2, epochs=5, batch_size=32)

In [35]:
def predict_sentiment(text):
    tw = tokenizer.texts_to_sequences([text])
    tw = pad_sequences(tw,maxlen=200)
    prediction = int(model.predict(tw).round().item())
    return sentiment_label[1][prediction]

In [61]:
# Testing the model

print(predict_sentiment("6 KG drugs worth Rs 35 crores recovered in Rajasthan") )

print(predict_sentiment("World Cancer Day: Cancer Survivors ramp walks with IDT Students to spread awareness in Surat") )

print(predict_sentiment("Union Minister Pralhad Joshi underlines importance of energy conservation with reference from Rigveda") )

print(predict_sentiment("Massive protest erupts against Nitish Kumar; villagers raise slogans against Bihar CM during ‘Samadhan Yatra") )

negative
positive
positive
negative


### Scraping information of news websites

In [2]:
import requests
from bs4 import BeautifulSoup
import re
seed_toi="https://timesofindia.indiatimes.com/videos/news"
a=set()
a.add(seed_toi)
for i in range(2,11):
    x=seed_toi+"/"+str(i)
    a.add(x)
for i in a:    
    print(i)


https://timesofindia.indiatimes.com/videos/news/9
https://timesofindia.indiatimes.com/videos/news/4
https://timesofindia.indiatimes.com/videos/news/10
https://timesofindia.indiatimes.com/videos/news/7
https://timesofindia.indiatimes.com/videos/news/5
https://timesofindia.indiatimes.com/videos/news/3
https://timesofindia.indiatimes.com/videos/news
https://timesofindia.indiatimes.com/videos/news/6
https://timesofindia.indiatimes.com/videos/news/8
https://timesofindia.indiatimes.com/videos/news/2


In [3]:
toi=set()
for url in a:
    #print(url)
    r=requests.get(url)
    soup=BeautifulSoup(r.content,'html.parser')
    #print(soup.prettify())
    for i in soup.find_all('div',class_='_3sL7K'):
        anchor_tags = i.find_all('a') 
        for anchor_tag in anchor_tags : 
            title = anchor_tag['title'] 
            toi.add(title)
for i in toi:
    print(i)

Proud to see PM Modi sending aid to Turkey despite it being Pakistan's sympathizer: Manoj Tiwari
BJP workers protest against Kerala state budget in Kochi
Rachana Reddy accuses Opposition of wasting precious Parliamentary hours on irrelevant issues
Researchers find eye-tracking test to quantify toddlers’ level of attention to motherese
PM Narendra Modi to Opposition MPs: 'The more you throw 'keechad', the better lotus will bloom'
Child marriage crackdown: Shouldn’t look like action is against one community, says Maulana Madani
Darshan at Shree Somnath Temple, First Jyotirlinga, 09-February - 2023
PM Narendra Modi speaks on Gati Shakti master plan in Rajya Sabha
The jacket PM Modi wore in Parliament is made of recycled plastic: IOCL Chairman
After Meta and Twitter, Disney fires 7000 employees in a major revamp
UP CM takes stock of G20 meeting preparations in Lucknow
Mahila Congress workers protest against union budget in Kerala
“Disrespectful to the Armed Forces”: Defence Expert Dhruv Ka

In [4]:
seed_toi="https://timesofindia.indiatimes.com/videos/news"
a=set()
a.add(seed_toi)
for i in range(2,11):
    x=seed_toi+"/"+str(i)
    a.add(x)
for i in a:    
    print(i)

https://timesofindia.indiatimes.com/videos/news/9
https://timesofindia.indiatimes.com/videos/news/4
https://timesofindia.indiatimes.com/videos/news/10
https://timesofindia.indiatimes.com/videos/news/7
https://timesofindia.indiatimes.com/videos/news/5
https://timesofindia.indiatimes.com/videos/news/3
https://timesofindia.indiatimes.com/videos/news
https://timesofindia.indiatimes.com/videos/news/6
https://timesofindia.indiatimes.com/videos/news/8
https://timesofindia.indiatimes.com/videos/news/2


In [5]:
toi=set()
for url in a:
    #print(url)
    r=requests.get(url)
    soup=BeautifulSoup(r.content,'html.parser')
    #print(soup.prettify())
    for i in soup.find_all('div',class_='_3sL7K'):
        anchor_tags = i.find_all('a') 
        for anchor_tag in anchor_tags : 
            title = anchor_tag['title'] 
            toi.add(title)
for i in toi:
    print(i)

Proud to see PM Modi sending aid to Turkey despite it being Pakistan's sympathizer: Manoj Tiwari
BJP workers protest against Kerala state budget in Kochi
Rachana Reddy accuses Opposition of wasting precious Parliamentary hours on irrelevant issues
Researchers find eye-tracking test to quantify toddlers’ level of attention to motherese
PM Narendra Modi to Opposition MPs: 'The more you throw 'keechad', the better lotus will bloom'
Child marriage crackdown: Shouldn’t look like action is against one community, says Maulana Madani
Darshan at Shree Somnath Temple, First Jyotirlinga, 09-February - 2023
PM Narendra Modi speaks on Gati Shakti master plan in Rajya Sabha
The jacket PM Modi wore in Parliament is made of recycled plastic: IOCL Chairman
After Meta and Twitter, Disney fires 7000 employees in a major revamp
UP CM takes stock of G20 meeting preparations in Lucknow
Mahila Congress workers protest against union budget in Kerala
“Disrespectful to the Armed Forces”: Defence Expert Dhruv Ka

In [7]:
x="https://www.ndtv.com/india/page-"
seed_ndtv=[]
for i in range(1,15):
    seed_ndtv.append(x+str(i))
for i in seed_ndtv:
    print(i)

https://www.ndtv.com/india/page-1
https://www.ndtv.com/india/page-2
https://www.ndtv.com/india/page-3
https://www.ndtv.com/india/page-4
https://www.ndtv.com/india/page-5
https://www.ndtv.com/india/page-6
https://www.ndtv.com/india/page-7
https://www.ndtv.com/india/page-8
https://www.ndtv.com/india/page-9
https://www.ndtv.com/india/page-10
https://www.ndtv.com/india/page-11
https://www.ndtv.com/india/page-12
https://www.ndtv.com/india/page-13
https://www.ndtv.com/india/page-14


In [8]:
ndtv=set()
for i in seed_ndtv:
    #print(i)
    r=requests.get(i)
    soup=BeautifulSoup(r.content,'html.parser')
    for i in soup.find_all('h2',class_='newsHdng'):
      anchor_tags = i.find_all('a') 
      #print(anchor_tags)
      for anchor_tag in anchor_tags : 
            text=anchor_tag.text
            ndtv.add(text)

for i in ndtv:
     print(i)

Need Central Act Regulating Online Gaming, Gambling: Minister In Lok Sabha
"Trust Of 140 Crore Indians Is My Shield": PM Hits Back At Opposition
Watch: "Why Were My Words Expunged," Asks Rahul Gandhi
Supreme Court Deletes "Foreign Origin" Reference To Sikkimese-Nepalis
Rajya Sabha Walkout By AAP, KCR Party, Team Thackeray As Notices Rejected
137 Students From Mangaluru College Hospitalised, Food Poisoning Suspected
India Is Biggest Friend During Crisis: Lanka PM
Smart TVs And Cellphones Drive "Smart Schools" In Madhya Pradesh District
"This Is Not 'Amrit Kaal', It's 'Zeher Kaal'": CPM Leader Hits Out At PM
Hotel Owners Among 5 Charged For Deaths Of 2 Construction Workers In Noida
669 Deaths In Police Custody In Last 5 years: Home Ministry
Stones Thrown At Aaditya Thackeray's Car In Aurangabad, Says Party
Poem Critical of Gandhi At School Event Sparks Row In Madhya Pradesh
Rare Moment Of Congress-BJP Camaraderie In Rajya Sabha Today
Centre Does Not Control Social Media Intermediaries: M

In [9]:
x="https://www.india.com/news/india/"
seed_india=[]
seed_india.append(x)
for i in range(1,15):
    seed_india.append(x+"page/"+str(i)+"/")
for i in seed_india:
    print(i)

https://www.india.com/news/india/
https://www.india.com/news/india/page/1/
https://www.india.com/news/india/page/2/
https://www.india.com/news/india/page/3/
https://www.india.com/news/india/page/4/
https://www.india.com/news/india/page/5/
https://www.india.com/news/india/page/6/
https://www.india.com/news/india/page/7/
https://www.india.com/news/india/page/8/
https://www.india.com/news/india/page/9/
https://www.india.com/news/india/page/10/
https://www.india.com/news/india/page/11/
https://www.india.com/news/india/page/12/
https://www.india.com/news/india/page/13/
https://www.india.com/news/india/page/14/


In [10]:
india=set()
for i in seed_india:
    #print(i)
    r=requests.get(i)
    soup=BeautifulSoup(r.content,'html.parser')
    for i in soup.find_all('figcaption',class_='text'):
       for j in i.find_all('h2'):  
        anchor_tags = i.find_all('a') 
        #print(anchor_tags)
        for anchor_tag in anchor_tags : 
                text=anchor_tag.text
                if len(text)>20:
                  india.add(text)

for i in india:
     print(i)

Union Budget 2023-24: How Can Android, iOS Users Download Union Budget Mobile App From indiabudget.gov.in| Step By Step Guide Here
Indian Army Changes Recruitment Process: Now Online Common Entrance Exam For Agniveers
Calicut-Bound Air India Express Flight Makes Emergency Landing at Abu Dhabi Airport
Supreme Court Gets 5 New Judges, CJI Chandrachud Administers Oath of Office
Massive Fire Breaks Out At Factory In Gujarat   s Umargam, Fire Tenders Rush To Spot
Drunk Man Bites Off Cop's Ear In Kerala
Air India Express Flight From Sharjah Makes Emergency Landing at Cochin Airport
RaGa Begins Battle For 2024 With 'Snow-Capped' Speech In Srinagar
Budget 2023: Nirmala Sitharaman Takes Out Her Brightest Red Silk Saree to Present Desh Ka Bahi-Khata - Viral Pics
Students Detained, Section 144 Imposed At Delhi University Ahead of BBC Documentary Screening
BBC Documentary Row: What Happened at JNU? 10 Points
'Great Loss...' CM Naveen Patnaik Expresses Shock Over Demise Of Odisha Health Minister; C

In [40]:
toi_list_pred=[]
ndtv_list_pred=[]
india_list_pred=[]       

### Predict results from News

In [41]:
len(toi),len(ndtv),len(india)

(208, 198, 183)

In [42]:
for i in toi:
    toi_list_pred.append(predict_sentiment(i))

for i in ndtv:
    ndtv_list_pred.append(predict_sentiment(i))

for i in india:
    india_list_pred.append(predict_sentiment(i))



In [56]:
toi_neg=0
toi_tot = len(toi_list_pred)
for i in toi_list_pred:
    if i=='negative':
        toi_neg=toi_neg+1

toi_pos = toi_tot-toi_neg

print("Total number of news articles in Times of India =", toi_tot)
print("toi_negative", toi_neg)
print("toi_postive", toi_pos)
print("Probablity distribution of Negative =", toi_neg/toi_tot,"and Positive =", toi_pos/toi_tot)


Total number of news articles in Times of India = 208
toi_negative 161
toi_postive 47
Probablity distribution of Negative = 0.7740384615384616 and Positive = 0.22596153846153846


In [57]:
ndtv_neg=0
ndtv_tot = len(ndtv_list_pred)
for i in ndtv_list_pred:
    if i=='negative':
        ndtv_neg=toi_neg+1

ndtv_pos = ndtv_tot-ndtv_neg

print("Total number of news articles in NDTV =", ndtv_tot)
print("toi_negative", ndtv_neg)
print("toi_postive", ndtv_pos)
print("Probablity distribution of Negative =", ndtv_neg/ndtv_tot,"and Positive =", ndtv_pos/ndtv_tot)


Total number of news articles in NDTV = 198
toi_negative 162
toi_postive 36
Probablity distribution of Negative = 0.8181818181818182 and Positive = 0.18181818181818182


In [59]:
india_neg=0
india_tot = len(india_list_pred)
for i in india_list_pred:
    if i=='negative':
        india_neg=toi_neg+1

india_pos = india_tot-india_neg

print("Total number of news articles in NDTV =", india_tot)
print("toi_negative", india_neg)
print("toi_postive", india_pos)
print("Probablity distribution of Negative =", india_neg/india_tot,"and Positive =", india_pos/india_tot)


Total number of news articles in NDTV = 183
toi_negative 162
toi_postive 21
Probablity distribution of Negative = 0.8852459016393442 and Positive = 0.11475409836065574
