Ishan Dubey  
Dept. of CSE  
Ramaiah Institute of Technology


In [None]:
%%capture
!pip install transformers

In [None]:
import os
import re

import numpy as np
import pandas as pd
import requests
import torch
from transformers import pipeline
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hub checkpoint ids, named once so each download below refers to a single source of truth
SENTIMENT_CHECKPOINT = "siebert/sentiment-roberta-large-english"
TOPIC_CHECKPOINT = "abhishek/autonlp-bbc-news-classification-37229289"

# Ready-to-call HuggingFace sentiment-analysis pipeline
sentiment_analysis = pipeline("sentiment-analysis", model=SENTIMENT_CHECKPOINT)

# Topic classifier (trained on the BBC News dataset) and its matching tokenizer;
# both are fetched from the same checkpoint so they stay compatible
model = AutoModelForSequenceClassification.from_pretrained(TOPIC_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(TOPIC_CHECKPOINT)

Downloading:   0%|          | 0.00/687 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/256 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/963 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/311 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
def get_news(api_key=None, country="in", timeout=10):
  """Fetch the current top headlines from NewsAPI.

  Args:
    api_key: NewsAPI key. When omitted, the key is read from the
      ``NEWSAPI_KEY`` environment variable so that no credential has to be
      stored in the notebook. (The free tier is limited to 100 requests/day.)
    country: Country code sent to the endpoint. Defaults to ``"in"``, the
      value that used to be hard-coded in the URL.
    timeout: Seconds to wait for the HTTP response before giving up.

  Returns:
    A ``(titles, article_text)`` tuple of two equally long lists holding each
    article's ``title`` and ``description`` as delivered by the API. A
    description may be ``None`` when the API supplies none, so callers should
    be prepared for that.

  Raises:
    RuntimeError: if no API key is available, or if NewsAPI answers with an
      error payload instead of a list of articles.
  """
  if api_key is None:
    api_key = os.environ.get("NEWSAPI_KEY")
  if not api_key:
    raise RuntimeError(
        "No NewsAPI key available: pass api_key=... or set the NEWSAPI_KEY "
        "environment variable.")

  # Only parameters the v2 top-headlines endpoint defines are sent. The former
  # "source"/"sortBy" entries are not among them and did not restrict the
  # results to a single outlet, so they were misleading and have been dropped.
  query_params = {
      "country": country,
      "apiKey": api_key,
  }
  main_url = "https://newsapi.org/v2/top-headlines"

  # A timeout keeps the notebook from hanging forever on a stalled connection
  res = requests.get(main_url, params=query_params, timeout=timeout)
  content = res.json()

  # Error responses carry "status": "error" plus a code/message and no
  # "articles" key; surface them instead of failing with a bare KeyError.
  if content.get("status") == "error" or "articles" not in content:
    raise RuntimeError("NewsAPI request failed: {} - {}".format(
        content.get("code"), content.get("message")))

  articles = content["articles"]
  titles = [article["title"] for article in articles]
  article_text = [article["description"] for article in articles]

  return titles, article_text

def analyse_sentiment(text):
  """Score `text` with the notebook-level `sentiment_analysis` pipeline.

  The pipeline's result is handed back untouched; the notebook reads the
  prediction from element 0 via its 'label' and 'score' keys.
  """
  prediction = sentiment_analysis(text)
  return prediction

def find_topic(title):
  """Classify a single article title into one of five news topics.

  Uses the notebook-level `tokenizer` and `model` (a sequence classifier
  trained on the BBC News dataset).

  Args:
    title: The article title to classify (one string).

  Returns:
    One of 'business', 'entertainment', 'politics', 'sport' or 'tech'.
  """
  # Index -> topic mapping as hard-coded by this notebook.
  # NOTE(review): presumably mirrors model.config.id2label - confirm.
  id2label = {0:'business', 1:'entertainment', 2:'politics', 3:'sport', 4:'tech'}

  # truncation=True clips over-long input to the model's maximum length
  # instead of letting the forward pass fail on it.
  inputs = tokenizer(title, return_tensors="pt", truncation=True)

  # Pure inference: skip autograd bookkeeping to save time and memory
  with torch.no_grad():
    outputs = model(**inputs)

  # Take the arg-max over the class dimension rather than the flattened tensor
  topic = id2label[torch.argmax(outputs.logits, dim=-1).item()]
  return topic

In [None]:
output = list()

# Query the API to obtain the dataset
titles, articles = get_news()

# Perform sentiment analysis and topic classification, one article at a time
for title, article_text in zip(titles, articles):
  # Some articles come without a description. Converting that missing value
  # with str() would make the model score the literal word "None", so fall
  # back to the title when there is no description to analyse.
  text_for_sentiment = article_text if article_text else title
  sentiment = analyse_sentiment(str(text_for_sentiment))
  topic = find_topic(title)
  # The pipeline result is indexed at 0 for this article's label and score
  output.append((title, article_text, sentiment[0]['label'], sentiment[0]['score'], topic))

# Output the results (the original description, possibly missing, is kept as-is)
df = pd.DataFrame(output, columns=['title','article_text', 'sentiment', 'confidence', 'topic'])
df

Unnamed: 0,title,article_text,sentiment,confidence,topic
0,Research explains how plants optimise photosyn...,"For research, plants are frequently grown unde...",POSITIVE,0.99872,tech
1,"Samsung in 2023: Predictions, rumors, and what...",,NEGATIVE,0.997574,tech
2,ACV Keto Gummies {Shocking US & CA Customer Re...,The many health benefits of apple cider vinega...,POSITIVE,0.998744,sport
3,UP Opposition To Stay Away From Congress Yatra...,Rahul Gandhi's Bharat Jodo Yatra might draw a ...,POSITIVE,0.985397,politics
4,Samsung Galaxy S22 FE is not dead! Could get S...,Samsung Galaxy S22 FE is now tipped to launch ...,POSITIVE,0.993227,tech
5,New Gigabyte G5 gaming laptops launched in Ind...,Gigabyte's new gaming laptops come powered by ...,POSITIVE,0.998096,tech
6,"For Salman Khan, Birthday Wish From Sister Arp...","""Happiest Birthday to my life line,"" wrote Arp...",POSITIVE,0.998883,entertainment
7,Ethnic Serbs erect more roadblocks as tensions...,Kosovo has demanded barriers erected by Serb p...,NEGATIVE,0.997758,business
8,Tunisha Sharma Suicide Case LIVE Updates: Actr...,Tunisha Sharma Death Live Updates: Shivin Nara...,POSITIVE,0.995593,entertainment
9,Unadkat: Getting a Test wicket is something I ...,The India fast bowler reflects on his Test com...,POSITIVE,0.997454,sport
