## Step 1: Install Dependencies


In [1]:
!pip install pyspark textblob newsapi-python

Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Downloading newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: newsapi-python
Successfully installed newsapi-python-0.2.7


## Step 2: Imports and Spark Session Setup


In [4]:
import os
import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType
from textblob import TextBlob
from typing import List, Dict
from getpass import getpass

# Create the Spark session used by the rest of the notebook, or reuse one that
# already exists. "local[*]" runs Spark in-process on this machine.
spark = SparkSession.builder.master("local[*]").appName("ColabNewsSentimentAnalysis").getOrCreate()

## Step 3: News Fetching Function


In [5]:
def fetch_latest_news(api_key: str, query: str = 'technology') -> list[dict]:
    """Fetch English-language articles matching ``query`` from newsdata.io.

    Args:
        api_key: newsdata.io API key, sent as the ``apikey`` query parameter.
        query: Search term, sent as the ``q`` query parameter.

    Returns:
        One dict per returned article that has a non-empty title, with the
        keys ``title``, ``description``, ``url`` and ``source``. An empty list
        is returned (and a message printed) when the request fails or the
        response does not contain a list of results.
    """
    url = "https://newsdata.io/api/1/news"
    # Passing the parameters separately lets requests URL-encode them, so a
    # query containing spaces, '&' or '#' cannot corrupt the request.
    params = {"apikey": api_key, "q": query, "language": "en"}

    try:
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # HTTP error messages embed the request URL, which carries the API
        # key; redact it before it ends up in saved notebook output.
        message = str(e)
        if api_key:
            message = message.replace(api_key, "***")
        print(f"Error fetching news: {message}")
        return []

    articles = data.get('results') if isinstance(data, dict) else None
    if articles is None:
        return []
    if not isinstance(articles, list):
        # Anything other than a list of article objects cannot be formatted.
        print("Error fetching news: unexpected response format")
        return []

    formatted_articles = [
        {
            "title": article.get("title"),
            "description": article.get("description"),
            "url": article.get("link"),
            "source": article.get("source_id")
        }
        for article in articles
        if isinstance(article, dict) and article.get("title")
    ]
    return formatted_articles

# Use the NEWSDATA_API_KEY environment variable when it is set so the notebook
# can run unattended; otherwise prompt for the key without echoing it.
# Surrounding whitespace from a copy-paste is stripped.
NEWSDATA_API_KEY = (
    os.environ.get('NEWSDATA_API_KEY') or getpass('Enter your newsdata.io API key: ')
).strip()

Enter your newsdata.io API key: ··········


## Step 4: Sentiment Analysis

In [10]:
def classify_sentiment(text: str) -> str:
    """Label ``text`` as 'Positive', 'Negative' or 'Neutral'.

    Empty or missing text is 'Neutral'. Otherwise the TextBlob polarity
    decides: above 0.1 is 'Positive', below -0.1 is 'Negative', and anything
    in between (bounds included) is 'Neutral'.
    """
    label = 'Neutral'
    if text:
        score = TextBlob(text).sentiment.polarity
        if score > 0.1:
            label = 'Positive'
        elif score < -0.1:
            label = 'Negative'
    return label

def analyze_news_sentiment(articles: List[Dict]) -> List[Dict]:
    """Add a ``sentiment`` label to each article using a Spark UDF.

    Args:
        articles: Article dicts; each is expected to have a ``title`` key,
            whose text is what gets classified.

    Returns:
        One dict per article containing the article's fields plus a
        ``sentiment`` key produced by ``classify_sentiment``. Returns an
        empty list when ``articles`` is empty.
    """
    if not articles:
        return []

    # Imported here because only the explicit-schema path below needs them.
    from pyspark.sql.types import StructField, StructType

    only_text = all(
        value is None or isinstance(value, str)
        for article in articles
        for value in article.values()
    )
    if only_text:
        # Schema inference raises when a column is None in every row (for
        # example, when no article has a description), so declare every
        # column as a nullable string instead of inferring it.
        field_names = sorted({key for article in articles for key in article})
        schema = StructType(
            [StructField(name, StringType(), True) for name in field_names]
        )
        news_df = spark.createDataFrame(articles, schema=schema)
    else:
        # Non-string values: keep the original inferred-schema behaviour.
        news_df = spark.createDataFrame(articles)

    sentiment_udf = udf(classify_sentiment, StringType())

    classified_df = news_df.withColumn("sentiment", sentiment_udf(news_df["title"]))

    # Results are small enough to bring back to the driver as plain dicts.
    return [row.asDict() for row in classified_df.collect()]

In [11]:
# Fetch finance headlines, classify them, and preview the first five results.
print("Fetching latest news from newsdata.io...")
raw_articles = fetch_latest_news(NEWSDATA_API_KEY, query='finance')

if not raw_articles:
    # Nothing came back (request failed or no titled articles).
    print("Could not fetch any articles.")
else:
    print(f"Fetched {len(raw_articles)} articles. Analyzing sentiment with PySpark...")
    classified_articles = analyze_news_sentiment(raw_articles)
    print("Analysis complete!")

    # Only a short preview is printed to keep the cell output readable.
    for article in classified_articles[:5]:
        print(f"\nTitle: {article['title']}")
        print(f"Sentiment: {article['sentiment']}")

Fetching latest news from newsdata.io...
Fetched 10 articles. Analyzing sentiment with PySpark...
Analysis complete!

Title: GST 2.0 Boost: OECD Lifts India’s FY26 Growth Forecast To 6.7%; S&P Retains 6.5%
Sentiment: Neutral

Title: 'No Racism, Instant Medical Access': Expat Shares Why Moving Back To India Was The Best Decision
Sentiment: Positive

Title: Stocks To Buy Today, Sept 24: Top Picks By Riyank Arora of Mehta Equities For Profitable Trading On Wednesday
Sentiment: Positive

Title: Trump administration threatens further sanctions in escalating fight with Harvard
Sentiment: Neutral

Title: 14 killed by lake burst in Taiwan as Typhoon Ragasa wreaks havoc
Sentiment: Negative
