In [1]:
import os

import pandas as pd
import requests
import spacy
from textblob import TextBlob

# Load the spaCy "en_core_web_sm" pipeline once at module level;
# analyze_article() calls this shared `nlp` object for every text it parses.
nlp = spacy.load("en_core_web_sm")

# Sentiment labelling based on TextBlob polarity
def get_sentiment(text):
    """Classify ``text`` as "Positive", "Neutral" or "Negative".

    The label follows the sign of the polarity TextBlob reports for the text:
    greater than zero is "Positive", exactly zero is "Neutral", and every
    other value is "Negative".
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "Positive"
    if polarity == 0:
        return "Neutral"
    return "Negative"

# Named entities, verb-based "events" and dependency labels for one text
def analyze_article(article_text):
    """Parse ``article_text`` with the shared ``nlp`` pipeline and summarise it.

    Returns a 3-tuple of comma-separated strings, in this order:

    * entities     - the text of each entity in ``doc.ents``
    * events       - the lemma of every token whose ``pos_`` is ``"VERB"``
    * dependencies - the ``dep_`` label of every token, in token order
    """
    parsed = nlp(article_text)

    entity_texts = []
    for entity in parsed.ents:
        entity_texts.append(entity.text)

    # One pass over the tokens collects both the verb lemmas and the labels.
    verb_lemmas = []
    dep_labels = []
    for tok in parsed:
        if tok.pos_ == "VERB":
            verb_lemmas.append(tok.lemma_)
        dep_labels.append(tok.dep_)

    separator = ", "
    return (
        separator.join(entity_texts),
        separator.join(verb_lemmas),
        separator.join(dep_labels),
    )

# SerpApi search endpoint.
api_endpoint = "https://serpapi.com/search"

# The SerpApi key is read from the SERPAPI_API_KEY environment variable so the
# secret never lives in the notebook (anything saved here is visible to every
# reader of the file). Export the variable before starting the kernel.
api_key = os.environ.get("SERPAPI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the SERPAPI_API_KEY environment variable before running this cell."
    )

# Set up your parameters including the engine type and your query.
params = {
    "engine": "google_news",
    "q": "latest business news in India",  # Your search query
    "location": "India",
    "language": "en",
    "num": "20",  # Attempt to limit results to 20 (may depend on API behavior)
    "api_key": api_key
}

# Upper bound, in seconds, on how long to wait for SerpApi. Without a timeout
# requests.get() can block indefinitely on an unresponsive server.
request_timeout = 30

# Publishers whose articles are kept. Built once (not per article) as a set
# for constant-time membership checks.
allowed_sources = {
    "The Economic Times",
    "Mint",
    "Hindustan Times",
    "World Economic Forum",
    "Moneycontrol",
    "NDTV",
    "Business Today",
    "The Financial Express",
    "Food Business News",
    "The Times of India",
    "Financial Post",
    "HBR.org Daily",
    "Zee Business",
    "Fortune India",
    "Forbes India",
    "India TV News",
}

# Column order of the resulting DataFrame; also used when no article matches,
# so `df` always exists with the expected schema.
news_columns = ['Title', 'Link', 'Source', 'Sentiment', 'Entities', 'Events', 'Dependencies']

# --- Fetch -------------------------------------------------------------------
# Only network and JSON-decoding failures are caught here; `articles` stays
# empty in that case so the rest of the cell still produces a (possibly empty)
# DataFrame instead of failing later with a NameError on `df`.
articles = []
try:
    response = requests.get(api_endpoint, params=params, timeout=request_timeout)
    if response.status_code == 200:
        data = response.json()
        # Assuming 'news_results' is the key containing the article list, adjust as necessary.
        # Every returned result is kept; filtering by publisher happens below.
        articles = data.get('news_results') or []
    else:
        print(f"Failed to fetch data: HTTP {response.status_code}")
except (requests.RequestException, ValueError) as e:
    # The request URL carries the key as a query parameter and may be echoed in
    # the exception text, so redact it before it reaches the notebook output.
    print(f"An error occurred: {str(e).replace(api_key, '***')}")

# --- Process -----------------------------------------------------------------
# One record per article from an allowed publisher. Errors raised here are
# programming errors and are deliberately left to surface.
records = []
for article in articles:
    # 'source' may be absent or null in a result; treat both as "no source".
    source_name = (article.get('source') or {}).get('name')
    if source_name not in allowed_sources:
        continue

    title = article.get('title') or ''
    link = article.get('link', '')

    # Using title for the NLP analysis and sentiment (can be replaced with content)
    entities, events, dependencies = analyze_article(title)
    records.append({
        'Title': title,
        'Link': link,
        'Source': source_name,
        'Sentiment': get_sentiment(title),
        'Entities': entities,
        'Events': events,
        'Dependencies': dependencies,
    })

# Build the DataFrame once from the collected records.
df = pd.DataFrame(records, columns=news_columns)

df

Unnamed: 0,Title,Link,Source,Sentiment,Entities,Events,Dependencies
0,"Ford expanding business in India, but for a di...",https://m.economictimes.com/industry/renewable...,The Economic Times,Neutral,"Ford, India",expand,"ROOT, acl, dobj, prep, pobj, punct, cc, conj, ..."
1,"US jobs boost, Indian services sector growth, ...",https://www.weforum.org/agenda/2023/10/us-jobs...,World Economic Forum,Negative,"US, Indian, this week",read,"compound, nsubj, ROOT, punct, amod, compound, ..."
2,Monthly SIPs hit new high in February,https://timesofindia.indiatimes.com/business/i...,The Times of India,Positive,"Monthly, February",hit,"amod, nsubj, ROOT, amod, dobj, prep, pobj"
3,General Mills investing $12 million in India,https://www.foodbusinessnews.net/articles/2462...,Food Business News,Positive,"General Mills, $12 million, India",invest,"compound, nsubj, ROOT, quantmod, compound, dob..."
4,Salesforce logs 35% YoY growth in new business...,https://www.zeebiz.com/companies/news-salesfor...,Zee Business,Positive,"35%, YoY, India",log,"nsubj, ROOT, nummod, compound, compound, dobj,..."
...,...,...,...,...,...,...,...
57,Realpolitik and India,https://www.financialexpress.com/opinion/realp...,The Financial Express,Neutral,India,,"ROOT, cc, conj"
58,Changing winds in the Indian Ocean,https://www.financialexpress.com/opinion/chang...,The Financial Express,Neutral,the Indian Ocean,change,"ROOT, dobj, prep, det, compound, pobj"
59,Year 2024: What lies ahead for India’s leading...,https://www.livemint.com/news/india/year-2024-...,Mint,Neutral,"Year 2024, India, Mint","lie, lead, own","ROOT, nummod, punct, nsubj, ROOT, advmod, prep..."
60,India Inc.'s Premium Play,https://www.fortuneindia.com/long-reads/india-...,Fortune India,Neutral,India Inc.'s,,"compound, poss, case, compound, ROOT"


In [2]:
# Write the collected articles to Live_News.csv; index=False keeps the
# DataFrame's row index out of the file.
df.to_csv("Live_News.csv", index=False)