In [1]:
import pandas as pd
import requests
from textblob import TextBlob
from dotenv import load_dotenv
import spacy
from fuzzywuzzy import process
import re

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

# Load API keys from environment variable or any other source
api_keys = ["c016e6eeff684982bcfdef26b326c2b7", "5c8422ca2b604f4481e4a43aa95bb074", "cc7baa3c527f4624873dc8f237b2df5e", "060a295073e94514a268c437f2d0fc64", "67a4bf1c0713482fb55ad691623aab49", "a40ac68e681949d28f84e7152cb56d64", "c223bf69c1d24884ad47c00ffe4b4edc"]
current_key_index = 0

def get_api_key():
    global current_key_index
    api_key = api_keys[current_key_index]
    current_key_index = (current_key_index + 1) % len(api_keys)
    return api_key

# Load dataframes
stock_df = pd.read_csv("Stock_DataFrame.csv")
def_df = pd.read_csv("Sector_Definitions.csv")

# Define basic industry categories
basic_ind = def_df["Basic Industry"].apply(lambda x: x.replace("\n", "")
                                                    .replace("&", "& ")
                                                    .replace("/", "/ ")
                                                    .replace("  ", " ")).unique()

# Define the pre-defined categories for investors and politicians
categories = {industry: [industry] for industry in basic_ind}

# Function to perform sentiment analysis
def get_sentiment(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0:
        return "Positive"
    elif analysis.sentiment.polarity == 0:
        return "Neutral"
    else:
        return "Negative"

# Function to fetch news data
def fetch_news_data():
    news_data = []
    for category, keywords in categories.items():
        print(f"Fetching articles for category: {category}")
        for keyword in keywords:
            url = f"https://newsapi.org/v2/everything?q=India+{keyword}&from=2024-03-08&to=2024-03-09&sortBy=publishedAt&apiKey={get_api_key()}"
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json()
                articles = data["articles"][:5]  # Limit to 5 articles per category
                print(f"Number of articles fetched for {category}: {len(articles)}")
                for article in articles:
                    news_data.append({
                        "Category": category,
                        "News Headline": article["title"],
                        "Sentiment": get_sentiment(article["title"])
                    })
            elif response.status_code == 429:  # Rate Limit Exceeded
                print("API Rate Limit Exceeded. Trying with another API key...")
                continue
            else:
                print(f"Failed to fetch articles for {category}. Status code: {response.status_code}")
    return pd.DataFrame(news_data)

# Get news data and display
news_df = fetch_news_data()

# Process sector definitions
def_df["Basic Industry"] = def_df["Basic Industry"].apply(lambda x: x.replace("\n", "")
                                                    .replace("&", "& ")
                                                    .replace("/", "/ ")
                                                    .replace("  ", " "))

# Merge news data with sector definitions
merged_news_df = pd.merge(news_df, def_df[["Basic Industry", "Sector"]], left_on="Category", right_on="Basic Industry", how="left")
merged_news_df.drop(["Basic Industry"], axis=1, inplace=True)

# Function to find similar sector or industry
def find_similar_sector_or_industry(query, choices):
    match = process.extractOne(query, choices)
    return match[0] if match else None


# Iterate through merged_news_df to add Ticker
tickers = []
for index, row in merged_news_df.iterrows():
    sector = row['Sector']
    ticker = None
    
    # Try direct mapping first
    direct_mapping = stock_df.loc[stock_df['Sector'] == sector, 'Ticker']
    if not direct_mapping.empty:
        ticker = direct_mapping.iloc[0]
    else:
        # If direct mapping fails, find similar sector using fuzzy matching
        similar_sector = find_similar_sector_or_industry(sector, stock_df['Sector'])
        if similar_sector:
            similar_mapping = stock_df.loc[stock_df['Sector'] == similar_sector, 'Ticker']
            if not similar_mapping.empty:
                ticker = similar_mapping.iloc[0]
    
    tickers.append(ticker)

merged_news_df['Ticker'] = tickers

# Merge with stock_df to get Name
final_news_df = pd.merge(merged_news_df, stock_df[["Ticker", "Name"]], on="Ticker", how="left")
final_news_df["Sector"] = final_news_df["Sector"].apply(lambda x: x.replace("\n", " "))

# Save the final dataframe to CSV
final_news_df.to_csv(r"C:\Users\DELL\Desktop\Projects\LOC 6.0\Final News DataFrame\Basic Industry DataFrame.csv", index=False)



Fetching articles for category: CommodityChemicals
Number of articles fetched for CommodityChemicals: 0
Fetching articles for category: SpecialtyChemicals
Number of articles fetched for SpecialtyChemicals: 0
Fetching articles for category: Carbon Black
Number of articles fetched for Carbon Black: 3
Fetching articles for category: Dyes And Pigments
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: Explosives
Number of articles fetched for Explosives: 3
Fetching articles for category: Petrochemicals
Number of articles fetched for Petrochemicals: 4
Fetching articles for category: Printing Inks
Number of articles fetched for Printing Inks: 2
Fetching articles for category: Trading - Chemicals
Number of articles fetched for Trading - Chemicals: 2
Fetching articles for category: Industrial Gases
Number of articles fetched for Industrial Gases: 4
Fetching articles for category: Fertilizers
Number of articles fetched for Fertilizers: 5
Fetching articles fo

Number of articles fetched for E-Retail/ E-Commerce: 5
Fetching articles for category: Internet & Catalogue Retail
Number of articles fetched for Internet & Catalogue Retail: 5
Fetching articles for category: Distributors
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: GasTransmission/ Marketing
Number of articles fetched for GasTransmission/ Marketing: 0
Fetching articles for category: LPG/ CNG/ PNG/ LNG Supplier
Number of articles fetched for LPG/ CNG/ PNG/ LNG Supplier: 0
Fetching articles for category: Trading - Gas
Number of articles fetched for Trading - Gas: 5
Fetching articles for category: Oil Exploration & Production
Number of articles fetched for Oil Exploration & Production: 5
Fetching articles for category: Offshore SupportSolution Drilling
Number of articles fetched for Offshore SupportSolution Drilling: 0
Fetching articles for category: Oil Storage & Transportation
Number of articles fetched for Oil Storage & Transportation: 4
Fetc

Number of articles fetched for Other IndustrialProducts: 0
Fetching articles for category: Glass - Industrial
Number of articles fetched for Glass - Industrial: 2
Fetching articles for category: Aluminium, Copper& Zinc Products
Number of articles fetched for Aluminium, Copper& Zinc Products: 1
Fetching articles for category: Iron & SteelProducts
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: Abrasives & Bearings
Number of articles fetched for Abrasives & Bearings: 0
Fetching articles for category: Compressors,Pumps & DieselEngines
Number of articles fetched for Compressors,Pumps & DieselEngines: 0
Fetching articles for category: Electrodes & Refractories
Number of articles fetched for Electrodes & Refractories: 0
Fetching articles for category: Computers -Software & Consulting
Number of articles fetched for Computers -Software & Consulting: 4
Fetching articles for category: Software Products
Number of articles fetched for Software Products: 5
Fe

In [3]:
category_list = [
    'Market Updates',
    'Earnings Reports',
    'Company News',
    'Sector News',
    'IPOs and Offerings',
    'Market Analysis',
    'Global Markets',
    'Economic Indicators',
    'Commodities and Forex',
    'Regulatory News',
    'Market Volatility',
    'Corporate Governance',
    'Technical Analysis',
    'Investment Strategies',
    'Personal Finance',
    'Market Sentiment',
    'Market Events',
    'Dividends and Buybacks',
    'Market Innovations',
    'Market Regulations',
    'Market Psychology',
    'Market Education',
    'Market Trends',
    'Market Disruptions',
    'Market Ethics',
    'Market Performance Metrics',
    'Market Research',
    'Market Outlook',
    'Market Technology',
    'Market Commentary',
    'Market Sentiment Indicators',
    'Market Performance by Geography',
    'Market Events Calendar',
    'Market Risk Management',
    'Market Insider Trading',
    'Market Investment Vehicles',
    'Market Quantitative Analysis',
    'Market Behavioral Finance',
    'Market Performance Benchmarks',
    'Market Crowdfunding and Crowdsourcing',
    'Market Philanthropy and Impact Investing',
    'Market Alternative Investments',
    'Market Data Analytics',
    'Market Behavioral Economics',
    'Government Budget and Fiscal Policy',
    'Monetary Policy and Central Banks',
    'Government Regulations and Policies',
    'Trade Policies and Tariffs',
    'Geopolitical Events and Risks',
    'Environmental, Social, and Governance (ESG) Investing',
    'Climate Change and Market Impacts',
    'Healthcare Policies and Regulations',
    'Education and Training',
    'Labor Market and Employment Trends',
    'Emerging Technologies and Innovation',
    'Cybersecurity and Data Privacy',
    'Infrastructure Development and Investments',
    'Energy Policies and Renewable Energy',
    'Consumer Trends and Behavior'
]

# Define the pre-defined categories for investors and politicians
categories = {category: [category] for category in category_list}

news_df = fetch_news_data()

Fetching articles for category: Market Updates
Number of articles fetched for Market Updates: 5
Fetching articles for category: Earnings Reports
Number of articles fetched for Earnings Reports: 5
Fetching articles for category: Company News
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: Sector News
Number of articles fetched for Sector News: 5
Fetching articles for category: IPOs and Offerings
Number of articles fetched for IPOs and Offerings: 4
Fetching articles for category: Market Analysis
Number of articles fetched for Market Analysis: 5
Fetching articles for category: Global Markets
Number of articles fetched for Global Markets: 5
Fetching articles for category: Economic Indicators
Number of articles fetched for Economic Indicators: 5
Fetching articles for category: Commodities and Forex
Number of articles fetched for Commodities and Forex: 0
Fetching articles for category: Regulatory News
API Rate Limit Exceeded. Trying with another API ke

In [4]:
news_df

Unnamed: 0,Category,News Headline,Sentiment
0,Market Updates,Shaitaan Opening Day Box Office: Ajay Devgn’s ...,Neutral
1,Market Updates,In Vitro Diagnostics Market Size Worth USD 128...,Positive
2,Market Updates,Copper Fees Plunge Close to Zero in Test For C...,Neutral
3,Market Updates,Global tech firms likely to lead India’s local...,Neutral
4,Market Updates,Govt keeps interest rates on small savings sch...,Negative
...,...,...,...
173,Infrastructure Development and Investments,Cold Chain Logistics Market Size to Grow USD 8...,Negative
174,Infrastructure Development and Investments,"Global Carbon Steel Market to Reach $1,284.51 ...",Positive
175,Energy Policies and Renewable Energy,Green Hydrogen Market Analysis to 2033: Solar ...,Neutral
176,Energy Policies and Renewable Energy,Concentrated Solar Power Market to Reach $12.0...,Positive


In [5]:
unique_sectors = def_df["Sector"].unique()

# Iterate through merged_news_df to add Sector
sectors = []
for index, row in merged_news_df.iterrows():
    headline = row['News Headline']
    sector = None
    
    # Find similar sector using fuzzy matching
    similar_sector = find_similar_sector_or_industry(headline, unique_sectors)
    if similar_sector:
        sector = similar_sector
    
    sectors.append(sector)

merged_news_df['Sector'] = sectors

In [9]:
final_news_df = pd.merge(merged_news_df, stock_df[["Ticker", "Name"]], on="Ticker", how="left")
# Save the final dataframe to CSV
final_news_df.to_csv(r"C:\Users\DELL\Desktop\Projects\LOC 6.0\Final News DataFrame\General DataFrame.csv", index=False)

In [11]:
import pandas as pd
import requests
from textblob import TextBlob
from dotenv import load_dotenv
from collections import Counter

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

# Load API keys from environment variable or any other source
api_keys = ["c016e6eeff684982bcfdef26b326c2b7", "5c8422ca2b604f4481e4a43aa95bb074", "cc7baa3c527f4624873dc8f237b2df5e", "060a295073e94514a268c437f2d0fc64", "67a4bf1c0713482fb55ad691623aab49", "a40ac68e681949d28f84e7152cb56d64", "c223bf69c1d24884ad47c00ffe4b4edc"]
current_key_index = 0

def get_api_key():
    global current_key_index
    api_key = api_keys[current_key_index]
    current_key_index = (current_key_index + 1) % len(api_keys)
    return api_key

# Load dataframes
stock_df = pd.read_csv("Stock_DataFrame.csv")
def_df = pd.read_csv("Sector_Definitions.csv")

# Extract prominent keywords from definitions
def extract_keywords(text):
    doc = nlp(text)
    tokens = [token.text.lower() for token in doc if token.pos_ in ['NOUN', 'PROPN'] and not token.is_stop]
    return tokens

all_keywords = []
for definition in def_df['Definition']:
    keywords = extract_keywords(definition)
    all_keywords.extend(keywords)

# Get the most common keywords as categories
category_count = Counter(all_keywords)
categories = [category for category, count in category_count.most_common()]

# Function to fetch news data for a specific category
def fetch_news_data(category):
    news_data = []
    print(f"Fetching articles for category: {category}")
    url = f"https://newsapi.org/v2/everything?q=India+{category}&from=2024-03-08&to=2024-03-09&sortBy=publishedAt&apiKey={get_api_key()}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        articles = data["articles"][:5]  # Limit to 5 articles per category
        print(f"Number of articles fetched for {category}: {len(articles)}")
        for article in articles:
            news_data.append({
                "Category": category,
                "News Headline": article["title"],
                "Sentiment": get_sentiment(article["title"])
            })
    elif response.status_code == 429:  # Rate Limit Exceeded
        print("API Rate Limit Exceeded. Trying with another API key...")
    else:
        print(f"Failed to fetch articles for {category}. Status code: {response.status_code}")
    return pd.DataFrame(news_data)

# Function to perform sentiment analysis
def get_sentiment(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0:
        return "Positive"
    elif analysis.sentiment.polarity == 0:
        return "Neutral"
    else:
        return "Negative"

# Fetch news data for each category
news_data = pd.DataFrame()
for category in categories:
    news_data = pd.concat([news_data, fetch_news_data(category)], ignore_index=True)

news_data

Fetching articles for category: companies
Number of articles fetched for companies: 5
Fetching articles for category: manufacturers
Number of articles fetched for manufacturers: 5
Fetching articles for category: distributors
Number of articles fetched for distributors: 5
Fetching articles for category: products
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: services
Number of articles fetched for services: 5
Fetching articles for category: distributor
Number of articles fetched for distributor: 4
Fetching articles for category: consumer
Number of articles fetched for consumer: 5
Fetching articles for category: distribution
Number of articles fetched for distribution: 5
Fetching articles for category: trading
Number of articles fetched for trading: 5
Fetching articles for category: operators
Number of articles fetched for operators: 5
Fetching articles for category: owners
API Rate Limit Exceeded. Trying with another API key...
Fetching articles 

Number of articles fetched for hospitals: 5
Fetching articles for category: bridges
Number of articles fetched for bridges: 4
Fetching articles for category: engineering
Number of articles fetched for engineering: 5
Fetching articles for category: manufactures
Number of articles fetched for manufactures: 5
Fetching articles for category: cargo
Number of articles fetched for cargo: 5
Fetching articles for category: fibres
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: explosives
Number of articles fetched for explosives: 3
Fetching articles for category: dyes
Number of articles fetched for dyes: 1
Fetching articles for category: pigments
Number of articles fetched for pigments: 0
Fetching articles for category: propylene
Number of articles fetched for propylene: 0
Fetching articles for category: printing
Number of articles fetched for printing: 5
Fetching articles for category: material
Number of articles fetched for material: 5
Fetching articles

Number of articles fetched for phones: 5
Fetching articles for category: cables
Number of articles fetched for cables: 4
Fetching articles for category: bearings
Number of articles fetched for bearings: 2
Fetching articles for category: software
Number of articles fetched for software: 5
Fetching articles for category: consulting
Number of articles fetched for consulting: 5
Fetching articles for category: infrastructure
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: marine
Number of articles fetched for marine: 5
Fetching articles for category: process
Number of articles fetched for process: 5
Fetching articles for category: outsourcing
Number of articles fetched for outsourcing: 4
Fetching articles for category: instruments
Number of articles fetched for instruments: 5
Fetching articles for category: electricity
Number of articles fetched for electricity: 5
Fetching articles for category: generation
Number of articles fetched for generation: 5


Number of articles fetched for bowls: 3
Fetching articles for category: interior
Number of articles fetched for interior: 5
Fetching articles for category: paints
Number of articles fetched for paints: 3
Fetching articles for category: carpets
Number of articles fetched for carpets: 1
Fetching articles for category: curtains
Number of articles fetched for curtains: 3
Fetching articles for category: floor
Number of articles fetched for floor: 5
Fetching articles for category: wall
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: tiles
Number of articles fetched for tiles: 2
Fetching articles for category: knitwears
Number of articles fetched for knitwears: 0
Fetching articles for category: innerwears
Number of articles fetched for innerwears: 0
Fetching articles for category: textiles
Number of articles fetched for textiles: 5
Fetching articles for category: fabrics
Number of articles fetched for fabrics: 1
Fetching articles for category: silk
Numb

Number of articles fetched for foods: 5
Fetching articles for category: biscuits
Number of articles fetched for biscuits: 2
Fetching articles for category: chocolates
Number of articles fetched for chocolates: 1
Fetching articles for category: chips
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: culture
Number of articles fetched for culture: 5
Fetching articles for category: fish
Number of articles fetched for fish: 5
Fetching articles for category: shrimp
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: sea/
Number of articles fetched for sea/: 5
Fetching articles for category: freshwater
Number of articles fetched for freshwater: 2
Fetching articles for category: meat
Number of articles fetched for meat: 5
Fetching articles for category: breeders
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: chicken
API Rate Limit Exceeded. Trying with another API key...
Fetching

API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: epc
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: projects
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: buildings
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: realty
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: civil
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: aerospace
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: farm
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: trucks
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: dumpers
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: cranes
API Rate Limit Exceeded. Trying with another AP

API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: city
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: town
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: authority
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: region
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: band
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: telephone
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: presence
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: water
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: supply
API Rate Limit Exceeded. Trying with another API key...
Fetching articles for category: waste
API Rate Limit Exceeded. Trying with another API k

Unnamed: 0,Category,News Headline,Sentiment
0,companies,How did the world run so low on cholera vaccin...,Negative
1,companies,"India Enters The AI Race, With Plans Of Develo...",Neutral
2,companies,Mining Explosives and Systems Market to Reach ...,Neutral
3,companies,Select payments to units in IFSC exempted from...,Neutral
4,companies,"AstraZeneca bets on licencing deals, public-pr...",Neutral
...,...,...,...
1371,equity,The History of Public Health Colonialism ⋆ Bro...,Neutral
1372,equity,Shark Tank India 3: GenZ-inspired fashion bran...,Neutral
1373,equity,SAVENCIA FROMAGE & DAIRY: 2023 annual financia...,Neutral
1374,equity,SAVENCIA FROMAGE & DAIRY: 2023 annual financia...,Neutral


In [32]:
from fuzzywuzzy import process

# Get unique sectors from stock_df
unique_definitions = stock_df['Definition'].unique()

# Function to find the most similar sector for a given news headline
def find_similar_sector(headline, sectors):
    match = process.extractOne(headline, sectors)
    return match[0] if match else None

# Iterate through news_df to add Definition
for index, row in news_df.iterrows():
    headline = row['News Headline']
    definition = find_similar_sector(headline, unique_definitions)
    news_data['Definition'] = definition

final_news_df = pd.merge(news_data, stock_df[["Definition", "Ticker", "Name", "Sector"]], on="Definition", how="left")
final_news_df.drop(['Definition'], axis=1, inplace=True)

# Save the final dataframe to CSV
final_news_df.to_csv(r"C:\Users\DELL\Desktop\Projects\LOC 6.0\Final News DataFrame\Definition DataFrame.csv", index=False)