In [1]:
import pandas as pd
import numpy as np


In [2]:
# Define empty news dataframe with required schema
news_df = pd.DataFrame(columns=[
    "date",
    "headline",
    "source"
])

news_df


Unnamed: 0,date,headline,source


In [3]:
# Manually add sample financial news headlines for Reliance
news_data = [
    {"date": "2025-01-13", "headline": "Reliance Industries shares fall amid weak global cues", "source": "Financial News"},
    {"date": "2025-01-14", "headline": "Reliance reports steady earnings growth in Q3", "source": "Business Standard"},
    {"date": "2025-01-15", "headline": "Positive outlook for Reliance as retail segment expands", "source": "Economic Times"},
    {"date": "2025-01-16", "headline": "Market volatility impacts Reliance stock performance", "source": "Moneycontrol"},
    {"date": "2025-01-17", "headline": "Reliance shares surge on strong investor sentiment", "source": "CNBC"}
]

news_df = pd.DataFrame(news_data)

news_df


Unnamed: 0,date,headline,source
0,2025-01-13,Reliance Industries shares fall amid weak glob...,Financial News
1,2025-01-14,Reliance reports steady earnings growth in Q3,Business Standard
2,2025-01-15,Positive outlook for Reliance as retail segmen...,Economic Times
3,2025-01-16,Market volatility impacts Reliance stock perfo...,Moneycontrol
4,2025-01-17,Reliance shares surge on strong investor senti...,CNBC


In [4]:
import re
import nltk
from nltk.corpus import stopwords

# Download stopwords (run only once)
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

def clean_text(text):
    # Convert to lowercase
    text = text.lower()
    
    # Remove punctuation and special characters
    text = re.sub(r'[^a-z\s]', '', text)
    
    # Remove stopwords
    words = text.split()
    words = [word for word in words if word not in stop_words]
    
    return " ".join(words)

# Apply text cleaning
news_df['clean_headline'] = news_df['headline'].apply(clean_text)

news_df


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,date,headline,source,clean_headline
0,2025-01-13,Reliance Industries shares fall amid weak glob...,Financial News,reliance industries shares fall amid weak glob...
1,2025-01-14,Reliance reports steady earnings growth in Q3,Business Standard,reliance reports steady earnings growth q
2,2025-01-15,Positive outlook for Reliance as retail segmen...,Economic Times,positive outlook reliance retail segment expands
3,2025-01-16,Market volatility impacts Reliance stock perfo...,Moneycontrol,market volatility impacts reliance stock perfo...
4,2025-01-17,Reliance shares surge on strong investor senti...,CNBC,reliance shares surge strong investor sentiment


In [5]:
!pip install vaderSentiment




In [6]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Function to get sentiment score and label
def get_sentiment(text):
    score = analyzer.polarity_scores(text)['compound']
    
    if score > 0.05:
        sentiment = "Positive"
    elif score < -0.05:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
        
    return score, sentiment

# Apply sentiment analysis
news_df[['sentiment_score', 'sentiment_label']] = news_df['clean_headline'].apply(
    lambda x: pd.Series(get_sentiment(x))
)

news_df


Unnamed: 0,date,headline,source,clean_headline,sentiment_score,sentiment_label
0,2025-01-13,Reliance Industries shares fall amid weak glob...,Financial News,reliance industries shares fall amid weak glob...,-0.1779,Negative
1,2025-01-14,Reliance reports steady earnings growth in Q3,Business Standard,reliance reports steady earnings growth q,0.3818,Positive
2,2025-01-15,Positive outlook for Reliance as retail segmen...,Economic Times,positive outlook reliance retail segment expands,0.6124,Positive
3,2025-01-16,Market volatility impacts Reliance stock perfo...,Moneycontrol,market volatility impacts reliance stock perfo...,0.0,Neutral
4,2025-01-17,Reliance shares surge on strong investor senti...,CNBC,reliance shares surge strong investor sentiment,0.6705,Positive


In [7]:
# Convert date column to datetime
news_df['date'] = pd.to_datetime(news_df['date'])

# Aggregate sentiment at daily level
daily_sentiment = news_df.groupby('date').agg(
    avg_sentiment_score=('sentiment_score', 'mean'),
    positive_news_count=('sentiment_label', lambda x: (x == 'Positive').sum()),
    negative_news_count=('sentiment_label', lambda x: (x == 'Negative').sum()),
    neutral_news_count=('sentiment_label', lambda x: (x == 'Neutral').sum())
).reset_index()

daily_sentiment


Unnamed: 0,date,avg_sentiment_score,positive_news_count,negative_news_count,neutral_news_count
0,2025-01-13,-0.1779,0,1,0
1,2025-01-14,0.3818,1,0,0
2,2025-01-15,0.6124,1,0,0
3,2025-01-16,0.0,0,0,1
4,2025-01-17,0.6705,1,0,0


In [8]:
# Save daily sentiment data to processed layer
daily_sentiment.to_csv("../data/processed/reliance_daily_sentiment.csv", index=False)

print("Daily sentiment data saved successfully.")


Daily sentiment data saved successfully.
