<a href="https://colab.research.google.com/github/Shashank-Girish/FinancialNewsSentimentAnalysis/blob/main/ML_Minor_FinNews.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install vaderSentiment tensorflow



In [2]:
import pandas as pd
import numpy as np
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
from google.colab import drive
from wordcloud import WordCloud
import re
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
# Mount Google Drive
drive.mount('/content/drive')

# Load the dataset (adjust the file path as needed)
file_path = '/content/drive/My Drive/IndianFinancialNews.csv'
df = pd.read_csv(file_path)

# Check the dataset structure
df.head()


Mounted at /content/drive


Unnamed: 0.1,Unnamed: 0,Date,Title,Description
0,0,"May 26, 2020, Tuesday","ATMs to become virtual bank branches, accept d...","Close to 14.6 per cent (or 35,000) of the 240,..."
1,1,"May 26, 2020, Tuesday",IDFC First Bank seniors to forgo 65% of bonus ...,"V Vaidyanathan, managing director and chief ex..."
2,2,"May 25, 2020, Monday","Huge scam in YES Bank for many years, says Enf...",Rana Kapoor's wife also charged with abetting ...
3,3,"May 24, 2020, Sunday","Bank of Maharashtra sanctioned Rs 2,789 cr in ...",The bank said it was now gearing up to extend ...
4,4,"May 23, 2020, Saturday",DCB Bank's profit before tax declines 37.6% to...,Net profit for the financial year ended March ...


In [6]:
# Remove rows with missing values in relevant columns
df.dropna(subset=['Title', 'Description'], inplace=True)

# Define a function to clean text (remove URLs, punctuation, etc.)
def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = text.lower()  # Convert to lowercase
    return text

# Apply text cleaning to 'title' and 'description'
df['cleaned_description'] = df['Description'].apply(clean_text)

# Check cleaned data
df[['Description', 'cleaned_description']].head()


Unnamed: 0,Description,cleaned_description
0,"Close to 14.6 per cent (or 35,000) of the 240,...",close to 146 per cent or 35000 of the 240000 a...
1,"V Vaidyanathan, managing director and chief ex...",v vaidyanathan managing director and chief exe...
2,Rana Kapoor's wife also charged with abetting ...,rana kapoors wife also charged with abetting c...
3,The bank said it was now gearing up to extend ...,the bank said it was now gearing up to extend ...
4,Net profit for the financial year ended March ...,net profit for the financial year ended march ...


In [7]:
# Initialize VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Define function for VADER sentiment
def get_vader_sentiment(text):
    score = analyzer.polarity_scores(text)["compound"]
    return 'positive' if score > 0.05 else 'negative' if score < -0.05 else 'neutral'

# Define function for TextBlob sentiment
def get_textblob_sentiment(text):
    score = TextBlob(text).sentiment.polarity
    return 'positive' if score > 0 else 'negative' if score < 0 else 'neutral'

# Apply sentiment functions to 'cleaned_description' column
df['vader_sentiment'] = df['cleaned_description'].apply(get_vader_sentiment)
df['textblob_sentiment'] = df['cleaned_description'].apply(get_textblob_sentiment)

# Filter rows where VADER and TextBlob sentiments match
df = df[df['vader_sentiment'] == df['textblob_sentiment']].copy()

# Check the resulting dataframe
df[['cleaned_description', 'vader_sentiment', 'textblob_sentiment']].head()


Unnamed: 0,cleaned_description,vader_sentiment,textblob_sentiment
0,close to 146 per cent or 35000 of the 240000 a...,neutral,neutral
6,hdfc bank cuts base rate by 55 bps sbi calls a...,negative,negative
11,enquiries for fresh loans see uptick,positive,positive
15,loans sanctioned by public sector banks psbs a...,neutral,neutral
16,senior bank executives said the indian banks a...,neutral,neutral
