<a href="https://colab.research.google.com/github/JocelynAbey/JocelynAbey/blob/main/financial%20sentiment%20analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords, wordnet, sentiwordnet as swn
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [6]:
# Ensure necessary downloads (run these once)
# Corpora/models fetched up front; nltk.download skips packages that are
# already present locally.
for resource in ('stopwords', 'punkt', 'wordnet', 'sentiwordnet'):
    nltk.download(resource)
# Kept as the final expression so the cell still displays its return value.
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [7]:
# Step 1: Load the dataset
file_path = "/content/drive/MyDrive/NLPAssignment/FinancialSentimentAnalysis (1).csv"  # Change the path as needed
fsa = pd.read_csv(file_path)

# Step 1b: Display info and description of dataframe
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
fsa.info()
print(fsa.describe())

# Step 2: Preprocessing
# Shared, module-level helpers used by preprocess_text below: built once here
# so they are not re-created for every sentence.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))  # set => O(1) membership tests

def preprocess_text(text):
    """Normalise one sentence before lexicon-based scoring.

    The text is lowercased, every character other than a-z and whitespace is
    deleted, and the remainder is tokenized with ``word_tokenize``. Tokens
    found in the module-level ``stop_words`` set are dropped; the rest are
    lemmatized with the module-level ``lemmatizer``.

    Args:
        text: The sentence to clean; must be a ``str``.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    letters_only = re.sub(r'[^a-z\s]', '', text.lower())
    lemmas = []
    for token in word_tokenize(letters_only):
        if token in stop_words:
            continue  # stopwords carry no sentiment signal for this pipeline
        lemmas.append(lemmatizer.lemmatize(token))
    return " ".join(lemmas)

# Clean every sentence (cast to str first) and store the result in a new column
sentences_as_text = fsa["Sentence"].astype(str)
fsa["Processed_Sentence"] = sentences_as_text.apply(preprocess_text)

# Step 3: SentiWordNet Sentiment Score Calculation
def get_sentiwordnet_score(text):
    """Return the average SentiWordNet polarity of the words in ``text``.

    Each token produced by ``word_tokenize`` is looked up in WordNet. Tokens
    with no synset are skipped; for the others, the first synset returned is
    used and contributes (positive score - negative score) from SentiWordNet.

    Args:
        text: The (already preprocessed) sentence to score.

    Returns:
        The mean contribution over the tokens that could be scored, or 0 when
        no token had a synset.
    """
    total = 0
    matched = 0

    for token in word_tokenize(text):
        candidates = wordnet.synsets(token)
        if not candidates:
            continue
        # Only the first-listed sense is consulted; no disambiguation is done.
        polarity = swn.senti_synset(candidates[0].name())
        total += polarity.pos_score() - polarity.neg_score()
        matched += 1

    if matched > 0:
        return total / matched
    return 0

# Score each cleaned sentence with SentiWordNet
processed_sentences = fsa["Processed_Sentence"]
fsa["Sentiment_Score"] = processed_sentences.apply(get_sentiwordnet_score)

# Preview the raw text, the cleaned text and the score for the first rows
preview_columns = ["Sentence", "Processed_Sentence", "Sentiment_Score"]
print(fsa[preview_columns].head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5842 entries, 0 to 5841
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Sentence   5842 non-null   object
 1   Sentiment  5842 non-null   object
dtypes: object(2)
memory usage: 91.4+ KB
None
                                                 Sentence Sentiment
count                                                5842      5842
unique                                               5322         3
top     Managing Director 's comments : `` Net sales f...   neutral
freq                                                    2      3130
                                            Sentence  \
0  The GeoSolutions technology will leverage Bene...   
1  $ESI on lows, down $1.50 to $2.50 BK a real po...   
2  For the last quarter of 2010 , Componenta 's n...   
3  According to the Finnish-Russian Chamber of Co...   
4  The Swedish buyout firm has sold its remaining...   

               

In [8]:
import nltk
from nltk.corpus import wordnet

# Ensure necessary downloads (run these once)
for resource in ('wordnet', 'omw-1.4'):
    nltk.download(resource)

# Sample words from dataset for analysis
sample_words = ["profit", "loss", "growth", "investment"]

# a. Synsets
# Print the name of every WordNet synset each sample word belongs to.
for word in sample_words:
    synset_names = [syn.name() for syn in wordnet.synsets(word)]
    print(f"Synsets for '{word}': {synset_names}")

# b. Synonyms and Antonyms
# For each sample word, gather the lemma names of all its synsets (synonyms)
# and the names of all antonym lemmas attached to those lemmas.
for word in sample_words:
    synonyms = set()
    antonyms = set()

    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
            # lemma.antonyms() returns a list that may hold several entries
            # (or none); record every one rather than only the first.
            antonyms.update(antonym.name() for antonym in lemma.antonyms())

    print(f"Synonyms for '{word}': {synonyms}")
    print(f"Antonyms for '{word}': {antonyms}")

# c. Hyponym and Hypernym
# Only the first-listed synset of each word is inspected.
for word in sample_words:
    synsets = wordnet.synsets(word)
    if not synsets:
        continue  # word unknown to WordNet: nothing to report
    primary_sense = synsets[0]
    hyponyms = primary_sense.hyponyms()
    hypernyms = primary_sense.hypernyms()
    hyponym_lemmas = [hypo.lemma_names() for hypo in hyponyms]
    hypernym_lemmas = [hyper.lemma_names() for hyper in hypernyms]
    print(f"Hyponyms for '{word}': {hyponym_lemmas}")
    print(f"Hypernyms for '{word}': {hypernym_lemmas}")

# d. WordNet Path Similarity
word1, word2 = "profit", "loss"
# Compare the first-listed synset of each word.
first_senses = [wordnet.synsets(term)[0] for term in (word1, word2)]
syn1, syn2 = first_senses
similarity = syn1.path_similarity(syn2)
print(f"Path similarity between '{word1}' and '{word2}': {similarity}")

# e. Word Sense Disambiguation using Lesk Algorithm
from nltk.wsd import lesk

sentence = "The company made a huge profit last quarter."
# Lesk takes the context as a list of tokens; a plain whitespace split is used.
context_tokens = sentence.split()
disambiguated_sense = lesk(context_tokens, "profit")
print(f"Disambiguated sense for 'profit': {disambiguated_sense}")


Synsets for 'profit': ['net_income.n.01', 'profit.n.02', 'profit.v.01', 'profit.v.02']
Synsets for 'loss': ['loss.n.01', 'loss.n.02', 'loss.n.03', 'loss.n.04', 'loss.n.05', 'loss.n.06', 'personnel_casualty.n.01', 'passing.n.02']
Synsets for 'growth': ['growth.n.01', 'growth.n.02', 'increase.n.03', 'growth.n.04', 'emergence.n.01', 'growth.n.06', 'growth.n.07']
Synsets for 'investment': ['investing.n.01', 'investment.n.02', 'investment.n.03', 'investment.n.04', 'investment.n.05', 'investment.n.06']
Synonyms for 'profit': {'earnings', 'net_income', 'profits', 'turn_a_profit', 'lucre', 'benefit', 'net_profit', 'net', 'profit', 'gain'}
Antonyms for 'profit': {'lose'}
Synonyms for 'loss': {'deprivation', 'expiration', 'red', 'departure', 'personnel_casualty', 'red_ink', 'loss', 'release', 'going', 'exit', 'passing'}
Antonyms for 'loss': {'gain'}
Synonyms for 'growth': {'maturation', 'development', 'outgrowth', 'growth', 'ontogeny', 'growing', 'emergence', 'increment', 'ontogenesis', 'increas

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
