<a href="https://colab.research.google.com/github/Senor-Anonymous/Sentiment-Analysis/blob/main/Week%201/lexicon_sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
from collections import Counter

class LexiconSentimentAnalyzer:
    """Score text sentiment with VADER, TextBlob and a small custom lexicon.

    Each scorer is exposed as its own method; ``analyze`` runs all three and
    merges them into a single result dictionary. VADER and TextBlob scores
    are mapped to ``'positive'`` / ``'negative'`` / ``'neutral'`` labels using
    the same symmetric threshold, and those two labels are voted on to pick
    the ensemble label.
    """

    # Scores at or above the positive bound are labelled 'positive', scores at
    # or below the negative bound 'negative', and anything in between 'neutral'.
    _POSITIVE_THRESHOLD = 0.05
    _NEGATIVE_THRESHOLD = -0.05

    # Tie-break preference when the labellers disagree: the label with the
    # highest priority wins. Labels missing from the table rank lowest (0).
    _LABEL_PRIORITY = {'positive': 2, 'neutral': 1, 'negative': 0}

    # (lookup path, download identifier) for every NLTK resource this class uses.
    _NLTK_RESOURCES = (
        ('sentiment/vader_lexicon.zip', 'vader_lexicon'),
        ('tokenizers/punkt', 'punkt'),
        ('tokenizers/punkt_tab', 'punkt_tab'),
    )

    def __init__(self):
        """Ensure the NLTK resources exist, then build the VADER analyzer and lexicon.

        Any resource that ``nltk.data.find`` cannot locate is downloaded
        quietly. A confirmation message is printed once setup is complete.
        """
        for resource_path, package_id in self._NLTK_RESOURCES:
            try:
                nltk.data.find(resource_path)
            except LookupError:
                nltk.download(package_id, quiet=True)

        self.vader_analyzer = SentimentIntensityAnalyzer()

        # Word -> score table used by ``custom_lexicon_sentiment``.
        self.custom_lexicon = {
            'profit': 1.0,
            'growth': 0.8,
            'increase': 0.7,
            'positive': 0.6,
            'strong': 0.5,
            'neutral': 0.0,
            'stable': 0.1,
            'decline': -0.7,
            'loss': -1.0,
            'crisis': -0.9,
            'negative': -0.6,
            'weak': -0.5
        }

        print("LexiconSentimentAnalyzer initialized with VADER, TextBlob, and custom lexicon.")

    @classmethod
    def _label_for_score(cls, score):
        """Map a numeric score to 'positive', 'negative' or 'neutral'."""
        if score >= cls._POSITIVE_THRESHOLD:
            return 'positive'
        if score <= cls._NEGATIVE_THRESHOLD:
            return 'negative'
        return 'neutral'

    def vader_sentiment(self, text):
        """Return VADER's compound score for *text* together with its label.

        Returns:
            dict with keys ``'compound'`` (the ``'compound'`` entry of
            ``polarity_scores``) and ``'label'``.
        """
        compound_score = self.vader_analyzer.polarity_scores(text)['compound']
        return {
            'compound': compound_score,
            'label': self._label_for_score(compound_score)
        }

    def textblob_sentiment(self, text):
        """Return TextBlob's polarity for *text* together with its label.

        Returns:
            dict with keys ``'polarity'`` and ``'label'``.
        """
        polarity_score = TextBlob(text).sentiment.polarity
        return {
            'polarity': polarity_score,
            'label': self._label_for_score(polarity_score)
        }

    def custom_lexicon_sentiment(self, text):
        """Return the mean custom-lexicon score of the tokens found in *text*.

        The text is lower-cased and tokenized with ``nltk.word_tokenize``.
        Every token present in ``self.custom_lexicon`` contributes its score
        (repeated tokens count each time). Returns ``0.0`` when no token
        matches.
        """
        tokens = nltk.word_tokenize(text.lower())

        # Plain running total (rather than sum()) so the floating-point
        # result stays identical regardless of the interpreter's sum() strategy.
        total = 0.0
        hits = 0
        for token in tokens:
            if token in self.custom_lexicon:
                total += self.custom_lexicon[token]
                hits += 1

        if hits == 0:
            return 0.0
        return total / hits

    @classmethod
    def _combine(cls, text, vader_result, textblob_result, custom_score):
        """Merge the individual scorer outputs into the ``analyze`` result dict.

        Kept free of any NLTK/TextBlob calls so the voting logic can be
        exercised with plain dictionaries.
        """
        # Mean *magnitude* of the three scores: abs() discards the sign, so
        # this measures strength only. Direction is carried by the label.
        ensemble_score = (
            abs(vader_result['compound'])
            + abs(textblob_result['polarity'])
            + abs(custom_score)
        ) / 3

        # Only the VADER and TextBlob labels vote; the custom score has no label.
        labels = [vader_result['label'], textblob_result['label']]
        label_counts = Counter(labels)

        # Collect every label that shares the top vote count, then break the
        # tie by priority (positive > neutral > negative).
        top_count = max(label_counts.values())
        leaders = [label for label, count in label_counts.items() if count == top_count]
        ensemble_label = max(leaders, key=lambda label: cls._LABEL_PRIORITY.get(label, 0))

        # Fraction of voters that agree with the chosen label.
        confidence_score = label_counts[ensemble_label] / len(labels)

        return {
            'text': text,
            'vader': vader_result,
            'textblob': textblob_result,
            'custom_score': custom_score,
            'ensemble_score': ensemble_score,
            # Previously computed but never returned; exposed so callers can
            # tell which label 'confidence_score' refers to.
            'ensemble_label': ensemble_label,
            'confidence_score': confidence_score
        }

    def analyze(self, text):
        """Run all three scorers on *text* and return the combined result.

        Returns:
            dict with keys ``'text'``, ``'vader'``, ``'textblob'``,
            ``'custom_score'``, ``'ensemble_score'`` (mean absolute value of
            the three scores), ``'ensemble_label'`` (majority label of VADER
            and TextBlob, ties resolved positive > neutral > negative) and
            ``'confidence_score'`` (share of the two labellers that agree
            with ``'ensemble_label'``).
        """
        vader_result = self.vader_sentiment(text)
        textblob_result = self.textblob_sentiment(text)
        custom_score = self.custom_lexicon_sentiment(text)
        return self._combine(text, vader_result, textblob_result, custom_score)

In [None]:
# Create the analyzer in this cell so it does not depend on hidden kernel
# state; `analyzer` is not defined anywhere else in the visible notebook.
analyzer = LexiconSentimentAnalyzer()

# Run the combined analysis on one example headline and show the raw result.
sample_text = 'Excellent earnings beat expectations'
sample_analysis_result = analyzer.analyze(sample_text)

print(f"\nAnalysis for: '{sample_text}'")
print(sample_analysis_result)


Analysis for: 'Excellent earnings beat expectations'
{'text': 'Excellent earnings beat expectations', 'vader': {'compound': 0.5719, 'label': 'positive'}, 'textblob': {'polarity': 1.0, 'label': 'positive'}, 'custom_score': 0.0, 'ensemble_score': 0.5239666666666666, 'confidence_score': 1.0}
