<a href="https://colab.research.google.com/github/IPMSand/NLP-Module/blob/main/Sentiment_Analysis(2)ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sentiment Analysis

# Custom-defined lexicon approach

In [5]:
import re

In [1]:
# Lexicon: maps each word to a sentiment score (positive > 0, negative < 0).
# Words that are not listed here contribute nothing to the total score.
lexicon = dict(
    good=1.0,
    bad=-1.5,
    terrible=-1.5,
    love=2.0,
    hate=-2.0,
    happy=1.5,
    sad=-1.5,
)


In [2]:
# Negation words: each one flips the polarity of the next scored lexicon word.
negations = set(['not', 'no', 'never', 'none'])

In [6]:
def preprocess_text(text):
  """
  Lowercase the text and split it into word tokens.

  A token is any run of word characters matched by the regex, so
  punctuation is dropped and a contraction such as "it's" yields the
  two tokens "it" and "s".
  Returns the tokens as a list, in order of appearance.
  """
  token_pattern = r'\b\w+\b'
  return re.findall(token_pattern, text.lower())


def lexicon_sentiment(text):
  """
  Classify text as 'positive', 'negative' or 'neutral' using the lexicon.

  Each token found in `lexicon` adds its score to a running total. A token
  found in `negations` flips the sign of the next lexicon word, however far
  ahead that word appears; the flip is consumed once it has been applied.
  The sign of the final total decides the label (a total of 0 is 'neutral').
  """
  total = 0.0
  flip_next = False  # True while a negation is waiting for a sentiment word
  for token in preprocess_text(text):
    if token in negations:
      flip_next = True
    elif token in lexicon:
      value = lexicon[token]
      total += -value if flip_next else value
      flip_next = False  # the pending negation (if any) has been used up
  if total > 0:
    return 'positive'
  if total < 0:
    return 'negative'
  return 'neutral'

In [12]:
# Example usage: "love" scores +2.0 and the negated "bad" scores +1.5, so the total is positive
text = "I love this product, it's not bad at all!"
print(lexicon_sentiment(text)) # Output: positive

positive


In [13]:
# Example usage 2: a longer, mixed review. Only exact lexicon entries are scored,
# so "badly" and the misspelled "teribble" contribute nothing to the total.
text = "I love this product, it's not bad at all!. I thought I will hate it really badly when I saw it's teribble state, Hate it!!"
print(lexicon_sentiment(text)) # Output: negative

negative


# Pre-built lexicon (VADER)

In [22]:
pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [23]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Initialize VADER: one shared analyzer instance that the cells below reuse
analyzer  = SentimentIntensityAnalyzer()

In [24]:
# Use VADER's lexicon
def lexicon_sentiment_vader(text):
  """
  Classify text with the shared VADER analyzer.

  Returns a (label, compound) tuple. The label is 'positive' when the
  compound score is >= 0.05, 'negative' when it is <= -0.05, and
  'neutral' for anything in between.
  """
  compound = analyzer.polarity_scores(text)['compound']
  if compound >= 0.05:
    label = 'positive'
  elif compound <= -0.05:
    label = 'negative'
  else:
    label = 'neutral'
  return label, compound


In [35]:
# Example usage: score several short texts with VADER and print each label with its compound score
texts = ["I love this product, i thought I will hate it.",
         "He is a cute bad boy",
         "Wow That's so sick!!",
         "wow That's so sick. Hate it",
         "ok, i guess, i will ses it later"
         ]
for text in texts:
  # lexicon_sentiment_vader returns a (label, compound score) pair
  sentiment, score = lexicon_sentiment_vader(text)
  print(f"Text: {text}\nSentiment: {sentiment}, Score: {score}\n")

Text: I love this product, i thought I will hate it.
Sentiment: positive, Score: 0.128

Text: He is a cute bad boy
Sentiment: negative, Score: -0.128

Text: Wow That's so sick!!
Sentiment: positive, Score: 0.2001

Text: wow That's so sick. Hate it
Sentiment: negative, Score: -0.5819

Text: ok, i guess, i will ses it later
Sentiment: neutral, Score: 0.0



# Machine Learning Based Sentiment Analysis

In [36]:
# Simple ML-based Sentiment Analysis
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt

In [120]:
# Toy dataset written by hand for this demo (normally you'd load a much bigger one)
texts = ["That was dirty. So ugly and terrible to watch", # negative
         "wow, That was sick",  # positive
         "I'm not bad",         # negative -- NOTE(review): arguably positive/neutral; confirm the intended label
         "He's ok at drawing",  # neutral
         "I guess tha't ok",    # neutral
         "I like and love it"   # positive
         ]
# One label per text, in the same order as `texts`. The class is spelled
# 'negative' so it matches the label returned by the lexicon-based functions
# in this notebook and reads correctly when predictions are printed.
labels = ["negative", "positive", "negative", "neutral", "neutral", "positive"]

# With only six examples the model cannot generalise, so its predictions are
# unreliable. Train on a large labelled dataset to get meaningful results.

In [121]:
# Step 1: Convert text to bag-of-words count features
# NOTE(review): the vocabulary is learned from every text here, including the
# ones that the later train/test split holds out for testing.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)

In [122]:
# Step 2: Split into train/test sets
# Split the raw texts first and fit the vocabulary on the training portion only,
# so words that appear only in held-out texts cannot leak into the features.
# random_state keeps the split reproducible across runs.
texts_train, texts_test, y_train, y_test = train_test_split(texts, labels, test_size=0.3, random_state=42)
X_train = vectorizer.fit_transform(texts_train)  # re-fits the vectorizer on the training texts only
X_test = vectorizer.transform(texts_test)        # held-out texts are encoded with the training vocabulary

In [123]:
# Step 3: Train Logistic Regression Classifier
# Fit the model on the training features and their labels
clf = LogisticRegression()
clf.fit(X_train, y_train)


In [124]:
# Step 4: Predict labels for the held-out test set
y_pred = clf.predict(X_test)

In [125]:
# Step 5: Evaluate by comparing the test-set predictions with the true test labels
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.0


In [126]:
# Step 6: Try new examples
new_texts = ["I really love this", "This is awful", "Not bad at all"]
# Use transform (not fit_transform) so the already-fitted vocabulary is reused
# and the feature columns line up with what the classifier was trained on.
X_new = vectorizer.transform(new_texts)
# Store these predictions under their own name: overwriting y_pred would discard
# the test-set predictions and break a later re-run of the evaluation cell
# (y_test and y_pred would no longer have the same length).
y_pred_new = clf.predict(X_new)
print(y_pred_new)

for text, pred in zip(new_texts, y_pred_new):
  print(f"Text: {text}\nPredicted Sentiment: {pred}\n")

['neutral' 'neutral' 'neutral']
Text: I really love this
Predicted Sentiment: neutral

Text: This is awful
Predicted Sentiment: neutral

Text: Not bad at all
Predicted Sentiment: neutral



# FROM API

> Check later whether this section runs correctly (I have not run it yet because I had no API access).

#
> Log in to Hugging Face ---> Tasks ---> NLP ---> Text Classification
> distilbert-base-uncased

> Token
> Settings --> Access Tokens --> create a new token

In [None]:
# API setup
# Read the endpoint and the access token from environment variables so the
# secret never has to be typed into (and saved with) the notebook.
# The placeholder defaults are used only when the variables are unset; set
# HF_API_URL and HF_API_TOKEN to real values before calling the API.
import os

API_URL = os.environ.get("HF_API_URL", "from Hugging Face_api_here") # real API URL
API_TOKEN = os.environ.get("HF_API_TOKEN", "from Hugging Face_api_here") # real access token
headers = {"Authorization": f"Bearer {API_TOKEN}"}


In [None]:
import requests


In [None]:
def analyze_sentiment(text):
  """
  Send text to the configured text-classification endpoint and return its top label.

  Returns a (label, score) tuple, where score is the confidence reported for
  that label. On any failure (network error or timeout, non-200 status,
  unexpected response body) the problem is printed and (None, None) is returned.
  """
  payload = {"inputs": text}
  try:
    # Without a timeout, requests can wait indefinitely on a stalled connection.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
  except requests.RequestException as exc:
    print(f"Error: request failed,{exc}")
    return None, None
  if response.status_code != 200:
    print(f"Error: {response.status_code},{response.text}")
    return None, None
  try:
    result = response.json()
    # The body may be nested ([[{label, score}, ...]], one inner list per input)
    # or flat ([{label, score}, ...]); accept both shapes.
    candidates = result[0] if isinstance(result[0], list) else result
    # Pick the highest-scoring label rather than relying on the response order.
    best = max(candidates, key=lambda item: item['score'])
    return best['label'], best['score']
  except (ValueError, LookupError, TypeError) as exc:
    # Covers invalid JSON, an empty list, missing keys, or a non-list body.
    print(f"Error: unexpected response format,{exc}")
    return None, None

In [None]:
# Example: classify a few sample texts through the API and print each label with its score
texts = ["I love this product, it's amazing!",
         "This is terrible and wrost experince ever.",
         "It's not bad at all"
         ]
for text in texts:
  # analyze_sentiment returns (None, None) when the request fails
  sentiment, score = analyze_sentiment(text)
  print(f"Text: {text}\nSentiment: {sentiment}, Score: {score}\n")
# TODO: run with a real API URL and token, then check the answers