Sentiment Analysis

In [None]:
# Install once
# pip install nltk

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Fetch the lexicon package before constructing the analyzer
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Example sentence to score
text = "I really loved this workshop, it was amazing!"
score = sia.polarity_scores(text)
print(score)

# Map the compound score to a label: >= 0.05 is Positive,
# <= -0.05 is Negative, anything in between is Neutral
print(
    "Positive" if score['compound'] >= 0.05
    else "Negative" if score['compound'] <= -0.05
    else "Neutral"
)


{'neg': 0.0, 'neu': 0.376, 'pos': 0.624, 'compound': 0.8513}
Positive


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Named Entity Recognition (NER)

In [None]:
# Install once
# pip install spacy
# python -m spacy download en_core_web_sm

import spacy

# Load the small English pipeline (installed via the commands noted above)
nlp = spacy.load("en_core_web_sm")

# Example sentence to run through the pipeline
text = "Elon Musk founded SpaceX in 2002 in the United States."
doc = nlp(text)

# One line per detected entity: "<entity text> -> <entity label>"
for ent in doc.ents:
    print(f"{ent.text} -> {ent.label_}")

Elon Musk -> PERSON
2002 -> DATE
the United States -> GPE


Summarization

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Note: an earlier attempt using pipeline("summarization", ...) raised a
# KeyError because 'summarization' is not a recognized task in this
# version/installation of the transformers library.
# As a workaround, load the tokenizer and model directly and call the
# model's generate() method to produce the summary.

# One checkpoint name is used for both the tokenizer and the seq2seq model
model_name = "sshleifer/distilbart-cnn-12-6"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Passage to summarize
text = """
Artificial Intelligence is transforming industries by automating tasks,
improving efficiency, and enabling data-driven decision making.
However, AI also raises ethical concerns such as data privacy,
bias, and job displacement.
"""

# Tokenize as a batch of one and return PyTorch tensors;
# truncation is enabled with a max_length of 1024
inputs = tokenizer(
    [text],
    truncation=True,
    max_length=1024,
    return_tensors="pt",
)

# Generate with 4 beams; min_length / max_length bound the generated output
summary_ids = model.generate(
    inputs["input_ids"],
    min_length=25,
    max_length=50,
    num_beams=4,  # A common setting for summarization
    early_stopping=True,
)

# Turn the first (only) generated sequence back into text, dropping special tokens
summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary_text)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Please make sure the generation config includes `forced_bos_token_id=0`. 


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/358 [00:00<?, ?it/s]



 Artificial Intelligence is transforming industries by automating tasks,improving efficiency, and enabling data-driven decision making . However, AI also raises ethical concerns such as data privacy, bias, and job displacement .


In [None]:
!pip install nltk spacy transformers torch
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m93.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# ===============================
# NLP TEXT ANALYZER PROJECT
# ===============================

import nltk
import spacy
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Download the 'vader_lexicon' NLTK package before creating the analyzer
nltk.download('vader_lexicon')

# Module-level models shared by the functions below:
#   sia -> used by analyze_sentiment()
#   nlp -> used by extract_entities()
sia = SentimentIntensityAnalyzer()
nlp = spacy.load("en_core_web_sm")

# Load the summarization tokenizer and model directly from the same checkpoint
# name; both are used by summarize_text()
summarization_model_name = "sshleifer/distilbart-cnn-12-6"
summarization_tokenizer = AutoTokenizer.from_pretrained(summarization_model_name)
summarization_model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_name)

# -------------------------------
# SENTIMENT ANALYSIS
# -------------------------------
def analyze_sentiment(text):
    """Label ``text`` as Positive, Negative, or Neutral.

    Scores the text with the module-level ``sia`` analyzer and classifies
    it by the ``'compound'`` entry of the returned scores.

    Args:
        text: The text to score.

    Returns:
        A ``(sentiment, scores)`` tuple: the label string and the full
        score mapping returned by ``sia.polarity_scores``.
    """
    polarity = sia.polarity_scores(text)
    compound_score = polarity['compound']

    # Guard clauses: compound >= 0.05 is Positive, <= -0.05 is Negative,
    # everything in between falls through to Neutral.
    if compound_score >= 0.05:
        return "Positive", polarity
    if compound_score <= -0.05:
        return "Negative", polarity
    return "Neutral", polarity


# -------------------------------
# NAMED ENTITY RECOGNITION
# -------------------------------
def extract_entities(text):
    """Collect the entities that the module-level ``nlp`` pipeline finds in ``text``.

    Args:
        text: The text to process.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, in the order the
        pipeline reports them; empty when no entities are found.
    """
    found = []
    for span in nlp(text).ents:
        found.append((span.text, span.label_))
    return found


# -------------------------------
# TEXT SUMMARIZATION
# -------------------------------
def summarize_text(text, min_words=50, max_length=60, min_length=25, num_beams=4):
    """Summarize ``text`` with the module-level summarization model.

    The word-count gate and the generation settings were previously
    hard-coded; they are now keyword parameters whose defaults reproduce
    the original behavior, so existing ``summarize_text(text)`` calls are
    unaffected.

    Args:
        text: The text to summarize.
        min_words: Minimum number of whitespace-separated words required
            before summarization is attempted (default 50).
        max_length: ``max_length`` passed to ``generate`` (default 60).
        min_length: ``min_length`` passed to ``generate`` (default 25).
        num_beams: ``num_beams`` passed to ``generate`` (default 4).

    Returns:
        The decoded summary string, or the literal message
        ``"Text too short to summarize."`` when ``text`` has fewer than
        ``min_words`` words.
    """
    # Short inputs are not sent to the model at all; callers receive a
    # fixed message instead of a summary.
    if len(text.split()) < min_words:
        return "Text too short to summarize."

    # Tokenize as a batch of one; truncation is enabled with max_length=1024.
    inputs = summarization_tokenizer(
        [text], max_length=1024, return_tensors="pt", truncation=True
    )
    summary_ids = summarization_model.generate(
        inputs["input_ids"],
        num_beams=num_beams,
        max_length=max_length,
        min_length=min_length,
        early_stopping=True
    )

    # Decode the first (only) generated sequence, dropping special tokens.
    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)


# -------------------------------
# MAIN FUNCTION
# -------------------------------
def analyze_text(text):
    """Print a full report for ``text``: original text, sentiment, entities, summary.

    Echoes the input first, then runs ``analyze_sentiment``,
    ``extract_entities`` and ``summarize_text`` on it and prints each
    result under its own heading.

    Args:
        text: The text to analyze.

    Returns:
        None. All results are written to standard output.
    """
    # Echo the input before any model is run
    print("\n📌 ORIGINAL TEXT:")
    print(text)

    # Run the three analyses in the same order as they are reported
    label, polarity = analyze_sentiment(text)
    found_entities = extract_entities(text)
    summary_result = summarize_text(text)

    print("\n🧠 SENTIMENT ANALYSIS:")
    print("Sentiment:", label)
    print("Scores:", polarity)

    print("\n🏷️ NAMED ENTITIES:")
    if not found_entities:
        print("No entities found.")
    else:
        for entity_text, entity_label in found_entities:
            print(entity_text, "->", entity_label)

    print("\n📝 SUMMARY:")
    print(summary_result)


# -------------------------------
# RUN
# -------------------------------
if __name__ == "__main__":
    # Sample passage for the end-to-end run. It is well under the 50-word
    # minimum used by summarize_text, so the summary section reports
    # "Text too short to summarize." for this input.
    # The literal is spelled out line by line so the leading newline, the
    # four-space indents and the trailing indent are all explicit.
    input_text = (
        "\n"
        "    Elon Musk founded SpaceX in 2002.\n"
        "    The company has revolutionized the space industry by reducing launch costs.\n"
        "    Many people admire SpaceX for its innovation, though some criticize its risks.\n"
        "    "
    )

    analyze_text(input_text)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Loading weights:   0%|          | 0/358 [00:00<?, ?it/s]




📌 ORIGINAL TEXT:

    Elon Musk founded SpaceX in 2002.
    The company has revolutionized the space industry by reducing launch costs.
    Many people admire SpaceX for its innovation, though some criticize its risks.
    

🧠 SENTIMENT ANALYSIS:
Sentiment: Positive
Scores: {'neg': 0.133, 'neu': 0.706, 'pos': 0.161, 'compound': 0.25}

🏷️ NAMED ENTITIES:
Elon Musk -> PERSON
2002 -> DATE

📝 SUMMARY:
Text too short to summarize.
