In [1]:
# Tiny hand-written corpus used by every later cell: five short English
# sentences that get tokenized/stemmed and scored for sentiment below.
data = [
    "I love sunny days! They make me feel so happy.",
    "I'm feeling sad today because it's raining.",
    "I'm not sure how I feel about this weather.",
    "The movie was fantastic! I enjoyed every moment of it.",
    "The food was terrible, I would not recommend this restaurant.",
]
# Bare last expression so the notebook echoes the list as the cell output.
data

['I love sunny days! They make me feel so happy.',
 "I'm feeling sad today because it's raining.",
 "I'm not sure how I feel about this weather.",
 'The movie was fantastic! I enjoyed every moment of it.',
 'The food was terrible, I would not recommend this restaurant.']

In [2]:
!pip install nltk textblob



In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from textblob import TextBlob

# Download required NLTK data files.
# - 'punkt' / 'punkt_tab': tokenizer models used by word_tokenize. Newer NLTK
#   releases load 'punkt_tab' instead of 'punkt', so both are requested to keep
#   this cell working regardless of which NLTK version pip installed. On an
#   older NLTK that does not know 'punkt_tab', the call just reports the
#   package as missing and returns False.
# - 'stopwords': the word lists behind stopwords.words('english').
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
# Shared preprocessing resources, built once and reused by preprocess_text.
# The stemmer is a Porter stemmer; the English stop-word list is wrapped in a
# set so the per-token membership check in the filter is a hash lookup.
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))

In [5]:
# Function for text preprocessing
def preprocess_text(text):
    """Tokenize ``text`` and reduce it to a list of stemmed content words.

    Each token produced by ``word_tokenize`` is lower-cased, then discarded if
    it is not purely alphanumeric (e.g. punctuation) or if it appears in the
    module-level ``stop_words`` set. Surviving tokens are stemmed with the
    module-level stemmer ``ps``.

    NOTE: stop-word removal also drops negations such as "not"
    ("I'm not sure how I feel ..." becomes ['sure', 'feel', 'weather']),
    so the result does not preserve polarity cues.

    Args:
        text: The raw sentence to preprocess.

    Returns:
        A list of stemmed tokens, in their original order.
    """
    stems = []
    for token in word_tokenize(text):
        lowered = token.lower()
        # Guard clause: skip punctuation-like tokens and stop words.
        if not lowered.isalnum() or lowered in stop_words:
            continue
        stems.append(ps.stem(lowered))
    return stems

In [6]:
# Run the preprocessing pipeline over the corpus; the result is one token
# list per sentence, in the same order as `data`.
preprocessed_data = list(map(preprocess_text, data))

In [7]:
# Function for sentiment analysis
def analyze_sentiment(text):
    """Return TextBlob's sentiment for ``text``.

    Args:
        text: The raw (un-preprocessed) sentence to score.

    Returns:
        The ``sentiment`` attribute of ``TextBlob(text)``; it prints as
        ``Sentiment(polarity=..., subjectivity=...)``.
    """
    return TextBlob(text).sentiment

In [8]:
# Score the ORIGINAL sentences (not the stemmed tokens); the result is one
# sentiment per sentence, in the same order as `data`.
sentiment_analysis = list(map(analyze_sentiment, data))

In [9]:
# Report each sentence next to its token list and its sentiment score.
# The three lists are parallel (both derived from `data`), so walk them in
# lockstep instead of indexing by position.
separator = "-" * 50
for text, tokens, sentiment in zip(data, preprocessed_data, sentiment_analysis):
    print(f"Original Text: {text}")
    print(f"Preprocessed Text: {tokens}")
    print(f"Sentiment Analysis: {sentiment}")
    print(separator)

Original Text: I love sunny days! They make me feel so happy.
Preprocessed Text: ['love', 'sunni', 'day', 'make', 'feel', 'happi']
Sentiment Analysis: Sentiment(polarity=0.7125, subjectivity=0.8)
--------------------------------------------------
Original Text: I'm feeling sad today because it's raining.
Preprocessed Text: ['feel', 'sad', 'today', 'rain']
Sentiment Analysis: Sentiment(polarity=-0.5, subjectivity=1.0)
--------------------------------------------------
Original Text: I'm not sure how I feel about this weather.
Preprocessed Text: ['sure', 'feel', 'weather']
Sentiment Analysis: Sentiment(polarity=-0.25, subjectivity=0.8888888888888888)
--------------------------------------------------
Original Text: The movie was fantastic! I enjoyed every moment of it.
Preprocessed Text: ['movi', 'fantast', 'enjoy', 'everi', 'moment']
Sentiment Analysis: Sentiment(polarity=0.5, subjectivity=0.8)
--------------------------------------------------
Original Text: The food was terrible, I wo