
## NLP Sentiment Analysis using Python

This notebook demonstrates how to implement a basic sentiment analysis pipeline using natural language processing (NLP) techniques and machine learning. We will use libraries such as NLTK, Scikit-learn, and Pandas.

First, we will set up the environment by installing the necessary packages.


In [None]:

!pip install nltk spacy gensim pandas scikit-learn


In [None]:

# Import NLTK and download the tokenizer models
import nltk

# Newer NLTK releases (3.9 and later) make word_tokenize load the Punkt models
# from the 'punkt_tab' resource, while older releases read 'punkt'. Fetch both
# so the cell works regardless of which NLTK version pip installed.
nltk.download('punkt')
nltk.download('punkt_tab')

# Tokenizing a sample sentence into words
from nltk.tokenize import word_tokenize

# Sample sentence for demonstration
sentence = "Hello, world! This is NLP."
tokens = word_tokenize(sentence)
print(tokens)  # Output the tokenized words


In [None]:

# Fetch the NLTK stopword corpus and keep the English list as a set,
# so each membership check below is a single hash lookup.
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Drop every token whose lowercase form is a stopword. The comparison is
# done on the lowercase form; the surviving tokens keep their original casing.
filtered_tokens = list(
    filter(lambda token: token.lower() not in stop_words, tokens)
)
print(filtered_tokens)  # Tokens that survived stopword removal


In [None]:

# Import stemming and lemmatization utilities from NLTK
from nltk.stem import PorterStemmer, WordNetLemmatizer
nltk.download('wordnet')  # WordNet data is required by the lemmatizer

# Initialize the stemmer and lemmatizer
ps = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# Stemming applies suffix-stripping rules without consulting a dictionary
print(ps.stem("faster"))  # Output the stemmed version of 'faster'

# lemmatize() treats the word as a noun unless told otherwise (pos defaults
# to 'n'), so a comparative adjective such as 'faster' is looked up as a noun.
print(lemmatizer.lemmatize("faster"))  # Lemma under the default noun POS

# Passing the part of speech lets WordNet apply its adjective rules;
# the expected lemma here is 'fast'.
print(lemmatizer.lemmatize("faster", pos="a"))  # Lemma as an adjective


In [None]:

# Load necessary libraries for sentiment analysis
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Toy corpus of movie reviews, written as (review text, sentiment label) pairs
# so each review sits next to its label.
labelled_reviews = [
    ('I love this movie!', 'positive'),
    ('This was a terrible movie.', 'negative'),
    ('I really enjoyed the film.', 'positive'),
    ('Worst experience ever.', 'negative'),
    ('It was fantastic!', 'positive'),
    ('Not worth the time.', 'negative'),
    ('Absolutely amazing!', 'positive'),
    ('It was okay, not great.', 'neutral'),
    ('I hate this film.', 'negative'),
    ('Best movie ever!', 'positive'),
]

# Column-oriented view of the same pairs: one list per DataFrame column
data = {
    'text': [review for review, _ in labelled_reviews],
    'sentiment': [label for _, label in labelled_reviews],
}

# Build the DataFrame (columns: 'text', 'sentiment') and show it
df = pd.DataFrame(data)
print(df)  # Display the dataset


In [None]:

# Features are the raw review strings; targets are their sentiment labels
X, y = df['text'], df['sentiment']

# Hold out 20% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words features: the vocabulary is learned from the training split
# only, and that same vocabulary is then used to encode both splits.
vectorizer = CountVectorizer().fit(X_train)
X_train_vectorized = vectorizer.transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Fit a Multinomial Naive Bayes classifier on the training word counts
model = MultinomialNB()
model.fit(X_train_vectorized, y_train)


In [None]:

# Predict sentiment labels for the held-out test reviews
y_pred = model.predict(X_test_vectorized)

# Accuracy: fraction of test reviews whose predicted label equals the true label
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Without an explicit `labels` argument, confusion_matrix only includes the
# classes that occur in y_test or y_pred. On a test split this small that can
# silently drop classes and leaves the row/column order unstated. Pinning the
# labels to every class the model was trained on keeps the matrix shape stable.
class_labels = model.classes_
confusion_matrix = metrics.confusion_matrix(y_test, y_pred, labels=class_labels)

# Show the matrix with its labels: rows are true classes, columns are predictions
print('Confusion Matrix:')
print(pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels))


In [None]:

# Helper that classifies a single piece of text with the fitted pipeline
def predict_sentiment(text):
    """Return the predicted sentiment label for one text.

    The text is wrapped in a one-element list, encoded with the
    module-level ``vectorizer``, and classified by the module-level
    ``model``; both must already be fitted when this is called.

    Parameters
    ----------
    text : the review (or other text) to classify.

    Returns
    -------
    The first (and only) element of the model's prediction for ``text``.
    """
    # transform() expects an iterable of documents, hence the list wrapper
    return model.predict(vectorizer.transform([text]))[0]

# Try the helper on a review that is not part of the dataset above
new_text = "I loved the plot and the acting!"
predicted_label = predict_sentiment(new_text)
print(f'Sentiment: {predicted_label}')
