# Sentiment Analysis Tool in Python (NLP)

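This notebook demonstrates how to build a simple sentiment analysis tool using Python. We'll use NLP techniques to clean and classify customer feedback. The examples below use a small hard-coded sample; in practice the feedback would be extracted from sources such as websites.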

In [None]:

# Install necessary libraries if you haven't already
!pip install pandas numpy nltk scikit-learn matplotlib seaborn


## Step 1: Import the Required Libraries

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string


## Step 2: Download NLTK Data

In [None]:

# word_tokenize needs the Punkt sentence models. Newer NLTK releases look them up
# under 'punkt_tab', older ones under 'punkt', so fetch both to work with either.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)


## Step 3: Data Preprocessing

In [None]:

# Toy dataset: five feedback sentences, each paired with its sentiment label
data = dict(
    feedback=[
        'This product is amazing!',
        'I had a terrible experience.',
        'Customer service was excellent.',
        'The quality is bad.',
        'I am very happy with the purchase.',
    ],
    label=['positive', 'negative', 'positive', 'negative', 'positive'],
)
df = pd.DataFrame(data)  # one row per feedback, columns 'feedback' and 'label'

# Display the dataset
# A bare expression is only rendered when it is the last line of a cell, so request
# the preview explicitly with display().
display(df.head())

# Define stopwords and punctuations
# NLTK's English stop-word list includes the negations below. Dropping them would
# turn "not good" into "good", which inverts the sentiment, so keep them as tokens.
negation_words = {'no', 'nor', 'not'}
stop_words = set(stopwords.words('english')) - negation_words
punctuations = set(string.punctuation)  # single punctuation characters

# Text cleaning function
def clean_text(text):
    """Normalize one piece of feedback into a space-separated token string.

    The text is lowercased and tokenized with NLTK's ``word_tokenize``. Tokens
    found in the module-level ``stop_words`` set, and tokens made up entirely of
    characters from the module-level ``punctuations`` set, are dropped.

    Args:
        text: The raw feedback string. Non-string values (for example ``None``
            or the ``NaN`` used for missing cells) are treated as empty.

    Returns:
        The remaining tokens joined by single spaces; ``''`` if nothing is left.
    """
    if not isinstance(text, str):
        return ''  # missing feedback has no tokens; avoids AttributeError on .lower()
    tokens = word_tokenize(text.lower())
    kept = [
        word for word in tokens
        if word not in stop_words
        # Check character by character so multi-character punctuation tokens
        # such as '...' or '--' are removed too, not only single symbols.
        and not all(char in punctuations for char in word)
    ]
    return ' '.join(kept)

# Run every raw feedback string through clean_text and store the result as a new column
df['cleaned_feedback'] = [clean_text(entry) for entry in df['feedback']]
df.head()


## Step 4: Feature Extraction

In [None]:

# Split the data into training and testing sets
# The split happens on the cleaned text, before vectorizing, so the vocabulary is
# learned from the training rows only; fitting the vectorizer on every row would
# leak test-set words into the features.
y = df['label']  # Labels
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_feedback'], y, test_size=0.2, random_state=42
)

# Convert text data to numerical features using CountVectorizer
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)  # learn the vocabulary and encode the training rows
X_test = vectorizer.transform(text_test)  # encode held-out rows with that same vocabulary

# Features for the whole dataset, kept under the original name for any cell that uses it
X = vectorizer.transform(df['cleaned_feedback'])


## Step 5: Model Building

In [None]:

# Fit a multinomial Naive Bayes model on the training features
nb = MultinomialNB().fit(X_train, y_train)

# Score the held-out split: predict its labels, then report the share that match
y_pred = nb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Display confusion matrix
# Pin the label order so the matrix is always 2x2 and lines up with the tick labels,
# even when the test split is missing one of the classes.
class_labels = ['negative', 'positive']
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Negative', 'Positive'], yticklabels=['Negative', 'Positive'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')  # columns of cm are the predicted labels
plt.ylabel('Actual')  # rows of cm are the true labels
plt.show()


## Step 6: Testing the Tool

In [None]:

# Try the trained model on a piece of feedback it has not seen
new_feedback = ["The product is great and I love it!"]
cleaned_new_feedback = list(map(clean_text, new_feedback))  # same cleaning as the training text
new_feedback_vector = vectorizer.transform(cleaned_new_feedback)  # reuse the fitted vocabulary

# Classify it and report the label of the first (only) sample
predicted_sentiment = nb.predict(new_feedback_vector)
print(f"Predicted sentiment: {predicted_sentiment[0]}")
