In [1]:
!pip install pandas numpy nltk scikit-learn matplotlib




In [2]:
import pandas as pd
import numpy as np
import nltk
import re
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [5]:
# Hand-labelled toy corpus: every review is stored next to its sentiment label
# so the pairing is visible at a glance.
labelled_reviews = [
    ("I love this phone! It's amazing.", 'positive'),
    ("This is the worst product ever.", 'negative'),
    ("Absolutely fantastic! I highly recommend it.", 'positive'),
    ("I hate this. Waste of money!", 'negative'),
    ("Not bad, but could be better.", 'neutral'),
    ("I really enjoy using this.", 'positive'),
    ("Terrible experience, very disappointed.", 'negative'),
    ("It’s okay, nothing special.", 'neutral'),
    ("I’m so happy with my purchase!", 'positive'),
    ("I regret buying this, not good at all.", 'negative'),
]

# Unzip the pairs back into the column-oriented mapping pandas expects.
review_texts, review_labels = zip(*labelled_reviews)
data = {'text': list(review_texts), 'sentiment': list(review_labels)}

# Two-column frame: raw review text and its label.
df = pd.DataFrame(data)
print(df)


                                           text sentiment
0              I love this phone! It's amazing.  positive
1               This is the worst product ever.  negative
2  Absolutely fantastic! I highly recommend it.  positive
3                  I hate this. Waste of money!  negative
4                 Not bad, but could be better.   neutral
5                    I really enjoy using this.  positive
6       Terrible experience, very disappointed.  negative
7                   It’s okay, nothing special.   neutral
8                I’m so happy with my purchase!  positive
9        I regret buying this, not good at all.  negative


In [3]:
nltk.download('stopwords')
from nltk.corpus import stopwords


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
_STOPWORD_CACHE = {}  # language name -> frozenset of stopwords, filled on first use


def _load_stopwords(language='english'):
    """Return the NLTK stopword list for `language` as a cached frozenset."""
    if language not in _STOPWORD_CACHE:
        _STOPWORD_CACHE[language] = frozenset(stopwords.words(language))
    return _STOPWORD_CACHE[language]


def preprocess_text(text, stop_words=None):
    """Normalise a piece of text before it is vectorised.

    The text is lower-cased, every run of non-word characters is turned into
    a single space, and the remaining tokens are filtered against a stopword
    collection.

    Parameters
    ----------
    text : str
        Raw input text.
    stop_words : iterable of str, optional
        Lower-case tokens to drop. When omitted, NLTK's English stopword list
        is used. Note that this default list removes negations such as "not";
        pass a custom collection to keep them.

    Returns
    -------
    str
        The surviving tokens joined by single spaces ('' if none survive).
    """
    if stop_words is None:
        # Loaded once and reused: the corpus lookup is loop-invariant, and a
        # frozenset gives constant-time membership tests.
        stop_words = _load_stopwords('english')
    elif not isinstance(stop_words, (set, frozenset)):
        stop_words = frozenset(stop_words)

    # Collapsing runs of non-word characters and then splitting on whitespace
    # yields exactly the maximal runs of word characters.
    tokens = re.sub(r'\W+', ' ', text.lower()).split()
    return ' '.join(token for token in tokens if token not in stop_words)

# Run every raw review through the cleaner and keep the result in a new
# column next to the original text.
cleaned_reviews = df['text'].map(preprocess_text)
df['cleaned_text'] = cleaned_reviews

# Show raw and cleaned text side by side for a quick sanity check.
print(df[['text', 'cleaned_text']])


                                           text  \
0              I love this phone! It's amazing.   
1               This is the worst product ever.   
2  Absolutely fantastic! I highly recommend it.   
3                  I hate this. Waste of money!   
4                 Not bad, but could be better.   
5                    I really enjoy using this.   
6       Terrible experience, very disappointed.   
7                   It’s okay, nothing special.   
8                I’m so happy with my purchase!   
9        I regret buying this, not good at all.   

                            cleaned_text  
0                     love phone amazing  
1                     worst product ever  
2  absolutely fantastic highly recommend  
3                       hate waste money  
4                       bad could better  
5                     really enjoy using  
6       terrible experience disappointed  
7                   okay nothing special  
8                         happy purchase  
9                     regret buying good  

In [8]:
# Target labels (positive / negative / neutral), one per review.
y = df['sentiment']

# Learn the TF-IDF vocabulary from the cleaned reviews and encode them as
# numeric features in a single step.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned_text'])


In [9]:
# Split the cleaned text first, and only then fit TF-IDF on the training
# portion, so the vocabulary and IDF weights are never derived from the
# held-out rows (fitting on all rows before splitting leaks test data).
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_text'], y, test_size=0.2, random_state=42
)

# Rebind `vectorizer` to the train-only fit so later cells that call
# vectorizer.transform(...) produce features matching what the model saw.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)


In [10]:
# Build a multinomial Naive Bayes classifier and train it on the training
# features and labels; fit() hands back the estimator itself.
model = MultinomialNB().fit(X_train, y_train)
model  # keep the fitted estimator as the cell's displayed value


In [11]:
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Overall fraction of correct predictions.
print("Accuracy:", accuracy_score(y_test, y_pred))

# Per-class precision / recall / F1. zero_division=0 reports 0.00 for a class
# that is never predicted (the same value the default falls back to) without
# emitting an UndefinedMetricWarning for each affected metric.
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)


Accuracy: 0.5
Classification Report:
               precision    recall  f1-score   support

    negative       0.50      1.00      0.67         1
    positive       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
def predict_sentiment(text):
    """Return the predicted sentiment label for a single piece of text.

    The text goes through `preprocess_text`, is encoded with the
    module-level `vectorizer`, and is classified by the module-level `model`.
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([preprocess_text(text)])
    # predict() returns one label per row; there is exactly one row here.
    return model.predict(features)[0]

# Smoke-test the helper on two unseen sentences.
for sample in (
    "I love this product!",    # Expected: positive
    "Worst experience ever.",  # Expected: negative
):
    print(predict_sentiment(sample))


positive
negative
