<a href="https://colab.research.google.com/github/H-RAKSHITHA-RAJU/sentiment-analysis-project-reviews/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# This cell mounts your Google Drive to the Colab environment.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Download NLTK data (only needs to be done once per session)
nltk.download('stopwords')

# --- IMPORTANT ---
# This file path now points to the file in your Google Drive.
# Make sure your folder structure matches!
filepath = '/content/drive/MyDrive/Colab_Projects/sentiment-analysis-project/data/amazon_cells_labelled.txt'

# Load the dataset
df = pd.read_csv(filepath, sep='\t', header=None, names=['review', 'sentiment'])

# Display the first 5 rows to confirm it loaded correctly
print(df.head())

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


                                              review  sentiment
0  So there is no way for me to plug it in here i...          0
1                        Good case, Excellent value.          1
2                             Great for the jawbone.          1
3  Tied to charger for conversations lasting more...          0
4                                  The mic is great.          1


In [3]:
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    text = re.sub('[^a-zA-Z]', ' ', text).lower()
    words = text.split()
    words = [stemmer.stem(word) for word in words if word not in stop_words]
    return ' '.join(words)

df['cleaned_review'] = df['review'].apply(preprocess_text)
print("Preprocessing complete.")
print(df.head())

Preprocessing complete.
                                              review  sentiment  \
0  So there is no way for me to plug it in here i...          0   
1                        Good case, Excellent value.          1   
2                             Great for the jawbone.          1   
3  Tied to charger for conversations lasting more...          0   
4                                  The mic is great.          1   

                                 cleaned_review  
0                 way plug us unless go convert  
1                          good case excel valu  
2                                  great jawbon  
3  tie charger convers last minut major problem  
4                                     mic great  


In [6]:
# Define features (X) and target (y)
X = df['cleaned_review']
y = df['sentiment']

# Initialize TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=1500) # Using more features
X_tfidf = tfidf_vectorizer.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

# Initialize and train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", report)

Model Accuracy: 81.50%

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.78      0.80        93
           1       0.82      0.84      0.83       107

    accuracy                           0.81       200
   macro avg       0.81      0.81      0.81       200
weighted avg       0.81      0.81      0.81       200

