<a href="https://colab.research.google.com/github/Snehadevop/devopss/blob/main/Task_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Import Libraries
import pandas as pd
import re
import nltk

from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Download stopwords
nltk.download("stopwords")

# Create Dataset
reviews = [
    "This product is amazing and works great",
    "Worst purchase I ever made",
    "Very happy with the quality",
    "Terrible experience, not recommended",
    "Excellent performance and good value",
    "Completely useless item",
    "Loved it, will buy again",
    "Waste of money",
    "Very satisfied with the service",
    "Bad quality and poor performance"
]

labels = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]

dataset = pd.DataFrame({"Review": reviews, "Sentiment": labels})

# Text Cleaning Function
stop_words = set(stopwords.words("english"))

def clean_text(text):
    text = text.lower()
    text = re.sub("[^a-z ]", "", text)
    filtered_words = [word for word in text.split() if word not in stop_words]
    return " ".join(filtered_words)

dataset["Processed_Review"] = dataset["Review"].apply(clean_text)

# Split Data
X_train, X_test, y_train, y_test = train_test_split(
    dataset["Processed_Review"],
    dataset["Sentiment"],
    test_size=0.3,
    random_state=1
)

# Convert Text to TF-IDF Features
tfidf = TfidfVectorizer(max_features=50)
X_train_vec = tfidf.fit_transform(X_train)
X_test_vec = tfidf.transform(X_test)

# Build Logistic Regression Model
classifier = LogisticRegression(solver="liblinear")
classifier.fit(X_train_vec, y_train)

# Predictions
predicted = classifier.predict(X_test_vec)

# Evaluation
print("Model Accuracy:", accuracy_score(y_test, predicted))
print("\nClassification Report:\n", classification_report(y_test, predicted))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, predicted))

# Test with New Review
new_review = ["The item is wonderful and has excellent quality"]
new_review_clean = [clean_text(text) for text in new_review]
new_review_vec = tfidf.transform(new_review_clean)

result = classifier.predict(new_review_vec)

print("\nPrediction for New Review:",
      "Positive Review" if result[0] == 1 else "Negative Review")

Model Accuracy: 0.0

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       2.0

    accuracy                           0.00       3.0
   macro avg       0.00      0.00      0.00       3.0
weighted avg       0.00      0.00      0.00       3.0


Confusion Matrix:
 [[0 1]
 [2 0]]

Prediction for New Review: Negative Review


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
