In [None]:
import pandas as pd
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Location of the reviews CSV; the rest of the script reads its 'Score'
# and 'Text' columns.
REVIEWS_CSV_PATH = "/content/Reviews.csv"

# Read the whole file into a DataFrame.
df = pd.read_csv(REVIEWS_CSV_PATH)

def sentiment_label(score):
    """Map a numeric review score to a coarse sentiment class.

    Args:
        score: Numeric rating of a review.

    Returns:
        'positive' when ``score >= 4``, 'neutral' when ``score == 3``,
        and 'negative' for every other value. Values strictly between
        3 and 4, and NaN (for which both comparisons are false), are
        therefore labelled 'negative'.
    """
    # Handle the single exact-match class first, then split the rest.
    if score == 3:
        return 'neutral'
    return 'positive' if score >= 4 else 'negative'

# Derive the target column: one sentiment class per review, computed
# element-wise from the numeric 'Score' column.
df['Sentiment'] = df['Score'].map(sentiment_label)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['Text'],
    df['Sentiment'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features capped at 5000 vocabulary terms. The vocabulary and
# IDF weights are learned from the training texts only; the test texts
# are merely projected onto them.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit a logistic-regression classifier (fit() returns the estimator).
classifier = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# Score the held-out set.
y_pred = classifier.predict(X_test_tfidf)

# Overall accuracy.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

# Per-class precision / recall / F1.
report = classification_report(y_true=y_test, y_pred=y_pred)
print('Classification Report:', report, sep='\n')

def predict_sentiment(text):
    """Predict the sentiment label of a single piece of text.

    Uses the module-level ``vectorizer`` and ``classifier``, so both
    must already be fitted when this is called.

    Args:
        text: The raw text to classify.

    Returns:
        The first (and only) element of the classifier's prediction
        for ``text``.
    """
    # The vectorizer works on a collection of documents, so wrap the
    # single text in a one-element list and unwrap the single result.
    features = vectorizer.transform([text])
    return classifier.predict(features)[0]

# Demo: classify a few hand-written reviews and print each result.
text_inputs = [
    "This product is amazing and works well!",
    "The quality of this product is very poor.",
    "It was a bad product.",
]

# map() is lazy, so each prediction is computed right before it is
# printed, one review at a time.
for text, sentiment in zip(text_inputs, map(predict_sentiment, text_inputs)):
    print(f"Text: {text}\nSentiment: {sentiment}\n")

Accuracy: 0.8721886516962645
Classification Report:
              precision    recall  f1-score   support

    negative       0.75      0.69      0.72     16181
     neutral       0.52      0.22      0.31      8485
    positive       0.91      0.97      0.94     89025

    accuracy                           0.87    113691
   macro avg       0.72      0.63      0.65    113691
weighted avg       0.85      0.87      0.86    113691

Text: This product is amazing and works well!
Sentiment: positive

Text: The quality of this product is very poor.
Sentiment: negative

Text: It was a bad product.
Sentiment: negative

