In [2]:
import pandas as pd
import re
import os

# Read the raw reviews CSV into a DataFrame
csv_path = r"C:\Users\abira\OneDrive\Desktop\NULL CLASS\Reviews.csv"
df = pd.read_csv(csv_path)

# Preview the first rows and list the available columns
print(df.head())
print(df.columns)

# Report how many values are missing in each column, then keep only the
# rows that have both a review text and a score
missing_per_column = df.isnull().sum()
print(missing_per_column)
required_columns = ['Text', 'Score']
df = df.dropna(subset=required_columns)

#  Create sentiment labels based on 'Score' column
def get_sentiment(score):
    """Translate a review score into a sentiment label.

    Args:
        score: Numeric rating taken from the 'Score' column.

    Returns:
        'negative' when score is 2 or lower, 'neutral' when score equals 3,
        and 'positive' for every other value.
    """
    # Low ratings are handled first so the remaining cases are 3 vs. the rest
    if score <= 2:
        return 'negative'
    return 'neutral' if score == 3 else 'positive'

# Label every review by passing its score through get_sentiment
sentiment_labels = df['Score'].apply(get_sentiment)
df['sentiment'] = sentiment_labels

# Show how many reviews fall into each sentiment class
sentiment_counts = df['sentiment'].value_counts()
print(sentiment_counts)

#  Basic text preprocessing
def preprocess_text(text):
    """Normalize a review string for feature extraction.

    The text is lower-cased, every character that is not a-z or whitespace
    is deleted, and runs of whitespace are collapsed to a single space with
    leading/trailing whitespace removed.

    Args:
        text: The raw review text. Non-string values (for example a NaN
            float or None from a missing cell) are accepted.

    Returns:
        The cleaned string; an empty string when `text` is not a string.
    """
    # A missing or numeric cell has no .lower(); treat it as "no text"
    # instead of raising AttributeError in the middle of a DataFrame apply.
    if not isinstance(text, str):
        return ''

    text = text.lower()
    # NOTE(review): characters are deleted, not replaced by a space, so
    # "peanuts.The" becomes "peanutsthe". Kept as-is because any model fitted
    # on this output depends on the exact cleaning; change it only together
    # with retraining.
    text = re.sub(r'[^a-z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Add a normalized copy of every review's text
cleaned_reviews = df['Text'].apply(preprocess_text)
df['clean_text'] = cleaned_reviews

# Preview the cleaned text next to its sentiment label
preview_columns = ['clean_text', 'sentiment']
print(df[preview_columns].head())

# Write the cleaned dataset into the folder that holds the source CSV
source_dir = os.path.dirname(csv_path)
output_path = os.path.join(source_dir, "Reviews_cleaned.csv")
df.to_csv(output_path, index=False)
print(f"✅ Cleaned dataset saved to: {output_path}")


   Id   ProductId          UserId                      ProfileName  \
0   1  B001E4KFG0  A3SGXH7AUHU8GW                       delmartian   
1   2  B00813GRG4  A1D87F6ZCVE5NK                           dll pa   
2   3  B000LQOCH0   ABXLMWJIXXAIN  Natalia Corres "Natalia Corres"   
3   4  B000UA0QIQ  A395BORC6FGVXV                             Karl   
4   5  B006K2ZZ7K  A1UQRSCLF8GW1T    Michael D. Bigham "M. Wassir"   

   HelpfulnessNumerator  HelpfulnessDenominator  Score        Time  \
0                     1                       1      5  1303862400   
1                     0                       0      1  1346976000   
2                     1                       1      4  1219017600   
3                     3                       3      2  1307923200   
4                     0                       0      5  1350777600   

                 Summary                                               Text  
0  Good Quality Dog Food  I have bought several of the Vitality canned d...  
1 

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Read the cleaned reviews dataset
csv_path = r"C:\Users\abira\OneDrive\Desktop\NULL CLASS\Reviews_cleaned.csv"
df = pd.read_csv(csv_path)

# Replace (not drop) NaN entries in clean_text with an empty string so the
# vectorizer receives a string for every row
# NOTE(review): presumably these are reviews whose cleaned text was empty
# before the CSV round trip - confirm
df['clean_text'] = df['clean_text'].fillna("")

# Features are the cleaned text, targets are the sentiment labels
X, y = df['clean_text'], df['sentiment']

# Hold out 20% of the rows for testing; stratify on the label and fix the
# seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit TF-IDF on the training split only, then reuse it on the test split
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit the Logistic Regression classifier on the TF-IDF features
model = LogisticRegression(max_iter=200)
model.fit(X_train_tfidf, y_train)

# Score the held-out split
y_pred = model.predict(X_test_tfidf)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print(" Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

# Persist both artifacts; the chatbot needs the vectorizer as well as the model
joblib.dump(model, r"C:\Users\abira\OneDrive\Desktop\NULL CLASS\sentiment_model.pkl")
joblib.dump(vectorizer, r"C:\Users\abira\OneDrive\Desktop\NULL CLASS\tfidf_vectorizer.pkl")

print(" Model and vectorizer saved for chatbot use.")


 Accuracy: 0.8702271947647571

Classification Report:
               precision    recall  f1-score   support

    negative       0.74      0.69      0.71     16407
     neutral       0.53      0.22      0.31      8528
    positive       0.90      0.97      0.93     88756

    accuracy                           0.87    113691
   macro avg       0.72      0.62      0.65    113691
weighted avg       0.85      0.87      0.86    113691

 Model and vectorizer saved for chatbot use.


In [4]:
import joblib

import re

# Load saved model and vectorizer.
# NOTE: joblib files are pickles and can run arbitrary code when loaded;
# only load artifacts that were produced by the training cell.
# The paths use single backslashes: inside a raw string "\\" is two literal
# backslashes, which would not match the paths the training cell saved to.
model = joblib.load(r"C:\Users\abira\OneDrive\Desktop\NULL CLASS\sentiment_model.pkl")
vectorizer = joblib.load(r"C:\Users\abira\OneDrive\Desktop\NULL CLASS\tfidf_vectorizer.pkl")


def _normalize_for_model(text):
    """Clean user input the same way the training text was cleaned.

    The vectorizer was fitted on the 'clean_text' column, which was produced
    by lower-casing, deleting everything except a-z and whitespace, and
    collapsing whitespace. Applying the identical steps here keeps tokens
    such as "doesnt" (from "doesn't") aligned with the fitted vocabulary.

    Args:
        text: Raw string typed by the user.

    Returns:
        The cleaned string (possibly empty).
    """
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    return re.sub(r'\s+', ' ', text).strip()


print("\n💬 Amazon Fine Food Review Chatbot (type 'exit' to quit)\n")

while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave cleanly
        print("\nBot: Goodbye! 👋")
        break

    # Exit condition
    if user_input.lower().strip() == "exit":
        print("Bot: Goodbye! 👋")
        break

    # Apply the training-time cleaning before vectorizing
    cleaned_input = _normalize_for_model(user_input)

    # Nothing left to classify (blank line, digits or symbols only): an
    # all-zero feature vector would just return the model's default class
    if not cleaned_input:
        print("Bot: Please type a few words about the product so I can read the sentiment.")
        continue

    # Transform input
    user_tfidf = vectorizer.transform([cleaned_input])

    # Predict sentiment
    prediction = model.predict(user_tfidf)[0].lower()

    # Respond based on sentiment; any label other than positive/negative
    # falls through to the neutral reply
    if prediction == "positive":
        print("Bot: Sentiment is positive 😊")
        print("Bot Reply: Thank you so much for your kind words! We’re thrilled you enjoyed it and can’t wait to serve you again.")
    elif prediction == "negative":
        print("Bot: Sentiment is negative 😞")
        print("Bot Reply: We’re sorry your experience wasn’t as expected. Your feedback helps us improve, and we’ll work to make it right next time.")
    else:
        print("Bot: Sentiment is neutral 😐")
        print("Bot Reply: Thank you for sharing your thoughts. We appreciate your suggestions and will use them to make your next experience even better.")



💬 Amazon Fine Food Review Chatbot (type 'exit' to quit)



You:   I am very satisfied ,product is as advertised, I use it on cereal, with raw vinegar, and as a general sweetner.


Bot: Sentiment is positive 😊
Bot Reply: Thank you so much for your kind words! We’re thrilled you enjoyed it and can’t wait to serve you again.


You:   product arrived labeled as jumbo salted peanutsthe peanuts were actually small sized unsalted not sure if this was an error or if the vendor intended to represent the product as jumbo


Bot: Sentiment is negative 😞
Bot Reply: We’re sorry your experience wasn’t as expected. Your feedback helps us improve, and we’ll work to make it right next time.


You:  this seems a little more wholesome than some of the supermarket brands but it is somewhat mushy and doesnt have quite as much flavor either it didnt pass muster with my kids so i probably wont buy it again


Bot: Sentiment is neutral 😐
Bot Reply: Thank you for sharing your thoughts. We appreciate your suggestions and will use them to make your next experience even better.


You:  exit


Bot: Goodbye! 👋
