# Training a Sentiment Analysis Model

In [3]:
import pandas as pd

# Read the review sample from the working directory (change the path if the CSV lives elsewhere)
data = pd.read_csv(
    filepath_or_buffer="amazon_com-product_reviews__20200101_20200331_sample.csv",
)

# Show the first five rows as a quick sanity check of the columns
print(data.head(5))


                            Uniq Id            Crawl Timestamp  \
0  10d8b81dc693db8a3f92e19254525d53  2020-03-17 02:32:10 +0000   
1  31ef1ee4961d3fcff33e363edd7677eb  2020-03-17 02:32:10 +0000   
2  6943e517922b051cbec496be32fbb57f  2020-03-17 02:32:10 +0000   
3  6778c794e6ac7112981d4ebb239d6826  2020-03-17 02:32:10 +0000   
4  2335a28ae24db3b2191080bfd98a86e0  2020-03-17 02:32:10 +0000   

                    Billing Uniq Id  Rating  \
0  c697dc43961526785292107b91a639e1     NaN   
1  c697dc43961526785292107b91a639e1     NaN   
2  c697dc43961526785292107b91a639e1     NaN   
3  c697dc43961526785292107b91a639e1     NaN   
4  c697dc43961526785292107b91a639e1     NaN   

                                        Review Title  Review Rating  \
0                           Best Natural Deodorant!!            5.0   
1          Schmidt's Charcoal Deodorant is the best!            5.0   
2                  Beware. Defective or counterfeit.            1.0   
3  Good for infrequent shavers in co

## Preprocess the Data

In [5]:
# data

def label_sentiment(score):
    """Map a numeric review rating to a sentiment label.

    Parameters
    ----------
    score : float or None
        Rating value of a single review; may be missing (None / NaN).

    Returns
    -------
    str or None
        "positive" when ``score > 3``, "neutral" when ``score == 3``,
        "negative" when ``score < 3``, and ``None`` when the rating is missing.
    """
    # Every comparison against NaN is False, so without this guard a missing
    # rating would fall through to the last branch and be mislabeled
    # "negative". Returning None keeps the label missing so that a
    # subsequent dropna() can discard the unrated row.
    if pd.isna(score):
        return None
    if score > 3:
        return "positive"
    if score == 3:
        return "neutral"
    return "negative"

# Derive a sentiment label for each row from its 'Review Rating' value
data['Sentiment'] = data['Review Rating'].map(label_sentiment)


In [8]:
# data


# Keep only the review text and its label, discarding rows where either value is missing
reviews = data.loc[:, ['Review Content', 'Sentiment']].dropna(how='any')
print(reviews.head(5))


                                      Review Content Sentiment
0  This is the one natural deodorant that works f...  positive
1  I am staying away from the conventional deodor...  positive
2  Do not buy this from Amazon it's either defect...  negative
3  I started using this because I was getting cys...   neutral
4  Dont buy this from amazon!!!! I never write re...  negative


## Training the Sentiment Analysis Model

In [11]:
from sklearn.model_selection import train_test_split

# Features are the raw review text; targets are the derived sentiment labels
X, y = reviews['Review Content'], reviews['Sentiment']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [12]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.naive_bayes import MultinomialNB

# Token-count features feeding a multinomial Naive Bayes classifier.
# fit() returns the fitted pipeline, so construction and training are chained.
model = make_pipeline(CountVectorizer(), MultinomialNB()).fit(X_train, y_train)

print("Model trained successfully!")


Model trained successfully!


## Evaluate the Model

In [13]:
from sklearn.metrics import classification_report

# Predict sentiment labels for the held-out reviews
predictions = model.predict(X_test)

# A class that is never predicted has an undefined precision. zero_division=0
# reports it as 0.0 explicitly instead of emitting an UndefinedMetricWarning
# for each affected metric; a 0.00 row still signals that the model never
# predicted that class.
print(classification_report(y_test, predictions, zero_division=0))


              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         1
    positive       0.90      1.00      0.95         9

    accuracy                           0.90        10
   macro avg       0.45      0.50      0.47        10
weighted avg       0.81      0.90      0.85        10



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Save Model

In [14]:
import joblib

# Persist the fitted pipeline (vectorizer + classifier) to the working directory
joblib.dump(value=model, filename='amazon_sentiment_model.pkl')
print("Model saved as amazon_sentiment_model.pkl")


Model saved as amazon_sentiment_model.pkl
