In [1]:
# all libraries required
import pandas as pd  # for data handling
import matplotlib.pyplot as plt  # for graphs
from sklearn.feature_extraction.text import TfidfVectorizer  # to convert text to numbers
from sklearn.linear_model import LogisticRegression  # model for prediction
from sklearn.model_selection import train_test_split  # to split training and test data
from sklearn.metrics import accuracy_score, classification_report  # to check model performance

In [2]:
# Load the labelled review dataset prepared for this notebook.
# pandas is already imported as `pd` in the first cell, so it is not
# re-imported here (all imports live in one place at the top).
# NOTE(review): this absolute path presumably requires Google Drive to be
# mounted at /content/drive before this cell runs — confirm.
path = "/content/drive/MyDrive/reviews.csv"
df = pd.read_csv(path)

# Fail fast with a readable message if the file does not contain the columns
# that the following cells index by name.
missing_columns = {"Review", "Sentiment"} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{path} is missing required column(s): {sorted(missing_columns)}"
    )

# Preview the data (last expression of the cell, so it is rendered as a table).
df.head(100)

Unnamed: 0,Review,Sentiment
0,Poor performance and bad experience. Really nice.,Negative
1,Poor performance and bad experience..,Negative
2,The quality was awful and cheap.,Negative
3,Fantastic value for money.!,Positive
4,Amazing quality and fast delivery.,Positive
...,...,...
95,Bad customer service.!,Negative
96,"I want a refund, this is horrible.!",Negative
97,I had a great experience.,Positive
98,Extremely buggy and slow..,Negative


In [3]:
# Convert the Sentiment column into numeric labels using a dictionary:
# Positive -> 1, Negative -> 0.
d = {'Positive': 1, 'Negative': 0}

# This cell overwrites df['Sentiment'] in place. Without a guard, running it a
# second time would push the already-encoded 0/1 values through the dictionary
# again; they are not keys of `d`, so every label would become NaN.
# Only encode when the column is not already made up of the target codes.
if not df['Sentiment'].isin(list(d.values())).all():
    encoded = df['Sentiment'].map(d)

    # .map() yields NaN for any value that is not a key of `d`; surface those
    # labels here instead of letting NaN targets reach the model.
    unknown = encoded.isna()
    if unknown.any():
        raise ValueError(
            "Unexpected Sentiment label(s): "
            f"{df.loc[unknown, 'Sentiment'].unique().tolist()}"
        )

    # No NaN left at this point, so the labels can be stored as integers.
    df['Sentiment'] = encoded.astype('int64')

X = df['Review']  # input is the review text
y = df['Sentiment']  # output is the numeric sentiment label

combine = pd.concat([X, y], axis=1)  # show input and output columns together

print(combine.head(31))


                                               Review  Sentiment
0   Poor performance and bad experience. Really nice.          0
1               Poor performance and bad experience..          0
2                    The quality was awful and cheap.          0
3                         Fantastic value for money.!          1
4                  Amazing quality and fast delivery.          1
5           High-quality material and smooth finish..          1
6            Works better than expected. Really nice.          1
7              Extremely buggy and slow. Never again.          0
8                          Fantastic value for money.          1
9    The color and design are beautiful. Totally bad.          1
10    Amazing quality and fast delivery. Totally bad.          1
11               The color and design are beautiful..          1
12                   Very dissatisfied, total waste.!          0
13                  I love this product! Really nice.          1
14       It stopped worki

In [4]:
# Split the raw review text into training and test sets FIRST.
# Fitting TF-IDF on every review before splitting lets the vocabulary and IDF
# weights absorb information from the test reviews (data leakage), which makes
# the reported accuracy optimistic. The split arguments are unchanged.
reviews_train, reviews_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Convert text into TF-IDF numbers
vectorizer = TfidfVectorizer()  # create vectorizer
X_train = vectorizer.fit_transform(reviews_train)  # learn vocabulary/IDF from training text only
X_test = vectorizer.transform(reviews_test)  # reuse the fitted vocabulary/IDF, no refit

# Kept only so that any cell still referencing X_tfidf keeps working; it covers
# train and test rows together, so it must not be used for evaluation.
X_tfidf = vectorizer.transform(X)

# Train model
model = LogisticRegression()  # create the logistic regression classifier
model.fit(X_train, y_train)  # train

# Predict the sentiment of the held-out test reviews
y_pred = model.predict(X_test)

# NOTE(review): the previewed data contains near-identical reviews (e.g.
# "Fantastic value for money." with and without a trailing "!"), so very
# similar texts may land on both sides of the split and accuracy may still
# look better than it would on genuinely new reviews — confirm / deduplicate.
print("Accuracy:", accuracy_score(y_test, y_pred))  # to show accuracy


Accuracy: 1.0


In [5]:
# Sanity-check the trained classifier on one hand-written review.
test_review = ["The product was amazing and I loved it!"]

# Only transform here (no fitting): the review is encoded with the existing
# vectorizer and the resulting features are passed to the trained model.
test_review_tfidf = vectorizer.transform(test_review)
prediction = model.predict(test_review_tfidf)

# A predicted label of 1 is reported as positive, anything else as negative.
print(
    "Predicted Sentiment: Positive :) "
    if prediction[0] == 1
    else "Predicted Sentiment: Negative :( "
)


Predicted Sentiment: Positive :) 
