In [3]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score,recall_score,f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
data = pd.read_csv('synthetic_product_feedback.csv')
data

Unnamed: 0,Text,Label
0,"Terrible experience, won't recommend. Customer...",bad
1,Poor quality and not worth the money. Waste of...,bad
2,"Does not match the description. Faulty item, h...",bad
3,Highly satisfied with the purchase. Would defi...,good
4,Works perfectly and as expected. Works perfect...,good
...,...,...
95,"Terrible experience, won't recommend. Faulty i...",bad
96,Performance was below expectations. Terrible e...,bad
97,Great quality and fast delivery. Excellent cus...,good
98,Late delivery and bad packaging. Waste of mone...,bad


In [None]:
stop_words = stopwords.words('english')
Vectorizer = TfidfVectorizer(max_features=300,stop_words=stop_words,lowercase=True)
X = Vectorizer.fit_transform(data['Text'])
Y = data['Label'] == 'good'

In [6]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.25,random_state=42)

In [7]:
model = LogisticRegression()
model.fit(X_train,Y_train)
Y_pred = model.predict(X_test)
print("Precision : ",precision_score(Y_test,Y_pred))
print("Recall : ",recall_score(Y_test,Y_pred))
print("F1 Score : ",f1_score(Y_test,Y_pred))

Precision :  1.0
Recall :  1.0
F1 Score :  1.0


In [8]:
def text_preprocess_vectorize(texts,vectorizer):
    X_pred = vectorizer.transform(texts)
    return X_pred

In [11]:
texts = ["The sound quality is excellent and the battery lasts all day. Very impressed with the noise cancellation too.",
         "Stopped working after just two weeks. The auto-shutoff feature is unreliable. Would not recommend.",
         "Comfortable and easy to assemble. My back pain has reduced since I started using it.",
         "Difficult to install without bubbles. The edges started peeling off after a few days.",
         "Super convenient for quick smoothies. Blends ice and fruits smoothly without noise."]

vectorized_feature_matrix = text_preprocess_vectorize(texts,Vectorizer)
df_features = pd.DataFrame(vectorized_feature_matrix.toarray(),columns=Vectorizer.get_feature_names_out())
df_features

Unnamed: 0,bad,build,buy,customer,days,definitely,delivery,description,disappointed,excellent,...,top,unhelpful,useful,value,waste,well,working,works,worth,would
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.728828,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.527976,0.0,0.0,0.527976
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
