In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score



In [2]:
# Read the labelled review dataset from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="CSV/reviews.csv")

# Preview the first five rows as plain text to check the columns loaded as expected.
print(df.head(n=5))

                                         Review Text  sentiment
0  Absolutely wonderful - silky and sexy and comf...          1
1  Love this dress!  it's sooo pretty.  i happene...          1
2  I love, love, love this jumpsuit. it's fun, fl...          1
3  This shirt is very flattering to all due to th...          1
4  I love tracy reese dresses, but this one is no...         -1


In [3]:
# Inputs are the raw review strings; targets are the matching sentiment labels.
X, y = df['Review Text'], df['sentiment']

# Hold out 20% of the rows for testing. Stratifying on y keeps the share of each
# sentiment label the same in the training and test splits, and the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [4]:
# Bag-of-Words: each review becomes a vector of per-word occurrence counts.
print("\nConverting text to numerical features using Bag-of-Words...")

# stop_words='english' drops common English words (e.g. "the", "an", "a", "is")
# that carry little sentiment on their own.
# NOTE: the variable name is misspelled ("vectorzier"); it is left as-is here
# because later cells refer to it by this name.
vectorzier = CountVectorizer(stop_words='english')

# Learn the vocabulary from the training reviews only and encode them.
X_train_bow = vectorzier.fit_transform(raw_documents=X_train)

# Encode the test reviews with the vocabulary learned above (transform only,
# no re-fitting), so nothing about the test set influences the features.
X_test_bow = vectorzier.transform(raw_documents=X_test)

print("Text successfully converted to feature vectors")


Converting text to numerical features using Bag-of-Words...
Text successfully converted to feature vectors


In [5]:
# Fit a logistic-regression classifier on the Bag-of-Words training matrix.
# max_iter is raised to 2000 to give the solver more iterations to converge.
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=2000).fit(X_train_bow, y_train)

print("Model training complete")

Model training complete


In [6]:
print("\n Evaluating model performance on the test set")

# Predict a sentiment label for every held-out review.
y_pred = model.predict(X_test_bow)

# Fraction of test reviews whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the score both as a 4-decimal fraction and as a percentage.
print(f"Model accuracy : {accuracy:.4f} ({accuracy:.2%})")


 Evaluating model performance on the test set
Model accuracy : 0.9294 (92.94%)


In [7]:
# Hand-written reviews used as a quick sanity check of the trained model.
dress_reviews = [
    "Absolutely love this dress! The fit is perfect and the fabric feels luxurious. Got so many compliments!",
    "The color was a bit different from the pictures, but it's still a beautiful dress. A little tight in the chest though.",
    "Meh. The material feels cheap and it wrinkles easily. Not worth the price in my opinion.",
    "This dress is a showstopper! Perfect for formal events. Hugs all the right places and flows like a dream."
]

# Encode with the vectorizer fitted on the training data so the feature columns
# line up with what the model was trained on, then predict one label per review.
X_review_bow = vectorzier.transform(dress_reviews)
y_pred_2 = model.predict(X_review_bow)

for review, label in zip(dress_reviews, y_pred_2):
    if label >= 1:
        sentiment = "positive"
    elif label < 0:
        sentiment = "negative"
    else:
        # Surface any label outside the two expected ranges instead of
        # printing nothing for that review.
        sentiment = f"unrecognised label {label}"
    # Print the review text itself. Wrapping it in {...} would build a
    # one-element set and print its repr (braces and quotes) rather than the
    # plain text.
    print(review, "Sentiment :", sentiment)


{'Absolutely love this dress! The fit is perfect and the fabric feels luxurious. Got so many compliments!'} Sentiment : positive
{"The color was a bit different from the pictures, but it's still a beautiful dress. A little tight in the chest though."} Sentiment : positive
{'Meh. The material feels cheap and it wrinkles easily. Not worth the price in my opinion.'} Sentiment : negative
{'This dress is a showstopper! Perfect for formal events. Hugs all the right places and flows like a dream.'} Sentiment : positive


In [8]:
import joblib

# The model only understands vectors produced by this exact fitted vectorizer
# (same vocabulary, same column order), so persist it alongside the model;
# otherwise the saved model cannot score raw text in a fresh session.
joblib.dump(vectorzier, 'vectorizer.joblib')

# Persist the trained classifier. Kept as the cell's last expression so the
# list of written file names is still displayed as the cell output.
joblib.dump(model, 'model.joblib')

['model.joblib']

In [9]:
!pip install google-cloud-aiplatform

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.111.0-py2.py3-none-any.whl.metadata (38 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0,>=1.34.1 (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0,>=1.34.1->google-cloud-aiplatform)
  Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)
Collecting google-auth<3.0.0,>=2.14.1 (from google-cloud-aiplatform)
  Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)
Collecting proto-plus<2.0.0,>=1.22.3 (from google-cloud-aiplatform)
  Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0,>=3.20.2 (from google-cloud-aiplatform)
  Downloading protobuf-6.32.0-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Collecting google-cloud-storage<3.0.0,>=1.32.0 (from google-cloud-aiplatform)
  Downloading google

In [10]:
from google.cloud import aiplatform

In [17]:


# Point the Vertex AI SDK at the project and region that host the endpoint.
aiplatform.init(
    project="project-bp-470604",
    location="us-central1",
)

# Build a handle for the endpoint with the given numeric ID.
# NOTE(review): this constructor appears to attach to an already-deployed
# endpoint rather than create a new one, so the "Created" wording in the
# message below may be misleading — confirm before relying on it.
endpoint = aiplatform.Endpoint(endpoint_name='7675087041620606976')

print("Successfully Created Endpoint ", endpoint.resource_name, sep="")

Successfully Created Endpoint projects/732623445589/locations/us-central1/endpoints/7675087041620606976


In [18]:
# A single unseen review, wrapped in a list so it can be passed to the
# vectorizer as a collection of documents.
new_review = [
    "The dress looks stunning and I got lots of compliments, though the zipper feels a bit flimsy."
]

# Encode the review with the already-fitted vectorizer, densify the result,
# and convert it to nested plain Python lists so it can be passed as the
# `instances` payload of an endpoint prediction request.
sparse_matrix = vectorzier.transform(new_review)
numpy_array = sparse_matrix.toarray()
processed_review = numpy_array.tolist()



In [21]:
# Send the encoded review to the deployed endpoint for an online prediction.
response = endpoint.predict(instances=processed_review)

# Show the prediction for the first (and only) instance, then the type of the
# response object, each on its own line.
print(response.predictions[0], type(response), sep="\n")

1
<class 'google.cloud.aiplatform.models.Prediction'>
