In [4]:
# %pip install polars  # uncomment on first run if polars is missing from the kernel environment

In [1]:
import polars as pl

In [2]:
# Read the large reviews CSV into a polars DataFrame
DATA_PATH = "1429_1.csv"
df = pl.read_csv(DATA_PATH)

In [3]:
# Preview the first 5 rows, then the (rows, columns) shape on the next line
print(df.head(), df.shape, sep="\n")

shape: (5, 21)
┌────────────┬────────────┬───────────┬────────┬───┬───────────┬───────────┬───────────┬───────────┐
│ id         ┆ name       ┆ asins     ┆ brand  ┆ … ┆ reviews.t ┆ reviews.u ┆ reviews.u ┆ reviews.u │
│ ---        ┆ ---        ┆ ---       ┆ ---    ┆   ┆ itle      ┆ serCity   ┆ serProvin ┆ sername   │
│ str        ┆ str        ┆ str       ┆ str    ┆   ┆ ---       ┆ ---       ┆ ce        ┆ ---       │
│            ┆            ┆           ┆        ┆   ┆ str       ┆ str       ┆ ---       ┆ str       │
│            ┆            ┆           ┆        ┆   ┆           ┆           ┆ str       ┆           │
╞════════════╪════════════╪═══════════╪════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡
│ AVqkIhwDv8 ┆ All-New    ┆ B01AHB9CN ┆ Amazon ┆ … ┆ Kindle    ┆ null      ┆ null      ┆ Adapter   │
│ e3D1O-lebb ┆ Fire HD 8  ┆ 2         ┆        ┆   ┆           ┆           ┆           ┆           │
│            ┆ Tablet, 8  ┆           ┆        ┆   ┆           ┆           ┆

In [8]:
# %pip install transformers torch datasets scikit-learn  # uncomment on first run if these are missing from the kernel environment


In [4]:
from transformers import BertTokenizer, BertForSequenceClassification

In [5]:
# Tokenizer that pairs with the "bert-base-uncased" checkpoint loaded for the model
tokenizer = BertTokenizer.from_pretrained(
    "bert-base-uncased",
)

In [7]:
# BERT with a 3-way classification head: negative, neutral, positive
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=3,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Inspect the available column names of the loaded DataFrame
df.columns

['id',
 'name',
 'asins',
 'brand',
 'categories',
 'keys',
 'manufacturer',
 'reviews.date',
 'reviews.dateAdded',
 'reviews.dateSeen',
 'reviews.didPurchase',
 'reviews.doRecommend',
 'reviews.id',
 'reviews.numHelpful',
 'reviews.rating',
 'reviews.sourceURLs',
 'reviews.text',
 'reviews.title',
 'reviews.userCity',
 'reviews.userProvince',
 'reviews.username']

In [8]:
def preprocess(reviews, max_length=128):
    """Tokenize review text with the notebook-level ``tokenizer``.

    Args:
        reviews: The text (or list of texts) handed unchanged to ``tokenizer``
            as its first argument.
        max_length: Upper bound on tokens kept per review; forwarded as the
            tokenizer's ``max_length``. Defaults to 128, the value that was
            previously hard-coded, so existing calls behave the same.

    Returns:
        Whatever ``tokenizer`` returns for these arguments. Tensors are
        requested in PyTorch format via ``return_tensors="pt"``.
    """
    return tokenizer(
        reviews,
        padding=True,           # pad shorter reviews so the batch is rectangular
        truncation=True,        # cut reviews that exceed max_length
        max_length=max_length,
        return_tensors="pt",
    )

In [9]:
# Hard-coded sample of five review texts used as the input batch for the cells below
reviews_text_list = [
    'This product so far has not disappointed. My children love to use it and I like the ability to monitor control what content they see with ease.',
    'great for beginner or experienced person. Bought as a gift and she loves it',
    'Inexpensive tablet for him to use and learn on, step up from the NABI. He was thrilled with it, learn how to Skype on it already...',
    "I've had my Fire HD 8 two weeks now and I love it. This tablet is a great value.We are Prime Members and that is where this tablet SHINES. I love being able to easily access all of the Prime content as well as movies you can download and watch laterThis has a 1280/800 screen which has some really nice look to it its nice and crisp and very bright infact it is brighter then the ipad pro costing $900 base model. The build on this fire is INSANELY AWESOME running at only 7.7mm thick and the smooth glossy feel on the back it is really amazing to hold its like the futuristic tab in ur hands.",
    'I bought this for my grand daughter when she comes over to visit. I set it up with her as the user, entered her age and name and now Amazon makes sure that she only accesses sites and content that are appropriate to her age. Simple to do and she loves the capabilities. I also bought and installed a 64gig SD card which gives this little tablet plenty of storage. For the price I think this tablet is best one out there. You can spend hundreds of dollars more for additional speed and capacity but when it comes to the basics this tablets does everything that most people will ever need at a fraction of the cost.'
]
# Disabled alternative: build the list from every non-null entry of the
# "reviews.text" column of df instead of the hard-coded sample above.
# reviews_text = df["reviews.text"]
# reviews_text_list = [review for review in reviews_text if review is not None]
print(reviews_text_list)

['This product so far has not disappointed. My children love to use it and I like the ability to monitor control what content they see with ease.', 'great for beginner or experienced person. Bought as a gift and she loves it', 'Inexpensive tablet for him to use and learn on, step up from the NABI. He was thrilled with it, learn how to Skype on it already...', "I've had my Fire HD 8 two weeks now and I love it. This tablet is a great value.We are Prime Members and that is where this tablet SHINES. I love being able to easily access all of the Prime content as well as movies you can download and watch laterThis has a 1280/800 screen which has some really nice look to it its nice and crisp and very bright infact it is brighter then the ipad pro costing $900 base model. The build on this fire is INSANELY AWESOME running at only 7.7mm thick and the smooth glossy feel on the back it is really amazing to hold its like the futuristic tab in ur hands.", 'I bought this for my grand daughter when

In [15]:
# test = reviews_text_list[3100:3200]
# print(test)
#print(len(test))

In [16]:
# print(type(test))
# print(type(test[0]))

In [10]:
# Tokenize the sample reviews with preprocess(); the result is unpacked into the model call below
inputs = preprocess(reviews_text_list)

### Make predictions

In [11]:
import torch

# Forward pass for predictions.
# NOTE(review): the warning printed when the model was loaded says the
# classifier weights are newly initialised, so until the model is fine-tuned
# the sentiments printed below are effectively arbitrary.
model.eval()  # make sure dropout is disabled even if this cell is re-run after training
with torch.no_grad():  # inference only: skip building the autograd graph
    outputs = model(**inputs)
logits = outputs.logits

# Get the predicted class index for each review (highest logit along dim 1)
predictedClass = torch.argmax(logits, dim=1)

# Map class indices to labels; .tolist() yields plain Python ints for list indexing
labels = ["negative", "neutral", "positive"]
predictedLabels = [labels[i] for i in predictedClass.tolist()]

for review, label in zip(reviews_text_list, predictedLabels):
    print(f"Review: {review}\nSentiment: {label}\n")

Review: This product so far has not disappointed. My children love to use it and I like the ability to monitor control what content they see with ease.
Sentiment: negative

Review: great for beginner or experienced person. Bought as a gift and she loves it
Sentiment: neutral

Review: Inexpensive tablet for him to use and learn on, step up from the NABI. He was thrilled with it, learn how to Skype on it already...
Sentiment: negative

Review: I've had my Fire HD 8 two weeks now and I love it. This tablet is a great value.We are Prime Members and that is where this tablet SHINES. I love being able to easily access all of the Prime content as well as movies you can download and watch laterThis has a 1280/800 screen which has some really nice look to it its nice and crisp and very bright infact it is brighter then the ipad pro costing $900 base model. The build on this fire is INSANELY AWESOME running at only 7.7mm thick and the smooth glossy feel on the back it is really amazing to hold i