In [3]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from transformers import pipeline, AutoTokenizer
from tqdm import tqdm

In [4]:
# Load the car reviews dataset
file_path = "/content/Car_Reviews.csv"
df = pd.read_csv(file_path)

In [5]:
df.head()

Unnamed: 0,Vehicle_Title,Review,Recommend
0,2006 Hyundai Azera Sedan Limited 4dr Sedan (3....,Electrical nightmare. Dealer cost for repair...,No
1,2006 Hyundai Azera Sedan Limited 4dr Sedan (3....,"I traded out of my Acura due to gas prices, o...",No
2,2006 Hyundai Azera Sedan Limited 4dr Sedan (3....,Gas mileage is terrible. No front seat driver...,No
3,2012 Hyundai Azera Sedan 4dr Sedan (3.3L 6cyl 6A),Warning Do not buy Hyundai Azera. It has engi...,No
4,2007 Hyundai Azera Sedan Limited 4dr Sedan (3....,I was a big advocate of my Hyundai azera. I d...,No


In [6]:
df.size

32034

# **Instruction 1**

- Use a pre-trained LLM to classify the sentiment of the five car reviews in the `car_reviews.csv` dataset, and evaluate the classification accuracy and F1 score of predictions.
  - Store the model outputs in `predicted_labels`, then extract the labels and map them onto a list of `{0,1}` integer binary labels called `predictions`.
  - Store the calculated metrics in `accuracy_result` and `f1_result`.


In [8]:
# Task 1: Sentiment Classification with Token-Level Truncation
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
sentiment_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=0)

def tokenize_and_truncate(text, max_length):
    tokens = tokenizer(text, truncation=True, padding=False, max_length=max_length, return_tensors="pt")
    return tokenizer.decode(tokens['input_ids'][0], skip_special_tokens=True)

df["Truncated_Review"] = df["Review"].apply(lambda x: tokenize_and_truncate(x, 512))

# Process reviews in batches to avoid memory issues
batch_size = 1000
predicted_labels = []

for i in tqdm(range(0, df.shape[0], batch_size)):
    batch_reviews = df["Truncated_Review"].iloc[i:i+batch_size].tolist()
    batch_predictions = sentiment_classifier(batch_reviews)
    predicted_labels.extend(batch_predictions)

# Map predictions to binary labels (1 for positive, 0 for negative)
predictions = [1 if label['label'] == 'POSITIVE' else 0 for label in predicted_labels]
true_labels = [0 if rec == 'No' else 1 for rec in df['Recommend']]

# Calculate accuracy and F1 score
accuracy_result = accuracy_score(true_labels, predictions)
f1_result = f1_score(true_labels, predictions)

# Output the results
print(f"Accuracy: {accuracy_result}")
print(f"F1 Score: {f1_result}")

 91%|█████████ | 10/11 [01:22<00:08,  8.43s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 11/11 [01:26<00:00,  7.88s/it]

Accuracy: 0.8711369170256602
F1 Score: 0.8572910184609002





# **Instruction 2**

- The company is recently attracting customers from Spain. Extract and pass the **first two sentences** of the first review in the dataset to an English-to-Spanish translation LLM.
  - Calculate the BLEU score to assess translation quality, using the content in `reference_translations.txt` as references.
  - Store the translated text generated by the LLM in `translated_review`.
  - Store the BLEU score metric result in `bleu_score`.

In [11]:
import nltk
from transformers import pipeline
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize

# Download the punkt tokenizer data
nltk.download('punkt')

# Initialize the translation pipeline with GPU (if available)
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es", device=0)

# Extract the first two sentences of the first review
first_two_sentences = "Electrical nightmare. Dealer cost for repairs was extremely high."

# Translate the first two sentences to Spanish
translated_review = translator(first_two_sentences)[0]['translation_text']

# Reference translation (based on the first review)
reference_translation = "Pesadilla eléctrica. El costo de las reparaciones en el concesionario fue extremadamente alto."

# Tokenize for BLEU score calculation
reference_tokens = word_tokenize(reference_translation)
translated_tokens = word_tokenize(translated_review)

# BLEU score calculation
bleu_score = sentence_bleu([reference_tokens], translated_tokens)

# Store the translated review and BLEU score in the DataFrame
df["translated_review"] = ""
df["bleu_score"] = ""

df.at[0, "translated_review"] = translated_review
df.at[0, "bleu_score"] = bleu_score

# Output the first few rows to verify
df.head()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Unnamed: 0,Vehicle_Title,Review,Recommend,Truncated_Review,translated_review,bleu_score
0,2006 Hyundai Azera Sedan Limited 4dr Sedan (3....,Electrical nightmare. Dealer cost for repair...,No,electrical nightmare. dealer cost for repairs ...,El costo de las reparaciones fue extremadament...,0.383869
1,2006 Hyundai Azera Sedan Limited 4dr Sedan (3....,"I traded out of my Acura due to gas prices, o...",No,"i traded out of my acura due to gas prices, on...",,
2,2006 Hyundai Azera Sedan Limited 4dr Sedan (3....,Gas mileage is terrible. No front seat driver...,No,gas mileage is terrible. no front seat drivers...,,
3,2012 Hyundai Azera Sedan 4dr Sedan (3.3L 6cyl 6A),Warning Do not buy Hyundai Azera. It has engi...,No,warning do not buy hyundai azera. it has engin...,,
4,2007 Hyundai Azera Sedan Limited 4dr Sedan (3....,I was a big advocate of my Hyundai azera. I d...,No,i was a big advocate of my hyundai azera. i di...,,


# **Instruction 3**

- The 2nd review in the dataset emphasizes brand aspects. Load an extractive QA LLM such as `"deepset/minilm-uncased-squad2"` to formulate the question **"What did he like about the brand?"** and obtain an answer.
  - Use `question` and `context` for the two variables containing the LLM inputs: question and context.
  - Store the actual text answer in `answer`.

In [14]:
from transformers import pipeline

# Load the extractive QA pipeline
qa_pipeline = pipeline("question-answering", model="deepset/minilm-uncased-squad2", device = 0)

# Extract the second review from the DataFrame
context = df["Review"].iloc[1]  # Context is the second review
question = "What did he like about the brand?"

# Get the answer from the QA model
result = qa_pipeline(question=question, context=context)

# Store the actual text answer in the variable
answer = result['answer']

# Print the question and answer
print("Question:", question)
print("Answer:", answer)

Some weights of the model checkpoint at deepset/minilm-uncased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Question: What did he like about the brand?
Answer: The Azera limited  handling is unstable,and bouncy


# **Instruction 4**

- Summarize the last review in the dataset into approximately **50-55 tokens** long.
  - Store the summarized output in the variable `summarized_text`.

In [15]:
from transformers import pipeline

# Load the summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Extract the last review from the DataFrame
last_review = df["Review"].iloc[-1]  # Last review

# Generate the summary
summary = summarizer(last_review, max_length=55, min_length=50, length_penalty=2.0, return_text=True)

# Store the summarized output in the variable
summarized_text = summary[0]['summary_text']

# Print the summarized text
print("Summarized Text:", summarized_text)

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Summarized Text: I love my car. It is fun to drive, has  great pick up, handles well. I have no problems on busy freeways. It's very comfortable to take on long trips, as long as there are not more than 2
