In [7]:
import json
import os

import pandas as pd
import numpy as np

def read_json_to_df(file_name):
    data = []
    with open(file_name) as data_file:
        for line in data_file:
            # Load each line of the JSON file as a dictionary
            data.append(json.loads(line))

    # Form a Pandas DataFrame from the dictionaries
    return pd.json_normalize(data)

# Load the training and test data
raw_train_df = read_json_to_df("hotel_reviews_train.json")
raw_test_df = read_json_to_df("hotel_reviews_test.json")

ratings_columns = [col for col in raw_train_df.columns if col.startswith("ratings.")]

# Select the title, text and overall rating columns to make a new dataframe
train_df = raw_train_df[["title", "text"] + ratings_columns]
test_df = raw_test_df[["title", "text"] + ratings_columns]

# Save the English reviews to a CSV file to save time filtering when running again (NumFOCUS, Inc. 2024)
if os.path.exists("english_hotel_reviews_train.csv"):
    train_df = pd.read_csv("english_hotel_reviews_train.csv")

if os.path.exists("english_hotel_reviews_test.csv"):
    test_df = pd.read_csv("english_hotel_reviews_test.csv")

train_df = train_df.fillna(0)
test_df = test_df.fillna(0)

inputs = train_df[ratings_columns]
outputs = train_df['text']

inputs_test = test_df[ratings_columns]
outputs_test = test_df['text']

In [2]:
def format_input(rating) -> str:
    input = "Generate a hotel review based on the following ratings:\n"
    service = rating.iloc[0]
    cleanliness = rating.iloc[1]
    overall = rating.iloc[2]
    value = rating.iloc[3]
    location = rating.iloc[4]
    sleep_quality = rating.iloc[5]
    rooms = rating.iloc[6]
    check_in_front_desk = rating.iloc[7]
    business_service = rating.iloc[8]

    if service != 0:
      input += f"- service: {service}\n"
    if cleanliness != 0:
      input += f"- cleanliness: {cleanliness}\n"
    if overall != 0:
      input += f"- overall: {overall}\n"
    if value != 0:
      input += f"- value: {value}\n"
    if location != 0:
      input += f"- location: {location}\n"
    if sleep_quality != 0:
      input += f"- sleep quality: {sleep_quality}\n"
    if rooms != 0:
      input += f"- rooms: {rooms}\n"
    if check_in_front_desk != 0:
      input += f"- check in: {check_in_front_desk}\n"
    if business_service != 0:
      input += f"- business service: {business_service}\n"

    input += "Review: "

    return input

In [19]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments

from datasets import Dataset

dataset = Dataset.from_dict({"ratings": inputs.apply(format_input, axis=1).to_list(), "review_text": outputs.to_list()})

dataset = dataset.select(range(1000))

# Preprocessing function for dataset
def preprocess(examples):
    model_inputs = tokenizer(examples["ratings"], truncation=True, padding="max_length", max_length=128)
    labels = tokenizer(examples["review_text"], truncation=True, padding="max_length", max_length=512)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')

tokenized = dataset.map(preprocess, batched=True)
training_args = TrainingArguments(output_dir='./results', per_device_train_batch_size=4, num_train_epochs=10)

trainer = Trainer(model=model, args=training_args, train_dataset=tokenized)
trainer.train()

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Step,Training Loss
500,1.8451
1000,1.4989
1500,1.4779
2000,1.4652
2500,1.4598


TrainOutput(global_step=2500, training_loss=1.54935205078125, metrics={'train_runtime': 640.6375, 'train_samples_per_second': 15.609, 'train_steps_per_second': 3.902, 'total_flos': 338354503680000.0, 'train_loss': 1.54935205078125, 'epoch': 10.0})

In [None]:
def generate_review(ratings_str):
    device = model.device
    inputs = tokenizer(ratings_str, return_tensors="pt").to(device)
    output = model.generate(**inputs, max_length=512)
    return tokenizer.decode(output[0], skip_special_tokens=True)


test_dataset = Dataset.from_dict({"ratings": inputs_test.apply(format_input, axis=1).to_list(), "review_text": outputs_test.to_list()})

test_dataset = test_dataset.select(range(100))  # Select first 100 rows

from rouge import Rouge
rouge = Rouge()

# Example
for example in test_dataset:
    generated_review = generate_review(example["ratings"])

    print("Input:", example["ratings"])
    print("Generated review:", generated_review)
    print("Actual review:", example["review_text"])
    # Evaluate the summaries using the ROUGE metric
    scores_abstractive = rouge.get_scores(generated_review, example["review_text"])

    # Print the summaries and ROUGE scores
    print("ROUGE Scores for Abstractive Summary: ", scores_abstractive)
    print('\n')

Input: Generate a hotel review based on the following ratings:
- service: 5.0
- cleanliness: 5.0
- overall: 5.0
- value: 5.0
- location: 5.0
- sleep quality: 5.0
- rooms: 5.0
Review: 
Generated review: I stayed at the Mercure Hotel in the City of New York City in the summer of 2007. The hotel is very clean and the staff is very friendly and helpful. The hotel is very clean and the rooms are very comfortable. The hotel is very close to the shopping center and the airport. The hotel is very convenient to shopping and restaurants. The hotel is very close to the city and the shops.
Actual review: My husband and I just celebrated our 25th wedding anniversary. We normally stay at very high class hotels but that night we wanted to try something new without spending a fortune. (St. Regis, Ritz, etc are our norms) We wanted the Tribeca area so we pulled up to the hotel (unassuming front) The Bell guys were so nice and took care of our bags right away. While checking in we had at least 3 people 

In [25]:
from bert_score import score
from rouge import Rouge
from tqdm import tqdm 

rouge = Rouge()

generated_texts = []
reference_texts = []

# Iterate through the dataset
for example in tqdm(test_dataset):
    generated_review = generate_review(example["ratings"])
    reference_review = example["review_text"]

    generated_texts.append(generated_review)
    reference_texts.append(reference_review)

# Compute average ROUGE scores over all examples
scores = rouge.get_scores(generated_texts, reference_texts, avg=True)

# Print nicely formatted scores
print("=== AVERAGE ROUGE SCORES ===")
for metric, sub_scores in scores.items():
    print(f"{metric.upper()}:")
    for sub_metric, value in sub_scores.items():
        print(f"  {sub_metric}: {value:.4f}")

P, R, F1 = score(generated_texts, reference_texts, lang="en", verbose=True)

# Print average scores
print("=== BERTScore Results ===")
print(f"Precision: {P.mean().item():.4f}")
print(f"Recall:    {R.mean().item():.4f}")
print(f"F1 Score:  {F1.mean().item():.4f}")

100%|██████████| 100/100 [01:35<00:00,  1.04it/s]


=== AVERAGE ROUGE SCORES ===
ROUGE-1:
  r: 0.1802
  p: 0.4054
  f: 0.2312
ROUGE-2:
  r: 0.0482
  p: 0.0835
  f: 0.0547
ROUGE-L:
  r: 0.1711
  p: 0.3852
  f: 0.2198


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/4 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/2 [00:00<?, ?it/s]

done in 9.60 seconds, 10.42 sentences/sec
=== BERTScore Results ===
Precision: 0.8311
Recall:    0.8233
F1 Score:  0.8268


In [26]:
generated_review = generate_review("""Input: Generate a hotel review based on the following ratings:
- service: 1.0
- cleanliness: 1.0
- overall: 1.0
- value: 1.0
- sleep quality: 1.0
Review: 
""")
print(generated_review)

I've never been to a hotel before I went to the hotel. The hotel is a great value for the price. The staff are very friendly and helpful. The hotel is very clean and the room was very clean and the staff was very helpful. The hotel is very nice. The hotel is a great place to stay. The hotel is a great place to stay. The price is very reasonable. The room was very comfortable and the room was very comfortable. The staff were very helpful and helpful. The room was spacious. The room was very clean and clean. I'd rather stay there. I've had a great view of the city. The hotel is very nice. The location is very close to the hotel is a great value for money. The hotel is a great location. The location is a little bit of a little bit expensive. The hotel is very clean and the rooms are very comfortable. The staff is very friendly and helpful. The hotel is very clean. The hotel is very close to the city center. The room was very comfortable and comfortable. The room was clean and the room was