In [1]:
!pip install transformers tensorflow




In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import InputExample, InputFeatures
import tensorflow as tf

# Load the dataset
file_path = 'Evaluation-dataset.csv'
data = pd.read_csv(file_path)

# Extract the review texts and subtheme sentiments
reviews = data.iloc[:, 0].values
subtheme_sentiments = data.iloc[:, 1:].values

# Preprocess the data
preprocessed_data = []
for review, sentiments in zip(reviews, subtheme_sentiments):
    for sentiment in sentiments:
        if pd.notna(sentiment):
            parts = sentiment.rsplit(' ', 1)
            if len(parts) == 2:
                subtheme, sentiment_polarity = parts
                preprocessed_data.append({
                    'review': review,
                    'subtheme': subtheme,
                    'sentiment': sentiment_polarity
                })

# Convert the preprocessed data into a DataFrame
preprocessed_df = pd.DataFrame(preprocessed_data)

# Manually balance the dataset (example)
# Note: Ideally, you should add more examples to balance the dataset better
# Here we just duplicate some negative examples for illustration
negative_examples = preprocessed_df[preprocessed_df['sentiment'] == 'negative']
preprocessed_df = pd.concat([preprocessed_df, negative_examples])

# Encode the labels
le_subtheme = LabelEncoder()
le_sentiment = LabelEncoder()

preprocessed_df['subtheme'] = le_subtheme.fit_transform(preprocessed_df['subtheme'])
preprocessed_df['sentiment'] = le_sentiment.fit_transform(preprocessed_df['sentiment'])

# Split the data into training and test sets
X = preprocessed_df['review']
y_subtheme = preprocessed_df['subtheme']
y_sentiment = preprocessed_df['sentiment']
X_train, X_test, y_train_subtheme, y_test_subtheme, y_train_sentiment, y_test_sentiment = train_test_split(
    X, y_subtheme, y_sentiment, test_size=0.2, random_state=42)

# Tokenize and encode the dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def convert_data_to_examples(reviews, labels):
    return [InputExample(guid=None, text_a=review, text_b=None, label=label) for review, label in zip(reviews, labels)]

def convert_examples_to_tf_dataset(examples, tokenizer, max_length=128):
    features = []

    for e in examples:
        inputs = tokenizer.encode_plus(
            e.text_a,
            add_special_tokens=True,
            max_length=max_length,
            pad_to_max_length=True,
            return_attention_mask=True,
        )
        input_ids, token_type_ids, attention_mask = inputs["input_ids"], inputs["token_type_ids"], inputs["attention_mask"]

        features.append(
            InputFeatures(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, label=e.label)
        )

    def gen():
        for f in features:
            yield (
                {
                    "input_ids": f.input_ids,
                    "attention_mask": f.attention_mask,
                    "token_type_ids": f.token_type_ids,
                },
                f.label,
            )

    return tf.data.Dataset.from_generator(
        gen,
        ({ "input_ids": tf.int32, "attention_mask": tf.int32, "token_type_ids": tf.int32 }, tf.int64),
        ({ "input_ids": tf.TensorShape([None]), "attention_mask": tf.TensorShape([None]), "token_type_ids": tf.TensorShape([None]) }, tf.TensorShape([])),
    )

# Convert data to examples and then to tf.dataset
train_examples_subtheme = convert_data_to_examples(X_train, y_train_subtheme)
test_examples_subtheme = convert_data_to_examples(X_test, y_test_subtheme)
train_dataset_subtheme = convert_examples_to_tf_dataset(train_examples_subtheme, tokenizer)
test_dataset_subtheme = convert_examples_to_tf_dataset(test_examples_subtheme, tokenizer)

train_examples_sentiment = convert_data_to_examples(X_train, y_train_sentiment)
test_examples_sentiment = convert_data_to_examples(X_test, y_test_sentiment)
train_dataset_sentiment = convert_examples_to_tf_dataset(train_examples_sentiment, tokenizer)
test_dataset_sentiment = convert_examples_to_tf_dataset(test_examples_sentiment, tokenizer)

train_dataset_subtheme = train_dataset_subtheme.shuffle(100).batch(16).repeat(3)
test_dataset_subtheme = test_dataset_subtheme.batch(16)

train_dataset_sentiment = train_dataset_sentiment.shuffle(100).batch(16).repeat(3)
test_dataset_sentiment = test_dataset_sentiment.batch(16)

# Print subtheme and sentiment names
print("Subtheme Names:")
for subtheme_name in le_subtheme.classes_:
    print(subtheme_name)

print("\nSentiment Names:")
for sentiment_name in le_sentiment.classes_:
    print(sentiment_name)

# Build, train, and evaluate the model for subtheme classification
model_subtheme = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(le_subtheme.classes_))

model_subtheme.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
model_subtheme.fit(train_dataset_subtheme, epochs=3, steps_per_epoch=115)

preds_subtheme = model_subtheme.predict(test_dataset_subtheme).logits
y_pred_subtheme = tf.argmax(preds_subtheme, axis=1).numpy()
print(classification_report(y_test_subtheme, y_pred_subtheme, labels=range(len(le_subtheme.classes_)), target_names=le_subtheme.classes_))

# Build, train, and evaluate the model for sentiment classification
model_sentiment = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(le_sentiment.classes_))

model_sentiment.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
model_sentiment.fit(train_dataset_sentiment, epochs=3, steps_per_epoch=115)

preds_sentiment = model_sentiment.predict(test_dataset_sentiment).logits
y_pred_sentiment = tf.argmax(preds_sentiment, axis=1).numpy()
print(classification_report(y_test_sentiment, y_pred_sentiment, labels=range(len(le_sentiment.classes_)), target_names=le_sentiment.classes_))

# Function to predict subthemes and sentiments for a new review
def predict_subtheme_and_sentiment(review):
    inputs = tokenizer.encode_plus(review, return_tensors="tf")
    subtheme_logits = model_subtheme(inputs)[0]
    sentiment_logits = model_sentiment(inputs)[0]
    predicted_subtheme = tf.argmax(subtheme_logits, axis=1).numpy()[0]
    predicted_sentiment = tf.argmax(sentiment_logits, axis=1).numpy()[0]
    return le_subtheme.inverse_transform([predicted_subtheme])[0], le_sentiment.inverse_transform([predicted_sentiment])[0]

# Example of predicting a new sentence
new_review = "The service was bad and the staff were not friendly."
predicted_subtheme, predicted_sentiment = predict_subtheme_and_sentiment(new_review)
print(f"Review: {new_review}")
print(f"Predicted Subtheme: {predicted_subtheme}")
print(f"Predicted Sentiment: {predicted_sentiment}")


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3
                                                                                                                                                                                                                                                                                                                                                                                                                                                                precision    recall  f1-score   support

                                                                                                                                                                                                                                                                                                                                                                                                                                                      Cheapest       0.00      0.00      0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3
               precision    recall  f1-score   support

            .       0.00      0.00      0.00         0
       again.       0.00      0.00      0.00         0
       choice       0.00      0.00      0.00         0
     choice!.       0.00      0.00      0.00         0
       coffee       0.00      0.00      0.00         0
communication       0.00      0.00      0.00         0
    courteous       0.00      0.00      0.00         1
     decision       0.00      0.00      0.00         0
deliveries...       0.00      0.00      0.00         0
          do.       0.00      0.00      0.00         0
      either.       0.00      0.00      0.00         0
        else.       0.00      0.00      0.00         0
 experiences.       0.00      0.00      0.00         1
      fitter.       0.00      0.00      0.00         1
      fitting       0.00      0.00      0.00         0
     fitting.       0.00      0.00      0.00         0
         free       0.00      0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Review: The service was bad and the staff were not friendly.
Predicted Subtheme: garage service
Predicted Sentiment: negative


In [32]:
# Example of predicting a new sentence
new_review = """This is not a review of [REDACTED].com as such but my experience of the garage booked through them (A&S MOT Ltd).A terrible experience meaning I will never use A&S MOT Ltd again. Details of the issues are below.- Arranged and paid for two new tyres to be fitted and 4-wheel alignment as rear tyres wearing on inside- Took time off work and waited at garage. Work done and was given paperwork. Wheels dirty - but as expected- Left for work and steering wheel off-centre when going straight- Got to work and checked values on paperwork. Rear out of alignment and before/after figures unchanged- Left work early and returned to garage. Told rear not adjustable and the only way to do it is to fit new parts- Arranged to bring car back next day to sort steering- Rang Audi to ask about part required etc. and was told that all settings ARE adjustable by any competent garage, both front AND rear- Went to another wheel alignment place and asked them to confirm. They put car on ramp and within 30 seconds said the same and they didn't even need any specialist tools etc.- Took more time off work to return to A&S as agreed. Told them my findings re adjustment and they said they would sort it along with the off-centre steering wheel- When finished told "all done" - given no paperwork. Asked for printout and once received checked it whilst there. Rear STILL not adjusted- Asked for an explanation. Wasn't given one but told I would need to bring it back AGAIN- I then went on ~100 mile journey on which someone almost drove into me. I checked the dash cam. Found out it had been moved to point at the sky. Reviewed the footage and it was an employee at A&S that moved it prior to taking the car for a drive - I can only assume he wanted to drive it like he stole it- Cleaned car at the weekend afterwards and the gunk that was on the wheels was actually all over - especially one. Was also stuck on like glue. Took some fairly abrasive wheel cleaner and a lot of effort to remove- Checked the pressures of the new tyres - they had been set too high - higher than the maximum load pressures indicated on the car itselfIn summary, DON’T use this garage if you value your vehicle, time, money or safety."""
predicted_subtheme, predicted_sentiment = predict_subtheme_and_sentiment(new_review)
print(f"Review: {new_review}")
print(f"Predicted Subtheme: {predicted_subtheme}")
print(f"Predicted Sentiment: {predicted_sentiment}")
# print(subtheme_classes)


Review: This is not a review of [REDACTED].com as such but my experience of the garage booked through them (A&S MOT Ltd).A terrible experience meaning I will never use A&S MOT Ltd again. Details of the issues are below.- Arranged and paid for two new tyres to be fitted and 4-wheel alignment as rear tyres wearing on inside- Took time off work and waited at garage. Work done and was given paperwork. Wheels dirty - but as expected- Left for work and steering wheel off-centre when going straight- Got to work and checked values on paperwork. Rear out of alignment and before/after figures unchanged- Left work early and returned to garage. Told rear not adjustable and the only way to do it is to fit new parts- Arranged to bring car back next day to sort steering- Rang Audi to ask about part required etc. and was told that all settings ARE adjustable by any competent garage, both front AND rear- Went to another wheel alignment place and asked them to confirm. They put car on ramp and within 30

In [34]:
import os

# Directory to save the models and tokenizer
save_directory_subtheme = '/content/drive/MyDrive/oriserve_assignment/saved_model_subtheme2'
save_directory_sentiment = '/content/drive/MyDrive/oriserve_assignment/saved_model_sentiment2'

# Create directories if they don't exist
if not os.path.exists(save_directory_subtheme):
    os.makedirs(save_directory_subtheme)
if not os.path.exists(save_directory_sentiment):
    os.makedirs(save_directory_sentiment)

# Save the subtheme classification model
model_subtheme.save_pretrained(save_directory_subtheme)
tokenizer.save_pretrained(save_directory_subtheme)

# Save the sentiment classification model
model_sentiment.save_pretrained(save_directory_sentiment)
tokenizer.save_pretrained(save_directory_sentiment)

print(f"Subtheme classification model and tokenizer saved to {save_directory_subtheme}")
print(f"Sentiment classification model and tokenizer saved to {save_directory_sentiment}")


Subtheme classification model and tokenizer saved to /content/drive/MyDrive/oriserve_assignment/saved_model_subtheme2
Sentiment classification model and tokenizer saved to /content/drive/MyDrive/oriserve_assignment/saved_model_sentiment2


In [60]:
from transformers import BertTokenizer, TFBertForSequenceClassification
import tensorflow as tf

# Directory where the models and tokenizer are saved
save_directory_subtheme = '/content/drive/MyDrive/oriserve_assignment/saved_model_subtheme'
save_directory_sentiment = '/content/drive/MyDrive/oriserve_assignment/saved_model_sentiment'

# Load the tokenizer and models
tokenizer = BertTokenizer.from_pretrained(save_directory_subtheme)
model_subtheme = TFBertForSequenceClassification.from_pretrained(save_directory_subtheme)
model_sentiment = TFBertForSequenceClassification.from_pretrained(save_directory_sentiment)

# Define the actual classes used during training
subtheme_classes = [
    "Cheapest", "Garage was quick & efficient with fitting of", "Great garage fitted",
    "ability to browse through various makes of tyres before making a",
    "and an excellent service from the garage who fitted the",
    "and both times have been good", "cheaper than anything else I have found by some",
    "clean reception area and free", "easily navigable web",
    "easy to browse selection of tyres before making a",
    "faster than dealerships to arrange supply and partnership with ATS works well",
    "fitting took a long", "good", "good local fitting",
    "good range and competitive prices on", "great", "hassle",
    "it feels more transparent than buying from a mechanic based on their",
    "keep it up! Will definitely buy from you", "my tyre supplier of",
    "one tyre with dangerously low air pressure. Asked the garage to top up the tyre and check the other three. They did top up the one tyre but didn't bother to check the others. When I checked the tyres 10 minutes later all four tyres had varying pressures (from 29 to 37.5). I emailed both companies and have not received an apology from either. I will never use Lavender Motors again (they shouldn't be in business) and likely not [REDACTED].com",
    "only 8 as the garage was unable to fit the tyres in the allotted time and I had to wait over an",
    "polite", "prompt", "really friendly and efficient",
    "really good", "recommended a great local shop for",
    "simple booking for fitting", "simple fitting", "simple to use",
    "slick delivery service and good", "staff were", "telephone help avaiable if",
    "the process was simple and quick.Regular offers make it easy to find premium tyres at decent",
    "the site is easy to", "they looked grey and old (but unused) not at all how new tyres normally look. Not surprising when I inspected the 'Dot Date' and it was 4517 (ie tyres manufactured in week 45 of 2017.Queried this with [REDACTED] who told me that they have many different suppliers and do not check the age of the tyres.They could re-order but no guarantee that the replacements would be any newer. Not at all happy and requested a refund which they say they will",
    "tje garage i choice to go to in order to have the tyres fitted was nit that good and i certainly will not be using them",
    "wide choice of tyres at great prices including delivery and",
    "won’t be using anyone", "advisor/agent service",
    "advisoragent service", "balancing", "booking confusion",
    "call wait time", "change of date", "change of time",
    "damage", "delivery punctuality", "discount not applied",
    "discounts", "ease of booking", "extra charges",
    "facilities", "failed payment", "garage service",
    "incorrect tyres sent", "late notice", "length of fitting",
    "location", "mobile fitter", "mobile fitter didn't arrive",
    "no", "no stock", "refund", "refund not actioned",
    "refund timescale", "response time", "tyre age/dot code",
    "tyre agedot code", "tyre quality", "value for money",
    "wait time"
]

# Define the sentiment names
sentiment_names = [
    ".", "again", "choice", "choice!", "coffee", "communication", "courteous",
    "decision", "deliveries...", "do.", "either.", "else.", "experiences.",
    "fitter.", "fitting", "fitting.", "free", "hour.", "issues", "negative",
    "opinion.", "positive", "price", "price.", "prices", "prices.", "procedure",
    "procedure.", "required.", "service", "service.", "services", "site", "staff",
    "them", "time.", "tyre.", "tyres", "use", "way.", "website"
]

# Function to predict subthemes and sentiments for a new review
def predict_subtheme_and_sentiment(review):
    inputs = tokenizer.encode_plus(review, return_tensors="tf", max_length=128, truncation=True, padding='max_length')
    subtheme_logits = model_subtheme(inputs)[0]
    sentiment_logits = model_sentiment(inputs)[0]
    predicted_subtheme_idx = tf.argmax(subtheme_logits, axis=1).numpy()[0]
    predicted_sentiment_idx = tf.argmax(sentiment_logits, axis=1).numpy()[0]
    predicted_subtheme = subtheme_classes[predicted_subtheme_idx]
    predicted_sentiment = sentiment_names[predicted_sentiment_idx]
    return predicted_subtheme, predicted_sentiment



Some layers from the model checkpoint at /content/drive/MyDrive/oriserve_assignment/saved_model_subtheme were not used when initializing TFBertForSequenceClassification: ['dropout_75']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at /content/drive/MyDrive/oriserve_assignment/saved_model_subtheme.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without f

In [62]:
# Example of predicting a new sentence
new_review = "Excellent service and good price too, have used previously and will use again"
predicted_subtheme, predicted_sentiment = predict_subtheme_and_sentiment(new_review)
print(f"Review: {new_review}")
print(f"Predicted Subtheme: {predicted_subtheme}")
print(f"Predicted Sentiment: {predicted_sentiment}")


Review: Excellent service and good price too, have used previously and will use again
Predicted Subtheme: value for money
Predicted Sentiment: positive
