In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from gensim.models import KeyedVectors
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [2]:
from src.data_processing.process_labels import *
from src.data_processing.process_reviews import *
from src.data_processing.train_val_test import train_val_test
from src.models.model_zoo import *
from src.models.model_train import *
from src.models.model_evalaute import *

## Load in saved LSTM torch model

In [3]:
from src.models.model_zoo import *
from src.models.model_train import *

# Instantiate the LSTM classifier with the hyperparameters of the saved checkpoint.
# NOTE(review): input_size presumably matches the 300-d word2vec embeddings — confirm.
input_size, hidden_size = 300, 300
num_layers, dropout_prob = 2, 0.3
output_size = 4

# The network is built with fresh weights here; the saved parameters are loaded later.
lstm_model = LSTMmodel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    dropout_prob=dropout_prob,
    output_size=output_size,
)

In [4]:
# Display the parameter count of the instantiated model as this cell's output.
# NOTE(review): count_parameters comes from one of the star imports (presumably src.models.*) — confirm.
count_parameters(lstm_model)

3612004

In [5]:
# Load the saved parameters into the freshly built network.
# map_location='cpu' remaps tensors that were saved from a GPU session, so the
# checkpoint also loads on CPU-only machines instead of raising.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted
# source (consider weights_only=True where the installed torch version supports it).
state_dict = torch.load('src/models/saved_models/lstm_model.pt', map_location='cpu')
lstm_model.load_state_dict(state_dict)
# Switch to evaluation mode; as the last expression this also displays the module summary.
lstm_model.eval()

LSTMmodel(
  (LSTM): LSTM(300, 300, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)
  (linear): Linear(in_features=600, out_features=4, bias=True)
  (softmax): Softmax(dim=1)
)

## Sanity check: try a made-up review

In [6]:
# Load the word2vec vectors that are used to embed review text
word2vec_path = 'word2vec/word2vec-google-news-300.model'
model = KeyedVectors.load(word2vec_path)

In [7]:
# Hand-written review used as a quick smoke test of the loaded classifier
madeup = ['i liked their pizza']
# Inference only: no_grad() avoids building an autograd graph that is never used
with torch.no_grad():
    madeup_probs = lstm_model(process_reviews_w2v(madeup, model))
# Last expression of the cell, so the class probabilities are displayed
madeup_probs

Fetching review embeddings: 100%|██████████| 1/1 [00:00<00:00, 67.25it/s]


tensor([[0.3155, 0.0370, 0.6094, 0.0381]], grad_fn=<SoftmaxBackward0>)

The model fails to correctly classify any of our common-sense test cases, except for reviews that mention both food and service. Therefore, we now focus on fine-tuning a BERT model instead of training an LSTM from scratch.

## Get performance metrics

In [8]:
## PROCESS DATA
# Settings for this evaluation run, named once so they cannot drift apart
MAX_REVIEW_WORDS = 200  # number of leading word embeddings kept per review
# One shared set of fractions guarantees x and y are split with identical arguments
SPLIT_FRACS = dict(train_frac=0.6, val_frac=0.2, test_frac=0.2)

# Read data
df = pd.read_csv('data/raw_reviews/reviews_v1.csv')
# Separate reviews and labels
reviews = df.text
food_labels = df.food
service_labels = df.service
# Get target labels
y = label_generator(food_labels=food_labels.values,
                    service_labels=service_labels.values).trim_and_fetch_labels()
# Trim reviews to size of labels (y); .iloc makes the positional slice explicit
reviews = reviews.iloc[:len(y)].copy()
# Get word2vec embedded reviews
model = KeyedVectors.load('word2vec/word2vec-google-news-300.model') # Load word2vec model
x_all = process_reviews_w2v(reviews=reviews, model=model) # (1000, max review length, 300)
x_all = x_all[:, :MAX_REVIEW_WORDS, :] # cut reviews to only keep the first MAX_REVIEW_WORDS words
# Fail fast if features and labels are misaligned, instead of silently scoring wrong pairs
assert len(x_all) == len(y), f'{len(x_all)} embedded reviews but {len(y)} labels'
# Train/Val/Test split
# NOTE(review): x and y are split by two separate calls; this keeps rows paired only if
# train_val_test splits deterministically by position (no independent shuffling) — confirm.
x_train, x_val, x_test = train_val_test(x_all, **SPLIT_FRACS)
y_train, y_val, y_test = train_val_test(y, **SPLIT_FRACS)

Fetching review embeddings: 100%|██████████| 1000/1000 [00:03<00:00, 251.86it/s]


In [9]:
# output softmax probabilities for x test; no_grad() skips building the autograd
# graph, which evaluation never uses
with torch.no_grad():
    y_scores = lstm_model(x_test)

# convert to one-hot predicted labels; the number of classes is read from the
# model output (dim 1) rather than hard-coded, so it always matches the model head
pred_labels = torch.argmax(y_scores, dim=1)
y_pred = torch.nn.functional.one_hot(pred_labels, num_classes=y_scores.shape[1])

In [10]:
# Score the test-set predictions: a per-class metrics table plus the overall accuracy.
# NOTE(review): the class names are assumed to follow the order of the label columns — confirm.
classes = ['only food', 'only service', 'both', 'neither']
metrics = multi_performance(classes=classes, y_pred=y_pred, y_true=y_test)
performance_df, accuracy = metrics
# Last expression of the cell, so the metrics table is displayed
performance_df

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,only food,only service,both,neither
f1,0.0,0.0,0.675497,0.0
precision,0.0,0.0,0.51,0.0
recall,0.0,0.0,1.0,0.0


In [11]:
# Report the overall accuracy value returned by multi_performance
print(f'Overall accuracy is: {accuracy}')

Overall accuracy is: 0.51


In [12]:
# save results as csv; create the output folder first so a fresh checkout
# without a results/ directory does not fail with OSError on write
Path('results').mkdir(parents=True, exist_ok=True)
performance_df.to_csv('results/lstm_performance_metrics.csv')