# Sanity Check Notebook
This is a sanity-check notebook for validating training / inference consistency and gaining insight into what the model is actually doing. It requires:

1. The saved training / testing / validation datasets 
2. The trained model (**checkpoint**)

It runs inference on samples from the training / testing / validation datasets and shows the input, target, and prediction for each sampled example.

In [2]:
import sys
sys.path.append('..')
from transformer_modules import HFT5GenerationModel
import pytorch_lightning as pl
import numpy as np
from transformers import T5Tokenizer
from datasets import load_from_disk


In [3]:
# Checkpoint of the trained model under inspection.
checkpoint_path = '../models/t5-small-SGD/logs/schema-guided/train-batch256-adafactor/checkpoints/epoch=7-step=5139.ckpt'

# Name components that together identify the saved, preprocessed dataset.
dataset_name = "GEMSGD"
tokenizer_name = "T5Tokenizer"
linearizer_name = "SGD_SchemaGuidedLinearizer"

# The three saved splits share one path template and differ only in the suffix.
train_dataset_path, val_dataset_path, test_dataset_path = (
    f'../data/{dataset_name}_{tokenizer_name}{linearizer_name}_{suffix}'
    for suffix in ('train', 'val', 'test')
)

In [4]:
# Restore the trained model from the checkpoint configured above.
model = HFT5GenerationModel.load_from_checkpoint(checkpoint_path=checkpoint_path)
# Switch to evaluation mode before running inference so that dropout and other
# train-only behaviour cannot perturb the predictions shown below. Lightning's
# documented inference recipe calls eval() explicitly after load_from_checkpoint.
# The trailing semicolon keeps the notebook from echoing the module repr.
model.eval();

In [5]:
# Load the pretrained 't5-small' tokenizer; the helper cells below use it to
# decode input, label, and predicted token ids back into text.
# NOTE(review): presumably the same vocabulary the checkpoint was trained with — confirm.
tokenizer = T5Tokenizer.from_pretrained('t5-small')

In [17]:
from numpy.random import default_rng
import pandas as pd

SEED = 20              # seed for the sampling generator
TEST_SIZE = 10         # number of rows drawn when no explicit positions are given
SAMPLE_POOL_SIZE = 20  # positions are drawn from range(SAMPLE_POOL_SIZE)

# Seed the generator so that Restart & Run All reproduces the same sample.
# Previously the generator was unseeded and SEED was passed as the population
# size, which made every run draw a different subset.
rng = default_rng(SEED)
idx = rng.choice(SAMPLE_POOL_SIZE, size=TEST_SIZE, replace=False)

# Hand-picked row positions, passed explicitly when inspecting the test split.
chosen_idx = [92, 504, 2340, 2579, 2864, 3546, 3667, 4463]

def sample_and_generate_with_split(dataset_path, chosen_idx=None):
    """Decode inputs, targets, and model predictions for a few rows of a saved split.

    Args:
        dataset_path: Directory of a dataset saved to disk, read with
            ``load_from_disk``. It must expose ``input_ids`` and ``labels`` columns.
        chosen_idx: Optional sequence of row positions to inspect. When it is
            ``None`` or empty, the notebook-level random sample ``idx`` is used.

    Returns:
        A tuple ``(input_text, target_text, pred_text)`` of decoded string lists,
        aligned by position.
    """
    import torch  # local import: the notebook's import cell does not bring in torch

    dataset = load_from_disk(dataset_path)
    dataset.set_format('torch', columns=['input_ids', 'labels'])

    # Use len() instead of truthiness so a NumPy index array is accepted too
    # (bool() of a multi-element array raises ValueError).
    has_explicit_rows = chosen_idx is not None and len(chosen_idx) > 0
    batch = dataset[chosen_idx] if has_explicit_rows else dataset[idx]

    input_text = tokenizer.batch_decode(batch['input_ids'], skip_special_tokens=True)
    target_text = tokenizer.batch_decode(batch['labels'], skip_special_tokens=True)

    # Inference only: no autograd graph is needed for a sanity check.
    with torch.no_grad():
        output = model.forward(batch['input_ids'])
    pred_text = tokenizer.batch_decode(output, skip_special_tokens=True)
    return input_text, target_text, pred_text

def check_split(dataset_path, chosen_idx=[]):
    """Tabulate decoded inputs, targets, and predictions for one dataset split.

    Args:
        dataset_path: Directory of the saved split to sample from.
        chosen_idx: Row positions to inspect; forwarded unchanged to
            ``sample_and_generate_with_split``.

    Returns:
        A ``pandas.DataFrame`` with columns ``input``, ``target`` and ``pred``,
        one row per inspected example.
    """
    inputs, targets, preds = sample_and_generate_with_split(dataset_path, chosen_idx=chosen_idx)
    # zip keeps the three lists aligned and stops at the shortest one.
    rows = list(zip(inputs, targets, preds))
    return pd.DataFrame(rows, columns=['input', 'target', 'pred'])

In [20]:
def inspect_model_output(dataset_name, tokenizer_name, linearizer_name, model_path, split='test', chosen_idx=None):
    """Load a checkpoint and tabulate its predictions on one saved dataset split.

    Args:
        dataset_name: Dataset component of the saved split's directory name.
        tokenizer_name: Tokenizer component of the saved split's directory name.
        linearizer_name: Linearizer component of the saved split's directory name.
        model_path: Path of the checkpoint to load and inspect.
        split: One of ``'train'``, ``'test'`` or ``'val'``.
        chosen_idx: Optional row positions to inspect; forwarded to ``check_split``.

    Returns:
        The ``pandas.DataFrame`` produced by ``check_split`` for the requested split.

    Raises:
        ValueError: If ``split`` is not one of the supported names.
        FileNotFoundError: If the split's directory does not exist.
    """
    import os  # local import: the notebook's import cell does not bring in os

    if split not in ('train', 'test', 'val'):
        raise ValueError("split = [train | test | val]")

    dataset_path = f'../data/{dataset_name}_{tokenizer_name}{linearizer_name}_{split}'
    # Explicit check instead of assert, so it still runs under `python -O`.
    if not os.path.isdir(dataset_path):
        raise FileNotFoundError(f"dataset directory not found: {dataset_path}")

    # Load the checkpoint the caller asked for (previously the notebook-level
    # checkpoint_path was used and model_path was ignored).
    loaded_model = HFT5GenerationModel.load_from_checkpoint(checkpoint_path=model_path)
    loaded_model.eval()

    # The sampling helpers read the notebook-level name `model`. Bind the freshly
    # loaded model to it for the duration of the call, then restore the previous
    # binding so the rest of the notebook is unaffected.
    notebook_ns = globals()
    missing = object()
    previous_model = notebook_ns.get('model', missing)
    notebook_ns['model'] = loaded_model
    try:
        return check_split(dataset_path, chosen_idx=chosen_idx)
    finally:
        if previous_model is missing:
            del notebook_ns['model']
        else:
            notebook_ns['model'] = previous_model

In [11]:
# Widen pandas' rendering limits so long utterances are not truncated in the tables below.
pd.set_option("display.max_colwidth", 400)
pd.set_option("display.width", 400)

In [13]:
# Inspect the training split. No chosen_idx is passed, so the helper falls back
# to the notebook-level random sample `idx`.
train_df = check_split(train_dataset_path)

In [19]:
train_df

Unnamed: 0,input,target,pred
0,INFORM ( street_address = Milpitas Square ) INFORM ( price_range = moderate ),"Price range is moderate, The address is Milpitas Square",The address is Milpitas Square and the price range is moderate.
1,REQUEST ( cuisine ) REQUEST ( city ),"Sure, I will help you, What type of food are you looking for? Which city should i search in?",What kind of food do you want and in which city?
2,OFFER ( restaurant_name = Bird Dog ) OFFER ( city = Palo Alto ) INFORM_COUNT ( count = 7 ),I see that 7 restaurants suit to what you requested. Bird Dog seems as a good restaurant and is located in Palo Alto.,I found 7 restaurants. Bird Dog is a nice restaurant in Palo Alto.
3,CONFIRM ( restaurant_name = Bird Dog ) CONFIRM ( city = Palo Alto ) CONFIRM ( time = 11:30 am ) CONFIRM ( party_size = 2 ) CONFIRM ( date = today ),Can you please confirm that you want to book a table for 2 at 11:30 am at the Bird Dog restaurant in Palo Alto for today.,Please confirm the following details: Booking a table for 2 at Bird Dog in Palo Alt
4,OFFER ( restaurant_name = Anjappar Chettinad Restaurant ) OFFER ( city = Milpitas ),"I found a good restaurant in Milpitas, The restaurant name is Anjappar Chettinad Restaurant",Anjappar Chettinad Restaurant is a nice restaurant in Milpita
5,CONFIRM ( restaurant_name = Olive Garden Italian Restaurant ) CONFIRM ( city = Milpitas ) CONFIRM ( time = 1:30 pm ) CONFIRM ( date = March 11th ) CONFIRM ( party_size = 2 ),"Please confirm the following details: Booking a table at Olive Garden Italian Restaurant at Milpitas on March 11th reservation time is 1:30 pm, The reservation is for 2 people",Please confirm: table for 2 at Olive Garden Italian Restaurant in Milpitas at 1:30
6,REQUEST ( city ),Do you have a specific which you want the eating place to be located at?,What city should I look in?
7,OFFER_INTENT ( intent = ReserveRestaurant ),shall i reserve a table here for you?,Do you want to book a table?
8,OFFER ( restaurant_name = Aria Dining & Banquets Fine Indian Cuisine ) OFFER ( city = Milpitas ),"I found another restaurant for you, Aria Dining & Banquets Fine Indian Cuisine Restaurant at Milpitas",I found Aria Dining & Banquets Fine Indian Cuisine in Milpitas.
9,GOODBYE,Have a good time!,Have a great day.


In [18]:
# Inspect the test split at the hand-picked row positions in `chosen_idx`.
test_df = check_split(test_dataset_path, chosen_idx=chosen_idx)

In [19]:
test_df

Unnamed: 0,input,target,pred
0,REQUEST ( End date of the trip ),I need to know the starting date and return date of your travel. Shall I try to search for the flight on March 3rd as leaving date?,When are you leaving?
1,OFFER ( The company that provides air transport services = Alaska Airlines ) OFFER ( Departure time of the flight flying to the destination = 1:05 pm ) OFFER ( Whether the flight is a direct one = True ) OFFER ( Departure time of the flight coming back from the trip = 10:05 am ) OFFER ( The total cost of the flight tickets = $101 ),Are you interested in flying with Alaska Airlines? The starting flight will takes off at 1:05 pm and the returning flight takes off at 10:05 am. The price of the ticket is $101.,Alaska Airlines has a direct flight that departs at 10:05 am. The flight costs
2,OFFER ( The company that provides air transport services = Southwest Airlines ) OFFER ( Departure time of the flight flying to the destination = 4 pm ) OFFER ( Whether the flight is a direct one = True ) OFFER ( Departure time of the flight coming back from the trip = 5:10 pm ) OFFER ( The total cost of the flight tickets = $124 ),How about a direct Southwest Airlines flight whose onward flight leaves at 4 pm and the return flight takes off at 5:10 pm with the cost of the ticket as $124?,Southwest Airlines has a direct flight that departs at 5:10 pm. It costs $1
3,OFFER ( The company that provides air transport services = Southwest Airlines ) OFFER ( Departure time of the flight flying to the destination = 4 pm ) OFFER ( Whether the flight is a direct one = True ) OFFER ( Departure time of the flight coming back from the trip = 5:10 pm ) OFFER ( The total cost of the flight tickets = $130 ),There's a Southwest Airlines direct flight leaving at 4 pm and returning at 5:10 pm if you want it. It costs $130.,Southwest Airlines has a direct flight that departs at 5:10 pm. It costs $1
4,OFFER ( The company that provides air transport services = American Airlines ) OFFER ( Departure time of the flight flying to the destination = 11 pm ) OFFER ( Whether the flight is a direct one = True ) OFFER ( Departure time of the flight coming back from the trip = 6:15 pm ) OFFER ( The total cost of the flight tickets = $218 ),"There's also a direct American Airlines flight taking of at 11 pm, with the return flight at 6:15 pm. Tickets are priced at $218.",American Airlines has a direct flight that departs at 6:15 pm. It costs $2
5,REQUEST ( The name of the airport or city to depart from ),Will you leave on March 8th? Where do you depart from?,Where are you departing from?
6,OFFER ( The company that provides air transport services = Southwest Airlines ) OFFER ( Departure time of the flight flying to the destination = 4 pm ) OFFER ( Whether the flight is a direct one = True ) OFFER ( Departure time of the flight coming back from the trip = 1:10 pm ) OFFER ( The total cost of the flight tickets = $218 ) INFORM_COUNT ( count = 2 ),i found 2 for you. there's a southwest airlines flight that leaves at 4 pm and the flight back leaves at 1:10 pm and it costs $218.,I found 2 flights for you. Southwest Airlines has a direct flight that departs at 1:1
7,REQUEST ( The name of the airport or city to depart from ),"What city do you want to depart from? Also, will it be on March 10th?",Where are you departing from?


In [22]:
# Inspect the validation split. No chosen_idx is passed, so the helper falls back
# to the notebook-level random sample `idx`.
val_df = check_split(val_dataset_path)


In [23]:
val_df

Unnamed: 0,input,target,pred
0,INFORM ( price_range = moderate ),"Nope, the price is moderate.",The price range is moderate.
1,GOODBYE,Have a good day!,Have a great day.
2,REQUEST ( shared_ride ),A shared ride?,Do you want a shared ride?
3,GOODBYE,Have a great day!,Have a great day.
4,OFFER_INTENT ( intent = ReserveCar ),Would you like to reserve a car now?,Would you like to reserve this car?
5,OFFER ( hotel_name = Aloft Phoenix-Airport ) OFFER ( star_rating = 3 ) INFORM_COUNT ( count = 10 ),"There are 10 hotels you might like. There is Aloft Phoenix-Airport, it has a 3 star rating.",I found 10 hotels. Aloft Phoenix-Airport is a 3 star hotel.
6,CONFIRM ( song_name = Carnivore ) CONFIRM ( playback_device = TV ),Please confirm the song Carnivore on tv.,Please confirm the following: Play Carnivore on TV.
7,CONFIRM ( account_type = checking ) CONFIRM ( transfer_amount = $1600 ) CONFIRM ( recipient_name = philip ) CONFIRM ( recipient_account_type = checking ),You would like to transfer $1600 to philip to my checking account to his checking account.,Please confirm the following details: Transfer $1600 from your checking account to philip checking account
8,GOODBYE,Have a good day.,Have a great day.
9,OFFER ( hotel_name = AC Hotel By Marriott Beverly Hills ) OFFER ( star_rating = 1 ),AC Hotel By Marriott Beverly Hills is a decent hotel out there with 1 star.,AC Hotel By Marriott Beverly Hills is a nice 1 star hotel.
