In [2]:
import pandas as pd

In [3]:
atis_train = pd.read_csv("../data/atis-intents/atis_intents_train.csv", header=None)

In [4]:
atis_test = pd.read_csv("../data/atis-intents/atis_intents_test.csv", header=None)

In [5]:
atis_train.columns = ['intent', 'message']

In [6]:
atis_train.head()

Unnamed: 0,intent,message
0,atis_flight,i want to fly from boston at 838 am and arriv...
1,atis_flight,what flights are available from pittsburgh to...
2,atis_flight_time,what is the arrival time in san francisco for...
3,atis_airfare,cheapest airfare from tacoma to orlando
4,atis_airfare,round trip fares from pittsburgh to philadelp...


In [7]:
!pip install transformers -q

[K     |████████████████████████████████| 3.8 MB 13.0 MB/s 
[K     |████████████████████████████████| 895 kB 42.1 MB/s 
[K     |████████████████████████████████| 67 kB 4.9 MB/s 
[K     |████████████████████████████████| 6.5 MB 42.3 MB/s 
[K     |████████████████████████████████| 596 kB 46.5 MB/s 
[?25h

In [8]:
import csv
import os
import argparse
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW, get_cosine_with_hard_restarts_schedule_with_warmup
import warnings
warnings.filterwarnings('ignore')

In [10]:
class MyDataset(Dataset):
	def __init__(self, data_file_name, data_dir='.data/'):
		super().__init__()

		data_path = os.path.join(data_file_name)

		self.data_list = []
		self.end_of_text_token = " <|endoftext|> "
		
		with open(data_path) as csv_file:
			csv_reader = csv.reader(csv_file)
			
			for row in csv_reader:
				data_str = f"{row[0]}: {row[1]}{self.end_of_text_token}"
				self.data_list.append(data_str)
		
	def __len__(self):
		return len(self.data_list)

	def __getitem__(self, item):
		return self.data_list[item]

In [11]:
def get_data_loader(data_file_name):
	dataset = MyDataset(data_file_name)
	data_loader = DataLoader(dataset, batch_size=1, shuffle=True)
	return data_loader

In [12]:
def train(epochs, data_loader, batch_size, tokenizer, model, device):	
	batch_counter = 0
	sum_loss = 0.0

	for epoch in range(epochs):
		print (f'Running {epoch+1} epoch')

		for idx, txt in enumerate(data_loader):
			txt = torch.tensor(tokenizer.encode(txt[0]))
			txt = txt.unsqueeze(0).to(device)
			outputs = model(txt, labels=txt)
			loss, _ = outputs[:2]
			loss.backward()
			sum_loss += loss.data

			if idx%batch_size==0:
				batch_counter += 1
				optimizer.step()
				scheduler.step()
				optimizer.zero_grad()
				model.zero_grad()

			if batch_counter == 10:
				print(f"Total Loss is {sum_loss}") #printed after every 10*batch_size
				batch_counter = 0
				sum_loss = 0.0

	return model

def save_model(model, name):
	"""
	Summary:
		Saving model to the Disk
	Parameters:
		model: Trained model object
		name: Name of the model to be saved
	"""
	print("Saving model to Disk")
	torch.save(model.state_dict(), f"{name}.pt")
	return

def load_models():
	"""
	Summary:
		Loading Pre-trained model
	"""
	print('Loading/Downloading GPT-2 Model')
	tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
	model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
	return tokenizer, model

In [13]:
BATCH_SIZE = 32
EPOCHS = 3
LEARNING_RATE = 3e-5
WARMUP_STEPS = 300
MAX_SEQ_LEN = 200
MODEL_NAME = "atis_train.pt"
DATA_FILE = "../data/atis-intents/atis_intents_train.csv"

TOKENIZER, MODEL = load_models()
LOADER = get_data_loader(DATA_FILE)

DEVICE = 'cpu'
if torch.cuda.is_available():
  DEVICE = 'cuda'

model = MODEL.to(DEVICE)
model.train()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(optimizer, num_warmup_steps=WARMUP_STEPS, num_training_steps=-1)

Loading/Downloading GPT-2 Model


Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

In [14]:
model = train(EPOCHS, LOADER, BATCH_SIZE, TOKENIZER, MODEL, DEVICE)

Running 1 epoch
Total Loss is 1739.82080078125
Total Loss is 1812.8109130859375
Total Loss is 1642.8765869140625
Total Loss is 1389.8587646484375
Total Loss is 1144.45654296875
Total Loss is 899.45703125
Total Loss is 748.5552978515625
Total Loss is 653.5350952148438
Total Loss is 587.5184936523438
Total Loss is 549.7803344726562
Total Loss is 544.780029296875
Total Loss is 504.99609375
Total Loss is 490.9941101074219
Total Loss is 470.1604919433594
Total Loss is 462.7034912109375
Running 2 epoch
Total Loss is 404.494873046875
Total Loss is 450.9009704589844
Total Loss is 419.4197692871094
Total Loss is 426.0187072753906
Total Loss is 428.8539733886719
Total Loss is 421.5927734375
Total Loss is 423.1800842285156
Total Loss is 419.42095947265625
Total Loss is 409.985595703125
Total Loss is 401.4256896972656
Total Loss is 402.74871826171875
Total Loss is 394.849853515625
Total Loss is 397.07037353515625
Total Loss is 396.4541015625
Total Loss is 393.1189270019531
Running 3 epoch
Total Lo

In [15]:
save_model(model, MODEL_NAME)

Saving model to Disk


In [21]:
def choose_from_top_k_top_n(probs, k=50, p=0.8):
	ind = np.argpartition(probs, -k)[-k:]
	top_prob = probs[ind]
	top_prob = {i: top_prob[idx] for idx,i in enumerate(ind)}
	sorted_top_prob = {k: v for k, v in sorted(top_prob.items(), key=lambda item: item[1], reverse=True)}
	
	t=0
	f=[]
	pr = []
	for k,v in sorted_top_prob.items():
	  t+=v
	  f.append(k)
	  pr.append(v)
	  if t>=p:
		  break
	top_prob = pr / np.sum(pr)
	token_id = np.random.choice(f, 1, p = top_prob)

	return int(token_id)

def generate(tokenizer, model, sentences, label):
	with torch.no_grad():
	  for idx in range(sentences):
		  finished = False
		  cur_ids = torch.tensor(tokenizer.encode(label)).unsqueeze(0).to('cpu')
		  for i in range(100):
			  outputs = model(cur_ids, labels=cur_ids)
			  loss, logits = outputs[:2]

			  softmax_logits = torch.softmax(logits[0,-1], dim=0)

			  if i < 5:
				  n = 10
			  else:
				  n = 5

			  next_token_id = choose_from_top_k_top_n(softmax_logits.to('cpu').numpy()) #top-k-top-n sampling
			  cur_ids = torch.cat([cur_ids, torch.ones((1,1)).long().to('cpu') * next_token_id], dim = 1)

			  if next_token_id in tokenizer.encode('<|endoftext|>'):
				  finished = True
				  break

		  if finished:	          
			  output_list = list(cur_ids.squeeze().to('cpu').numpy())
			  output_text = tokenizer.decode(output_list)
			  print (output_text)
		  else:
			  output_list = list(cur_ids.squeeze().to('cpu').numpy())
			  output_text = tokenizer.decode(output_list)
			  print (output_text)

def load_models(model_name):
	"""
	Summary:
		Loading the trained model
	"""
	print ('Loading Trained GPT-2 Model')
	tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
	model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
	model_path = model_name
	model.load_state_dict(torch.load(model_path))
	return tokenizer, model

In [22]:
SENTENCES = 10
MODEL_NAME = "./atis_train.pt.pt"
LABEL = "atis_airfare" # LABEL = "atis_flight"

TOKENIZER, MODEL = load_models(MODEL_NAME)

Loading Trained GPT-2 Model


In [23]:
generate(TOKENIZER, MODEL, SENTENCES, LABEL)

atis_airfare:  what is the cheapest one way fare from boston to san francisco <|endoftext|>
atis_airfare:  what is the cost of a round trip ticket from boston to san francisco <|endoftext|>
atis_airfare:  how much does delta offer economy class on united airlines <|endoftext|>
atis_airfare:  what is the round trip cost of a one way ticket from new york to san francisco <|endoftext|>
atis_airfare:  show me the fare for connecting to boston international <|endoftext|>
atis_airfare:  what is the fare for flight 965 from boston to denver <|endoftext|>
atis_airfare:  please list the first class fares for delta airlines <|endoftext|>
atis_airfare:  what is the cost of one way fare between denver and philadelphia <|endoftext|>
atis_airfare:  i'd like to book a flight from san francisco to boston <|endoftext|>
atis_airfare:  list the cheapest one way fares from denver to san francisco <|endoftext|>


In [24]:
generate(TOKENIZER, MODEL, SENTENCES, "atis_flight")

atis_flight:  show me the latest flight from boston to oakland <|endoftext|>
atis_flight:  what are the flights from denver to pittsburgh on wednesday <|endoftext|>
atis_flight:  show me all the flights from boston to san francisco <|endoftext|>
atis_flight:  show me the flights from denver to pittsburgh on august fourth <|endoftext|>
atis_flight:  what are the flights between boston and denver <|endoftext|>
atis_flight:  flights between dallas and boston <|endoftext|>
atis_flight:  show me the least expensive flight from atlanta to charlotte <|endoftext|>
atis_flight:  which one of the flights is a stopover in denver <|endoftext|>
atis_flight:  show me all the united flights leaving boston <|endoftext|>
atis_flight:  list all flights on united from san francisco to philadelphia <|endoftext|>
