# Training a Simple Chatbot using a Seq-to-Seq Model

This notebook trains a simple chatbot using the Cornell Movie Dialogs Corpus and a sequence-to-sequence model with Luong attention mechanism. It includes running hyperparameter sweeps with Weights and Biases (W&B).

## Overview
The key steps involve training the chatbot model, running hyperparameter sweeps, and analyzing the results.

## Procedure
- **Chatbot Training**: Train and evaluate the chatbot model using the PyTorch Chatbot Tutorial.
- **Hyperparameter Sweep Configuration**: Create a sweep configuration using W&B Random Search strategy for specified hyperparameters.
- **Run Hyperparameter Sweeps**: Execute the hyperparameter sweeps in a GPU-enabled environment and observe the results.
- **Best Hyperparameters**: Extract and analyze the best hyperparameters, explain their impact on model convergence, and save the trained model with the lowest loss.

References:
- [Cornell Movie Dialogs Corpus](https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html)


In [None]:
import sys
print(sys.executable)

/scratch/dan9232/ADS/penv/bin/python


[32m[1mLet's setup this directory for W&B![0m
^C

Aborted!


/Users/darien/miniforge3/envs/EnvKeras/bin/python


In [None]:
import os
import requests
import zipfile

# URL of the ZIP file
download_link = 'https://zissou.infosci.cornell.edu/convokit/datasets/movie-corpus/movie-corpus.zip'

response = requests.get(download_link)
zip_filename = 'movie-corpus.zip'

with open(zip_filename, 'wb') as zip_file:
    zip_file.write(response.content)
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall()
os.remove(zip_filename)

In [None]:
# !pip3 install torch torchvision torchaudio


In [None]:
# and put in a ``data/`` directory under the current directory.
#
# After that, let’s import some necessities.
#

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import json


USE_CUDA = torch.cuda.is_available()

print(USE_CUDA)
device = torch.device("cuda" if USE_CUDA else ("mps" if torch.backends.mps.is_available() else "cpu"))

True


In [None]:
# !pip uninstall torch torchvision torchaudio -y

In [None]:
# !pip3 install torch torchvision torchaudio

In [None]:
torch.version

<module 'torch.version' from '/scratch/dan9232/ADS/penv/lib/python3.12/site-packages/torch/version.py'>

In [None]:
corpus_name = "movie-corpus"
corpus = os.path.join("data", corpus_name)

def printLines(file, n=10):
    with open(file, 'rb') as datafile:
        lines = datafile.readlines()
    for line in lines[:n]:
        print(line)

printLines(os.path.join(corpus, "utterances.jsonl"))

b'{"id": "L1045", "conversation_id": "L1044", "text": "They do not!", "speaker": "u0", "meta": {"movie_id": "m0", "parsed": [{"rt": 1, "toks": [{"tok": "They", "tag": "PRP", "dep": "nsubj", "up": 1, "dn": []}, {"tok": "do", "tag": "VBP", "dep": "ROOT", "dn": [0, 2, 3]}, {"tok": "not", "tag": "RB", "dep": "neg", "up": 1, "dn": []}, {"tok": "!", "tag": ".", "dep": "punct", "up": 1, "dn": []}]}]}, "reply-to": "L1044", "timestamp": null, "vectors": []}\n'
b'{"id": "L1044", "conversation_id": "L1044", "text": "They do to!", "speaker": "u2", "meta": {"movie_id": "m0", "parsed": [{"rt": 1, "toks": [{"tok": "They", "tag": "PRP", "dep": "nsubj", "up": 1, "dn": []}, {"tok": "do", "tag": "VBP", "dep": "ROOT", "dn": [0, 2, 3]}, {"tok": "to", "tag": "TO", "dep": "dobj", "up": 1, "dn": []}, {"tok": "!", "tag": ".", "dep": "punct", "up": 1, "dn": []}]}]}, "reply-to": null, "timestamp": null, "vectors": []}\n'
b'{"id": "L985", "conversation_id": "L984", "text": "I hope so.", "speaker": "u0", "meta": {

In [None]:
# Splits each line of the file to create lines and conversations
def loadLinesAndConversations(fileName):
    lines = {}
    conversations = {}
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            lineJson = json.loads(line)
            # Extract fields for line object
            lineObj = {}
            lineObj["lineID"] = lineJson["id"]
            lineObj["characterID"] = lineJson["speaker"]
            lineObj["text"] = lineJson["text"]
            lines[lineObj['lineID']] = lineObj

            # Extract fields for conversation object
            if lineJson["conversation_id"] not in conversations:
                convObj = {}
                convObj["conversationID"] = lineJson["conversation_id"]
                convObj["movieID"] = lineJson["meta"]["movie_id"]
                convObj["lines"] = [lineObj]
            else:
                convObj = conversations[lineJson["conversation_id"]]
                convObj["lines"].insert(0, lineObj)
            conversations[convObj["conversationID"]] = convObj

    return lines, conversations


# Extracts pairs of sentences from conversations
def extractSentencePairs(conversations):
    qa_pairs = []
    for conversation in conversations.values():
        # Iterate over all the lines of the conversation
        for i in range(len(conversation["lines"]) - 1):  # We ignore the last line (no answer for it)
            inputLine = conversation["lines"][i]["text"].strip()
            targetLine = conversation["lines"][i+1]["text"].strip()
            # Filter wrong samples (if one of the lists is empty)
            if inputLine and targetLine:
                qa_pairs.append([inputLine, targetLine])
    return qa_pairs

In [None]:
# Define path to new file
datafile = os.path.join(corpus, "formatted_movie_lines.txt")

delimiter = '\t'
# Unescape the delimiter
delimiter = str(codecs.decode(delimiter, "unicode_escape"))

# Initialize lines dict and conversations dict
lines = {}
conversations = {}
# Load lines and conversations
print("\nProcessing corpus into lines and conversations...")
lines, conversations = loadLinesAndConversations(os.path.join(corpus, "utterances.jsonl"))

# Write new csv file
print("\nWriting newly formatted file...")
with open(datafile, 'w', encoding='utf-8') as outputfile:
    csvwriter = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')
    for pair in extractSentencePairs(conversations):
        csvwriter.writerow(pair)

# Print a sample of lines
print("\nSample lines from file:")
printLines(datafile)


Processing corpus into lines and conversations...

Writing newly formatted file...

Sample lines from file:
b'They do to!\tThey do not!\n'
b'She okay?\tI hope so.\n'
b"Wow\tLet's go.\n"
b'"I\'m kidding.  You know how sometimes you just become this ""persona""?  And you don\'t know how to quit?"\tNo\n'
b"No\tOkay -- you're gonna need to learn how to lie.\n"
b"I figured you'd get to the good stuff eventually.\tWhat good stuff?\n"
b'What good stuff?\t"The ""real you""."\n'
b'"The ""real you""."\tLike my fear of wearing pastels?\n'
b'do you listen to this crap?\tWhat crap?\n'
b"What crap?\tMe.  This endless ...blonde babble. I'm like, boring myself.\n"


In [None]:
# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

class Voc:
    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = []

        for k, v in self.word2count.items():
            if v >= min_count:
                keep_words.append(k)

        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
        ))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3 # Count default tokens

        for word in keep_words:
            self.addWord(word)

In [None]:
MAX_LENGTH = 10  # Maximum sentence length to consider

# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", r" ", s).strip()
    return s

# Read query/response pairs and return a voc object
def readVocs(datafile, corpus_name):
    print("Reading lines...")
    # Read the file and split into lines
    lines = open(datafile, encoding='utf-8').\
        read().strip().split('\n')
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    voc = Voc(corpus_name)
    return voc, pairs

# Returns True if both sentences in a pair 'p' are under the MAX_LENGTH threshold
def filterPair(p):
    # Input sequences need to preserve the last word for EOS token
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH

# Filter pairs using the ``filterPair`` condition
def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

# Using the functions defined above, return a populated voc object and pairs list
def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    print("Start preparing training data ...")
    voc, pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(pairs)))
    pairs = filterPairs(pairs)
    print("Trimmed to {!s} sentence pairs".format(len(pairs)))
    print("Counting words...")
    for pair in pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
    print("Counted words:", voc.num_words)
    return voc, pairs


# Load/Assemble voc and pairs
save_dir = os.path.join("data", "save")
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)
# Print some pairs to validate
print("\npairs:")
for pair in pairs[:10]:
    print(pair)

Start preparing training data ...
Reading lines...
Read 221282 sentence pairs
Trimmed to 64313 sentence pairs
Counting words...
Counted words: 18082

pairs:
['they do to !', 'they do not !']
['she okay ?', 'i hope so .']
['wow', 'let s go .']
['what good stuff ?', 'the real you .']
['the real you .', 'like my fear of wearing pastels ?']
['do you listen to this crap ?', 'what crap ?']
['well no . . .', 'then that s all you had to say .']
['then that s all you had to say .', 'but']
['but', 'you always been this selfish ?']
['have fun tonight ?', 'tons']


In [None]:
MIN_COUNT = 3    # Minimum word count threshold for trimming

def trimRareWords(voc, pairs, MIN_COUNT):
    # Trim words used under the MIN_COUNT from the voc
    voc.trim(MIN_COUNT)
    # Filter out pairs with trimmed words
    keep_pairs = []
    for pair in pairs:
        input_sentence = pair[0]
        output_sentence = pair[1]
        keep_input = True
        keep_output = True
        # Check input sentence
        for word in input_sentence.split(' '):
            if word not in voc.word2index:
                keep_input = False
                break
        # Check output sentence
        for word in output_sentence.split(' '):
            if word not in voc.word2index:
                keep_output = False
                break

        # Only keep pairs that do not contain trimmed word(s) in their input or output sentence
        if keep_input and keep_output:
            keep_pairs.append(pair)

    print("Trimmed from {} pairs to {}, {:.4f} of total".format(len(pairs), len(keep_pairs), len(keep_pairs) / len(pairs)))
    return keep_pairs


# Trim voc and pairs
pairs = trimRareWords(voc, pairs, MIN_COUNT)

keep_words 7833 / 18079 = 0.4333
Trimmed from 64313 pairs to 53131, 0.8261 of total


In [None]:
def indexesFromSentence(voc, sentence):
    return [voc.word2index[word] for word in sentence.split(' ')] + [EOS_token]


def zeroPadding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMatrix(l, value=PAD_token):
    m = []
    for i, seq in enumerate(l):
        m.append([])
        for token in seq:
            if token == PAD_token:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

# Returns padded input sequence tensor and lengths
def inputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.BoolTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

# Returns all items for a given batch of pairs
def batch2TrainData(voc, pair_batch):
    pair_batch.sort(key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch, output_batch = [], []
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len


# Example for validation
small_batch_size = 5
batches = batch2TrainData(voc, [random.choice(pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches

print("input_variable:", input_variable)
print("lengths:", lengths)
print("target_variable:", target_variable)
print("mask:", mask)
print("max_target_len:", max_target_len)

input_variable: tensor([[   4,   77, 4938,   24,  679],
        [  24,   24, 5854,  109, 5361],
        [1026,   39,   14,   25,   14],
        [  20, 2644,    2,    2,    2],
        [  84,   10,    0,    0,    0],
        [  85,    2,    0,    0,    0],
        [  10,    0,    0,    0,    0],
        [   2,    0,    0,    0,    0]])
lengths: tensor([8, 6, 4, 4, 4])
target_variable: tensor([[  34,  162,   24, 1077,  679],
        [  14,   14,   64,    6,   14],
        [   2,    2,    7,    2,    2],
        [   0,    0,  322,    0,    0],
        [   0,    0,   14,    0,    0],
        [   0,    0,    2,    0,    0]])
mask: tensor([[ True,  True,  True,  True,  True],
        [ True,  True,  True,  True,  True],
        [ True,  True,  True,  True,  True],
        [False, False,  True, False, False],
        [False, False,  True, False, False],
        [False, False,  True, False, False]])
max_target_len: 6


In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size parameters are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # Pack padded batch of sequences for RNN module
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden

In [None]:
# Luong attention layer
class Attn(nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)

In [None]:
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        # Forward through unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)
        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)
        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
        # Concatenate weighted context vector and GRU output using Luong eq. 5
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))
        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        # Return output and final hidden state
        return output, hidden

In [None]:
def maskNLLLoss(inp, target, mask):
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = crossEntropy.masked_select(mask).mean()
    loss = loss.to(device)
    return loss, nTotal.item()


In [None]:
from datetime import datetime

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('logs/chatbot')  # Directory to store TensorBoard logs

global save_model_board
save_model_board = True



def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length=MAX_LENGTH):
    global save_model_board
    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    # Lengths for RNN packing should always be on the CPU
    lengths = lengths.to("cpu")

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    if save_model_board:
        writer.add_graph(encoder, (input_variable, lengths))

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]


    # Determine if we are using teacher forcing this iteration
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            if save_model_board:
                writer.add_graph(decoder, (decoder_input, decoder_hidden, encoder_outputs))
                save_model_board = False

            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal


    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # No teacher forcing: next input is decoder's own current output
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    # Perform backpropagation
    loss.backward()

    # Clip gradients: gradients are modified in place
    _ = nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()



    return sum(print_losses) / n_totals



In [None]:
def trainIters(
    model_name,
    voc,
    pairs,
    encoder,
    decoder,
    encoder_optimizer,
    decoder_optimizer,
    embedding,
    encoder_n_layers,
    decoder_n_layers,
    save_dir,
    n_iteration,
    batch_size,
    print_every,
    save_every,
    clip,
    corpus_name,
    loadFilename,
    wandb=None,
):

    # Load batches for each iteration
    training_batches = [
        batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
        for _ in range(n_iteration)
    ]

    # Initializations
    print("Initializing ...")
    start_iteration = 1
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint["iteration"] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss = train(
            input_variable,
            lengths,
            target_variable,
            mask,
            max_target_len,
            encoder,
            decoder,
            embedding,
            encoder_optimizer,
            decoder_optimizer,
            batch_size,
            clip,
        )
        print_loss += loss

        writer.add_scalar("train_loss", loss, global_step=iteration)
        writer.add_scalar(
            "train_loss_avg", print_loss / iteration, global_step=iteration
        )

        if wandb:
            wandb.log({"train_loss": loss, "iteration": iteration})

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print(
                "Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(
                    iteration, iteration / n_iteration * 100, print_loss_avg
                )
            )
            print_loss = 0

        # Save checkpoint
        if iteration % save_every == 0:
            directory = os.path.join(
                save_dir,
                model_name,
                corpus_name,
                "{}-{}_{}".format(encoder_n_layers, decoder_n_layers, hidden_size),
            )
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    "iteration": iteration,
                    "en": encoder.state_dict(),
                    "de": decoder.state_dict(),
                    "en_opt": encoder_optimizer.state_dict(),
                    "de_opt": decoder_optimizer.state_dict(),
                    "loss": loss,
                    "voc_dict": voc.__dict__,
                    "embedding": embedding.state_dict(),
                },
                os.path.join(directory, "{}_{}.tar".format(iteration, "checkpoint")),
            )

        writer.close()

In [None]:
class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

In [None]:
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to("cpu")
    # Decode sentence with searcher
    tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words


def evaluateInput(encoder, decoder, searcher, voc):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence == 'q' or input_sentence == 'quit': break
            # Normalize sentence
            input_sentence = normalizeString(input_sentence)
            # Evaluate sentence
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            # Format and print response sentence
            output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
            print('Bot:', ' '.join(output_words))

        except KeyError:
            print("Error: Encountered unknown word.")

In [None]:
# Configure models
model_name = 'cb_model'
attn_model = 'dot'
#``attn_model = 'general'``
#``attn_model = 'concat'``
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64

# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 4000
# checkpoint_iter = 10

In [None]:
loadFilename = os.path.join(save_dir, model_name, corpus_name,
                    '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
                    '{}_checkpoint.tar'.format(checkpoint_iter))

In [None]:
# Load model if a ``loadFilename`` is provided
loadFilename = None
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']


print('Building encoder and decoder ...')
# Initialize word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)


# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Building encoder and decoder ...
Models built and ready to go!


In [None]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# If you have CUDA, configure CUDA to call
if device == 'cuda':
    for state in encoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()
    
    for state in decoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()
else:
    for state in encoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)
    
    for state in decoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)
                
# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip, corpus_name, loadFilename)



Building optimizers ...
Starting Training!
Initializing ...
Training...


  if a.grad is not None:


Iteration: 1; Percent complete: 0.0%; Average loss: 8.9813
Iteration: 2; Percent complete: 0.1%; Average loss: 8.8644
Iteration: 3; Percent complete: 0.1%; Average loss: 8.6873
Iteration: 4; Percent complete: 0.1%; Average loss: 8.3962
Iteration: 5; Percent complete: 0.1%; Average loss: 7.9907
Iteration: 6; Percent complete: 0.1%; Average loss: 7.5111
Iteration: 7; Percent complete: 0.2%; Average loss: 6.8828
Iteration: 8; Percent complete: 0.2%; Average loss: 6.7280
Iteration: 9; Percent complete: 0.2%; Average loss: 6.8358
Iteration: 10; Percent complete: 0.2%; Average loss: 6.5657
Iteration: 11; Percent complete: 0.3%; Average loss: 6.3930
Iteration: 12; Percent complete: 0.3%; Average loss: 6.0575
Iteration: 13; Percent complete: 0.3%; Average loss: 5.5440
Iteration: 14; Percent complete: 0.4%; Average loss: 5.3874
Iteration: 15; Percent complete: 0.4%; Average loss: 5.6641
Iteration: 16; Percent complete: 0.4%; Average loss: 5.4628
Iteration: 17; Percent complete: 0.4%; Average lo

In [None]:

import wandb


wandb.init(
    project='chatbot', 
    config = { 
        "learning_rate": 0.02,
        "architecture": "CNN",
        "dataset": "CIFAR-100",
        "epochs": 10,
    }
)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/dan9232/.netrc


In [None]:
# !pip install wandb -Uq
# !pip install wandb

[0mDefaulting to user installation because normal site-packages is not writeable


In [None]:

sweep_config = {
    'method': 'random',
    'metric': {
        'name': 'loss',
        'goal': 'minimize'
    },
    'parameters': {
        'learning_rate': {
            'values': [0.0001, 0.00025, 0.0005, 0.001]
        },
        'optimizer': {
            'values': ['adam', 'sgd']
        },
        'clip': {
            'values': [0, 25, 50, 100]
        },
        'teacher_forcing_ratio': {
            'values': [0, 0.5, 1.0]
        },
        'decoder_learning_ratio': {
            'values': [1.0, 3.0, 5.0, 10.0]
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project='chatbot')


Create sweep with ID: sgdvrvn9
Sweep URL: https://wandb.ai/darien-nouri/chatbot/sweeps/sgdvrvn9


In [None]:
import pprint

pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'clip': {'values': [0, 25, 50, 100]},
                'decoder_learning_ratio': {'values': [1.0, 3.0, 5.0, 10.0]},
                'learning_rate': {'values': [0.0001, 0.00025, 0.0005, 0.001]},
                'optimizer': {'values': ['adam', 'sgd']},
                'teacher_forcing_ratio': {'values': [0, 0.5, 1.0]}}}


In [None]:
# !pip install wandb -Uq
# !pip install wandb
import wandb
import os 
# os.environ["WANDB_SILENT"] = "false"
# wandb.finish()
wandb.login(relogin=True)
# wandb.login()
'911037bd435baa15a98d26a3955bbd3ea2559318'

In [None]:

n_iteration = 4000
print_every = 1
save_every = 500


sweep_config = {
    'method': 'random',
    'metric': {
        'name': 'loss',
        'goal': 'minimize'
    },
    'parameters': {
        'learning_rate': {
            'values': [0.0001, 0.00025, 0.0005, 0.001]
        },
        'optimizer': {
            'values': ['adam', 'sgd']
        },
        'clip': {
            'values': [0, 25, 50, 100]
        },
        'teacher_forcing_ratio': {
            'values': [0, 0.5, 1.0]
        },
        'decoder_learning_ratio': {
            'values': [1.0, 3.0, 5.0, 10.0]
        }
    }
}

project_name = 'chatbot-t7'

sweep_id = wandb.sweep(sweep_config, project=project_name)


In [None]:
from datetime import datetime

model_name = 'cb_model_{}'.format(datetime.now().strftime('%Y-%m-%d_%H-%M'))
attn_model = 'dot'
#``attn_model = 'general'``
#``attn_model = 'concat'``
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64


print_every = 1
save_every = 500

# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 4000

In [None]:
def trainSweep(config=None):
    
    with wandb.init(config=config):
      
        config = wandb.config
        optimizer_name = config.optimizer
        clip = config.clip
        learning_rate = config.learning_rate
        teacher_forcing_ratio = config.teacher_forcing_ratio
        decoder_learning_ratio = config.decoder_learning_ratio
        embedding = nn.Embedding(voc.num_words, hidden_size)
        
        if loadFilename:
            embedding.load_state_dict(embedding_sd)
        encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
        decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
        if loadFilename:
            encoder.load_state_dict(encoder_sd)
            decoder.load_state_dict(decoder_sd)
        encoder = encoder.to(device)
        decoder = decoder.to(device)
     
        encoder.train()
        decoder.train()
        if optimizer_name == 'adam':
            encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
            decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
        elif optimizer_name == 'sgd':
            encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
            decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)

        # Initializations
        print("Initializing ...")
        if device == 'cuda':
            for state in encoder_optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()
            
            for state in decoder_optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()
                        
        start_iteration = 1
        
    
        for iteration in range(1, n_iteration + 1):
            training_batch = batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
            input_variable, lengths, target_variable, mask, max_target_len = training_batch

            # Run a training iteration with batch
            loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                         decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip, teacher_forcing_ratio=teacher_forcing_ratio)

            print_loss = loss
            wandb.log({"loss": print_loss, "iteration": iteration})

            if iteration % print_every == 0:
                print_loss_avg = print_loss / print_every
                print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(
                    iteration, iteration / n_iteration * 100, print_loss_avg))

            
            if iteration % save_every == 0:
                directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(
                    encoder_n_layers, decoder_n_layers, hidden_size))
                if not os.path.exists(directory):
                    os.makedirs(directory)
                torch.save({
                    'iteration': iteration,
                    'en': encoder.state_dict(),
                    'de': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'voc_dict': voc.__dict__,
                    'embedding': embedding.state_dict()
                }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))
    
        wandb.finish()
        

        

In [None]:
wandb.agent(sweep_id, train_sweep, count=15, project='chatbot')



<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[34m[1mwandb[0m: Agent Starting Run: qgxhkwik with config:
[34m[1mwandb[0m: 	clip: 50
[34m[1mwandb[0m: 	decoder_learning_ratio: 3
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
Exception in thread ChkStopThr:
Traceback (most recent call last):
  File "/scratch/dan9232/ADS/penv/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/scratch/dan9232/ADS/penv/lib/python3.12/threading.py", line 1010, in run
Exception in thread NetStatThr:
Traceback (most recent call last):
  File "/scratch/dan9232/ADS/penv/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
Exception in thread IntMsgThr:
Traceback (most recent call last):
  File "/scratch/dan9232/ADS/penv/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self._target(*self._args, **self._kwargs)
  File "/scratch/dan9232/ADS/penv/lib/python3.12/site-packages/wandb/sdk/wandb_run.py", line 286, in chec

Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 2.8394
Iteration: 2; Percent complete: 0.1%; Average loss: 2.6348
Iteration: 3; Percent complete: 0.1%; Average loss: 2.7006
Iteration: 4; Percent complete: 0.1%; Average loss: 2.7708
Iteration: 5; Percent complete: 0.1%; Average loss: 2.6651
Iteration: 6; Percent complete: 0.1%; Average loss: 2.7229
Iteration: 7; Percent complete: 0.2%; Average loss: 2.5788
Iteration: 8; Percent complete: 0.2%; Average loss: 2.9067
Iteration: 9; Percent complete: 0.2%; Average loss: 3.0618
Iteration: 10; Percent complete: 0.2%; Average loss: 2.7888
Iteration: 11; Percent complete: 0.3%; Average loss: 3.1580
Iteration: 12; Percent complete: 0.3%; Average loss: 3.1163
Iteration: 13; Percent complete: 0.3%; Average loss: 2.9760
Iteration: 14; Percent complete: 0.4%; Average loss: 2.9710
Iteration: 15; Percent complete: 0.4%; Average loss: 2.9108
Iteration: 16; Percent complete: 0.4%; Average loss: 2.8873
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▇█▆▇▆█▅▆▆▆▆▅▅▆▄▄▄▄▃▄▄▄▃▃▃▃▂▃▃▂▃▂▂▂▂▁▁▂▁▂

0,1
iteration,4000.0
train_loss,1.53665


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pzjqmisu with config:
[34m[1mwandb[0m: 	clip: 100
[34m[1mwandb[0m: 	decoder_learning_ratio: 1
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0
[34m[1mwandb[0m: Currently logged in as: [33mdariennouri[0m ([33mdarien-nouri[0m). Use [1m`wandb login --relogin`[0m to force relogin


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 1.7708
Iteration: 2; Percent complete: 0.1%; Average loss: 1.7338
Iteration: 3; Percent complete: 0.1%; Average loss: 1.6344
Iteration: 4; Percent complete: 0.1%; Average loss: 1.4245
Iteration: 5; Percent complete: 0.1%; Average loss: 1.9611
Iteration: 6; Percent complete: 0.1%; Average loss: 1.6750
Iteration: 7; Percent complete: 0.2%; Average loss: 1.5819
Iteration: 8; Percent complete: 0.2%; Average loss: 1.8298
Iteration: 9; Percent complete: 0.2%; Average loss: 2.0187
Iteration: 10; Percent complete: 0.2%; Average loss: 1.7565
Iteration: 11; Percent complete: 0.3%; Average loss: 1.5842
Iteration: 12; Percent complete: 0.3%; Average loss: 1.7778
Iteration: 13; Percent complete: 0.3%; Average loss: 1.6656
Iteration: 14; Percent complete: 0.4%; Average loss: 1.8760
Iteration: 15; Percent complete: 0.4%; Average loss: 1.4993
Iteration: 16; Percent complete: 0.4%; Average loss: 1.6148
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▇▆▆▆▅▆█▅▄▅▄▅▄▄▄▄▅▄▃▃▄▄▃▃▃▂▂▄▂▂▄▃▂▂▂▁▄▁▃▄

0,1
iteration,4000.0
train_loss,0.94717


[34m[1mwandb[0m: Agent Starting Run: 7fbgkoey with config:
[34m[1mwandb[0m: 	clip: 25
[34m[1mwandb[0m: 	decoder_learning_ratio: 1
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 1


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 0.8599
Iteration: 2; Percent complete: 0.1%; Average loss: 0.8968
Iteration: 3; Percent complete: 0.1%; Average loss: 0.9567
Iteration: 4; Percent complete: 0.1%; Average loss: 0.8803
Iteration: 5; Percent complete: 0.1%; Average loss: 1.0404
Iteration: 6; Percent complete: 0.1%; Average loss: 0.9739
Iteration: 7; Percent complete: 0.2%; Average loss: 0.8897
Iteration: 8; Percent complete: 0.2%; Average loss: 0.8769
Iteration: 9; Percent complete: 0.2%; Average loss: 0.8878
Iteration: 10; Percent complete: 0.2%; Average loss: 0.8553
Iteration: 11; Percent complete: 0.3%; Average loss: 0.8693
Iteration: 12; Percent complete: 0.3%; Average loss: 1.1131
Iteration: 13; Percent complete: 0.3%; Average loss: 0.8975
Iteration: 14; Percent complete: 0.4%; Average loss: 0.9617
Iteration: 15; Percent complete: 0.4%; Average loss: 0.8419
Iteration: 16; Percent complete: 0.4%; Average loss: 0.8745
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▆▇▇▇█▆█▅▅▆▄▅▅▅▄▄▅▅▄▅▃▆▃▂▄▄▅▄▃▃▄▂▁▃▃▅▂▃▄▁

0,1
iteration,4000.0
train_loss,0.50203


[34m[1mwandb[0m: Agent Starting Run: nn5xha9x with config:
[34m[1mwandb[0m: 	clip: 50
[34m[1mwandb[0m: 	decoder_learning_ratio: 5
[34m[1mwandb[0m: 	learning_rate: 0.00025
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 0.5224
Iteration: 2; Percent complete: 0.1%; Average loss: 0.5849
Iteration: 3; Percent complete: 0.1%; Average loss: 0.6990
Iteration: 4; Percent complete: 0.1%; Average loss: 0.8174
Iteration: 5; Percent complete: 0.1%; Average loss: 0.6364
Iteration: 6; Percent complete: 0.1%; Average loss: 0.6326
Iteration: 7; Percent complete: 0.2%; Average loss: 0.7429
Iteration: 8; Percent complete: 0.2%; Average loss: 0.5952
Iteration: 9; Percent complete: 0.2%; Average loss: 0.5974
Iteration: 10; Percent complete: 0.2%; Average loss: 0.6871
Iteration: 11; Percent complete: 0.3%; Average loss: 0.6648
Iteration: 12; Percent complete: 0.3%; Average loss: 0.6246
Iteration: 13; Percent complete: 0.3%; Average loss: 0.6597
Iteration: 14; Percent complete: 0.4%; Average loss: 0.7241
Iteration: 15; Percent complete: 0.4%; Average loss: 0.9018
Iteration: 16; Percent complete: 0.4%; Average loss: 0.6145
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▅▂█▄▅▂▄▅▄▄▆▂▆▄▃▄▂▅▅▃▃▄▃▂▂▃▃▁▃▅▂▂▁▄▂▃▆▄▃▂

0,1
iteration,4000.0
train_loss,0.71041


[34m[1mwandb[0m: Agent Starting Run: gugfyssw with config:
[34m[1mwandb[0m: 	clip: 50
[34m[1mwandb[0m: 	decoder_learning_ratio: 1
[34m[1mwandb[0m: 	learning_rate: 0.00025
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 0.5324
Iteration: 2; Percent complete: 0.1%; Average loss: 0.5584
Iteration: 3; Percent complete: 0.1%; Average loss: 0.6737
Iteration: 4; Percent complete: 0.1%; Average loss: 0.6797
Iteration: 5; Percent complete: 0.1%; Average loss: 0.6678
Iteration: 6; Percent complete: 0.1%; Average loss: 0.8483
Iteration: 7; Percent complete: 0.2%; Average loss: 0.6513
Iteration: 8; Percent complete: 0.2%; Average loss: 0.4557
Iteration: 9; Percent complete: 0.2%; Average loss: 0.5016
Iteration: 10; Percent complete: 0.2%; Average loss: 0.5657
Iteration: 11; Percent complete: 0.3%; Average loss: 0.5567
Iteration: 12; Percent complete: 0.3%; Average loss: 0.5361
Iteration: 13; Percent complete: 0.3%; Average loss: 0.5434
Iteration: 14; Percent complete: 0.4%; Average loss: 0.6002
Iteration: 15; Percent complete: 0.4%; Average loss: 0.7219
Iteration: 16; Percent complete: 0.4%; Average loss: 0.6147
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▃█▆▄▄▅▃▄▃▃▅▄▂▃▄▃▂▃▄▃▂▄▃▂▃▁▂▃▂▂▃▁▃▂▂▃▂▂▂▂

0,1
iteration,4000.0
train_loss,0.23806


[34m[1mwandb[0m: Agent Starting Run: 93o8h8a6 with config:
[34m[1mwandb[0m: 	clip: 50
[34m[1mwandb[0m: 	decoder_learning_ratio: 10
[34m[1mwandb[0m: 	learning_rate: 0.00025
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 0.2572
Iteration: 2; Percent complete: 0.1%; Average loss: 0.2589
Iteration: 3; Percent complete: 0.1%; Average loss: 0.4122
Iteration: 4; Percent complete: 0.1%; Average loss: 0.3122
Iteration: 5; Percent complete: 0.1%; Average loss: 0.2949
Iteration: 6; Percent complete: 0.1%; Average loss: 0.4259
Iteration: 7; Percent complete: 0.2%; Average loss: 0.3906
Iteration: 8; Percent complete: 0.2%; Average loss: 0.3958
Iteration: 9; Percent complete: 0.2%; Average loss: 0.3062
Iteration: 10; Percent complete: 0.2%; Average loss: 0.4266
Iteration: 11; Percent complete: 0.3%; Average loss: 0.4021
Iteration: 12; Percent complete: 0.3%; Average loss: 0.4071
Iteration: 13; Percent complete: 0.3%; Average loss: 0.3425
Iteration: 14; Percent complete: 0.4%; Average loss: 0.3803
Iteration: 15; Percent complete: 0.4%; Average loss: 0.4237
Iteration: 16; Percent complete: 0.4%; Average loss: 0.4319
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▁▃▃▃▆▃▄▃▄▆▆▆▆▇▆▆▅▆▅▅▆▆▅▆▅▄▄▆▄▆▄▄▅█▇▅▅▆▇▄

0,1
iteration,4000.0
train_loss,1.26326


[34m[1mwandb[0m: Agent Starting Run: fitgcy7l with config:
[34m[1mwandb[0m: 	clip: 25
[34m[1mwandb[0m: 	decoder_learning_ratio: 3
[34m[1mwandb[0m: 	learning_rate: 0.00025
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 1


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 1.3279
Iteration: 2; Percent complete: 0.1%; Average loss: 1.3929
Iteration: 3; Percent complete: 0.1%; Average loss: 1.3300
Iteration: 4; Percent complete: 0.1%; Average loss: 1.1903
Iteration: 5; Percent complete: 0.1%; Average loss: 1.0909
Iteration: 6; Percent complete: 0.1%; Average loss: 1.1632
Iteration: 7; Percent complete: 0.2%; Average loss: 1.4388
Iteration: 8; Percent complete: 0.2%; Average loss: 1.0238
Iteration: 9; Percent complete: 0.2%; Average loss: 1.1028
Iteration: 10; Percent complete: 0.2%; Average loss: 1.3720
Iteration: 11; Percent complete: 0.3%; Average loss: 0.9664
Iteration: 12; Percent complete: 0.3%; Average loss: 1.2528
Iteration: 13; Percent complete: 0.3%; Average loss: 1.2976
Iteration: 14; Percent complete: 0.4%; Average loss: 1.1881
Iteration: 15; Percent complete: 0.4%; Average loss: 1.3047
Iteration: 16; Percent complete: 0.4%; Average loss: 1.5201
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▇▅▄▅▃▅▅▃▄▃▃▃▃▆▃▄▄▄▂▃▃▃▁▂▄▃▁▃▂▂▃▂▃▃▃▁▃▂▄

0,1
iteration,4000.0
train_loss,0.69289


[34m[1mwandb[0m: Agent Starting Run: 3e8p3jp5 with config:
[34m[1mwandb[0m: 	clip: 50
[34m[1mwandb[0m: 	decoder_learning_ratio: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 0.8341
Iteration: 2; Percent complete: 0.1%; Average loss: 0.4286
Iteration: 3; Percent complete: 0.1%; Average loss: 0.5166
Iteration: 4; Percent complete: 0.1%; Average loss: 0.6957
Iteration: 5; Percent complete: 0.1%; Average loss: 0.7510
Iteration: 6; Percent complete: 0.1%; Average loss: 0.7283
Iteration: 7; Percent complete: 0.2%; Average loss: 0.7448
Iteration: 8; Percent complete: 0.2%; Average loss: 0.6039
Iteration: 9; Percent complete: 0.2%; Average loss: 0.7291
Iteration: 10; Percent complete: 0.2%; Average loss: 0.8948
Iteration: 11; Percent complete: 0.3%; Average loss: 0.7327
Iteration: 12; Percent complete: 0.3%; Average loss: 0.8023
Iteration: 13; Percent complete: 0.3%; Average loss: 0.6103
Iteration: 14; Percent complete: 0.4%; Average loss: 0.7902
Iteration: 15; Percent complete: 0.4%; Average loss: 0.6465
Iteration: 16; Percent complete: 0.4%; Average loss: 0.8460
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▁▃▄▄▅▅▆▆▅▅▅▆▅▅▆▆▇▆█▆▇▇▇▅▆▇▇▇▆█▇▇▇█▇▆▇███

0,1
iteration,4000.0
train_loss,2.51838


[34m[1mwandb[0m: Agent Starting Run: 0e6tkxm5 with config:
[34m[1mwandb[0m: 	clip: 0
[34m[1mwandb[0m: 	decoder_learning_ratio: 3
[34m[1mwandb[0m: 	learning_rate: 0.00025
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 2.5799
Iteration: 2; Percent complete: 0.1%; Average loss: 2.4765
Iteration: 3; Percent complete: 0.1%; Average loss: 2.8139
Iteration: 4; Percent complete: 0.1%; Average loss: 2.8556
Iteration: 5; Percent complete: 0.1%; Average loss: 2.6597
Iteration: 6; Percent complete: 0.1%; Average loss: 2.6389
Iteration: 7; Percent complete: 0.2%; Average loss: 2.7583
Iteration: 8; Percent complete: 0.2%; Average loss: 2.5750
Iteration: 9; Percent complete: 0.2%; Average loss: 2.5652
Iteration: 10; Percent complete: 0.2%; Average loss: 2.6159
Iteration: 11; Percent complete: 0.3%; Average loss: 2.4921
Iteration: 12; Percent complete: 0.3%; Average loss: 2.4448
Iteration: 13; Percent complete: 0.3%; Average loss: 2.6781
Iteration: 14; Percent complete: 0.4%; Average loss: 2.5101
Iteration: 15; Percent complete: 0.4%; Average loss: 2.5822
Iteration: 16; Percent complete: 0.4%; Average loss: 2.9029
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▆▃▂▅▅▆▂█▂▃▄▃▂▂▅▃▃▃▃▆▄▄▃▄▅▃▁▄▁▁▂▅▂▄▆▁▇▇▅▄

0,1
iteration,4000.0
train_loss,2.50535


[34m[1mwandb[0m: Agent Starting Run: au6pca5p with config:
[34m[1mwandb[0m: 	clip: 50
[34m[1mwandb[0m: 	decoder_learning_ratio: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 2.7138
Iteration: 2; Percent complete: 0.1%; Average loss: 2.7748
Iteration: 3; Percent complete: 0.1%; Average loss: 2.7815
Iteration: 4; Percent complete: 0.1%; Average loss: 3.1126
Iteration: 5; Percent complete: 0.1%; Average loss: 3.2414
Iteration: 6; Percent complete: 0.1%; Average loss: 2.8149
Iteration: 7; Percent complete: 0.2%; Average loss: 3.1203
Iteration: 8; Percent complete: 0.2%; Average loss: 3.2676
Iteration: 9; Percent complete: 0.2%; Average loss: 3.5241
Iteration: 10; Percent complete: 0.2%; Average loss: 3.4085
Iteration: 11; Percent complete: 0.3%; Average loss: 3.3167
Iteration: 12; Percent complete: 0.3%; Average loss: 3.4956
Iteration: 13; Percent complete: 0.3%; Average loss: 3.4698
Iteration: 14; Percent complete: 0.4%; Average loss: 3.2235
Iteration: 15; Percent complete: 0.4%; Average loss: 3.4026
Iteration: 16; Percent complete: 0.4%; Average loss: 3.6889
I

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▁▂▂▄▄▄▅▅▃▅▅▅▆▆▇▇▇▆▅▇▇█▆

0,1
iteration,4000.0
train_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5sybzfoq with config:
[34m[1mwandb[0m: 	clip: 100
[34m[1mwandb[0m: 	decoder_learning_ratio: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 1


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: nan
Iteration: 2; Percent complete: 0.1%; Average loss: nan
Iteration: 3; Percent complete: 0.1%; Average loss: nan
Iteration: 4; Percent complete: 0.1%; Average loss: nan
Iteration: 5; Percent complete: 0.1%; Average loss: nan
Iteration: 6; Percent complete: 0.1%; Average loss: nan
Iteration: 7; Percent complete: 0.2%; Average loss: nan
Iteration: 8; Percent complete: 0.2%; Average loss: nan
Iteration: 9; Percent complete: 0.2%; Average loss: nan
Iteration: 10; Percent complete: 0.2%; Average loss: nan
Iteration: 11; Percent complete: 0.3%; Average loss: nan
Iteration: 12; Percent complete: 0.3%; Average loss: nan
Iteration: 13; Percent complete: 0.3%; Average loss: nan
Iteration: 14; Percent complete: 0.4%; Average loss: nan
Iteration: 15; Percent complete: 0.4%; Average loss: nan
Iteration: 16; Percent complete: 0.4%; Average loss: nan
Iteration: 17; Percent complete: 0.4%; Average lo

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
iteration,4000.0
train_loss,


[34m[1mwandb[0m: Agent Starting Run: e79n1zj9 with config:
[34m[1mwandb[0m: 	clip: 50
[34m[1mwandb[0m: 	decoder_learning_ratio: 10
[34m[1mwandb[0m: 	learning_rate: 0.00025
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	teacher_forcing_ratio: 1


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: nan
Iteration: 2; Percent complete: 0.1%; Average loss: nan
Iteration: 3; Percent complete: 0.1%; Average loss: nan
Iteration: 4; Percent complete: 0.1%; Average loss: nan
Iteration: 5; Percent complete: 0.1%; Average loss: nan
Iteration: 6; Percent complete: 0.1%; Average loss: nan
Iteration: 7; Percent complete: 0.2%; Average loss: nan
Iteration: 8; Percent complete: 0.2%; Average loss: nan
Iteration: 9; Percent complete: 0.2%; Average loss: nan
Iteration: 10; Percent complete: 0.2%; Average loss: nan
Iteration: 11; Percent complete: 0.3%; Average loss: nan
Iteration: 12; Percent complete: 0.3%; Average loss: nan
Iteration: 13; Percent complete: 0.3%; Average loss: nan
Iteration: 14; Percent complete: 0.4%; Average loss: nan
Iteration: 15; Percent complete: 0.4%; Average loss: nan
Iteration: 16; Percent complete: 0.4%; Average loss: nan
Iteration: 17; Percent complete: 0.4%; Average lo

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
iteration,4000.0
train_loss,


[34m[1mwandb[0m: Agent Starting Run: 8iyx8piq with config:
[34m[1mwandb[0m: 	clip: 25
[34m[1mwandb[0m: 	decoder_learning_ratio: 10
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: nan
Iteration: 2; Percent complete: 0.1%; Average loss: nan
Iteration: 3; Percent complete: 0.1%; Average loss: nan
Iteration: 4; Percent complete: 0.1%; Average loss: nan
Iteration: 5; Percent complete: 0.1%; Average loss: nan
Iteration: 6; Percent complete: 0.1%; Average loss: nan
Iteration: 7; Percent complete: 0.2%; Average loss: nan
Iteration: 8; Percent complete: 0.2%; Average loss: nan
Iteration: 9; Percent complete: 0.2%; Average loss: nan
Iteration: 10; Percent complete: 0.2%; Average loss: nan
Iteration: 11; Percent complete: 0.3%; Average loss: nan
Iteration: 12; Percent complete: 0.3%; Average loss: nan
Iteration: 13; Percent complete: 0.3%; Average loss: nan
Iteration: 14; Percent complete: 0.4%; Average loss: nan
Iteration: 15; Percent complete: 0.4%; Average loss: nan
Iteration: 16; Percent complete: 0.4%; Average loss: nan
Iteration: 17; Percent complete: 0.4%; Average lo

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
iteration,4000.0
train_loss,


[34m[1mwandb[0m: Agent Starting Run: nbxq0dao with config:
[34m[1mwandb[0m: 	clip: 25
[34m[1mwandb[0m: 	decoder_learning_ratio: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 1


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: nan
Iteration: 2; Percent complete: 0.1%; Average loss: nan
Iteration: 3; Percent complete: 0.1%; Average loss: nan
Iteration: 4; Percent complete: 0.1%; Average loss: nan
Iteration: 5; Percent complete: 0.1%; Average loss: nan
Iteration: 6; Percent complete: 0.1%; Average loss: nan
Iteration: 7; Percent complete: 0.2%; Average loss: nan
Iteration: 8; Percent complete: 0.2%; Average loss: nan
Iteration: 9; Percent complete: 0.2%; Average loss: nan
Iteration: 10; Percent complete: 0.2%; Average loss: nan
Iteration: 11; Percent complete: 0.3%; Average loss: nan
Iteration: 12; Percent complete: 0.3%; Average loss: nan
Iteration: 13; Percent complete: 0.3%; Average loss: nan
Iteration: 14; Percent complete: 0.4%; Average loss: nan
Iteration: 15; Percent complete: 0.4%; Average loss: nan
Iteration: 16; Percent complete: 0.4%; Average loss: nan
Iteration: 17; Percent complete: 0.4%; Average lo

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
iteration,4000.0
train_loss,


[34m[1mwandb[0m: Agent Starting Run: jwd3twej with config:
[34m[1mwandb[0m: 	clip: 100
[34m[1mwandb[0m: 	decoder_learning_ratio: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0


Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: nan
Iteration: 2; Percent complete: 0.1%; Average loss: nan
Iteration: 3; Percent complete: 0.1%; Average loss: nan
Iteration: 4; Percent complete: 0.1%; Average loss: nan
Iteration: 5; Percent complete: 0.1%; Average loss: nan
Iteration: 6; Percent complete: 0.1%; Average loss: nan
Iteration: 7; Percent complete: 0.2%; Average loss: nan
Iteration: 8; Percent complete: 0.2%; Average loss: nan
Iteration: 9; Percent complete: 0.2%; Average loss: nan
Iteration: 10; Percent complete: 0.2%; Average loss: nan
Iteration: 11; Percent complete: 0.3%; Average loss: nan
Iteration: 12; Percent complete: 0.3%; Average loss: nan
Iteration: 13; Percent complete: 0.3%; Average loss: nan
Iteration: 14; Percent complete: 0.4%; Average loss: nan
Iteration: 15; Percent complete: 0.4%; Average loss: nan
Iteration: 16; Percent complete: 0.4%; Average loss: nan
Iteration: 17; Percent complete: 0.4%; Average lo

0,1
iteration,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
iteration,4000.0
train_loss,


Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x14d9389c08f0>> (for post_run_cell), with arguments args (<ExecutionResult object at 14d81d10f3b0, execution_count=35 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 14d81d10d340, raw_cell="wandb.agent(sweep_id, train_sweep, count=15, proje.." store_history=True silent=False shell_futures=True cell_id=2e688226-128e-4af6-b0fd-903e02bc1df0> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

In [None]:
print(sweep_id)


Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x14d9389c08f0>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 14d81d1bbbc0, raw_cell="print(sweep_id)
" store_history=True silent=False shell_futures=True cell_id=ccdd5646-4d7b-4277-a1cb-1b4229d1659c>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

sgdvrvn9
Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x14d9389c08f0>> (for post_run_cell), with arguments args (<ExecutionResult object at 14d81d1b8ef0, execution_count=36 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 14d81d1bbbc0, raw_cell="print(sweep_id)
" store_history=True silent=False shell_futures=True cell_id=ccdd5646-4d7b-4277-a1cb-1b4229d1659c> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

# 2.5

In [None]:
! 

  _warn("subprocess %s is still running" % self.pid,


In [3]:
import wandb
# wandb.login()
api = wandb.Api()

sweep_id = "sgdvrvn9"
project = "chatbot"
api.api_key
api.user('dariennouri')


<User dan9232@nyu.edu>

In [7]:
!wandb agent darien-nouri/chatbot/sgdvrvn9

[34m[1mwandb[0m: Starting wandb agent 🕵️
2024-04-07 07:11:51,732 - wandb.wandb_agent - INFO - Running runs: []
2024-04-07 07:11:51,905 - wandb.wandb_agent - INFO - Agent received command: run
2024-04-07 07:11:51,905 - wandb.wandb_agent - INFO - Agent starting run with config:
	clip: 0
	decoder_learning_ratio: 3
	learning_rate: 0.001
	optimizer: adam
	teacher_forcing_ratio: 1
2024-04-07 07:11:51,906 - wandb.wandb_agent - INFO - About to run command: /usr/bin/env python  --clip=0 --decoder_learning_ratio=3 --learning_rate=0.001 --optimizer=adam --teacher_forcing_ratio=1
/Users/darien/miniforge3/envs/EnvKeras/bin/python: can't find '__main__' module in '/Users/darien/Library/Mobile Documents/com~apple~CloudDocs/05NYU/ADS/homework/homework05'
2024-04-07 07:11:56,914 - wandb.wandb_agent - INFO - Running runs: ['0adg527x']
2024-04-07 07:11:56,914 - wandb.wandb_agent - INFO - Cleaning up finished run: 0adg527x
2024-04-07 07:11:57,113 - wandb.wandb_agent - INFO - Agent received command: run

In [12]:
agent = wandb.agent(sweep_id,  project='chatbot', entity='darien-nouri')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
  self._sweep_config = config_util.dict_from_config_file(


wandb: Agent Starting Run: hu2uynh2 with config:
	clip: 50
	decoder_learning_ratio: 3
	learning_rate: 0.001
	optimizer: sgd
	teacher_forcing_ratio: 1


/Users/darien/miniforge3/envs/EnvKeras/bin/python: can't find '__main__' module in '/Users/darien/Library/Mobile Documents/com~apple~CloudDocs/05NYU/ADS/homework/homework05'
  self._manager = None


wandb: Agent Starting Run: afkxfdyu with config:
	clip: 25
	decoder_learning_ratio: 3
	learning_rate: 0.00025
	optimizer: sgd
	teacher_forcing_ratio: 0


/Users/darien/miniforge3/envs/EnvKeras/bin/python: can't find '__main__' module in '/Users/darien/Library/Mobile Documents/com~apple~CloudDocs/05NYU/ADS/homework/homework05'


wandb: Agent Starting Run: c8eost33 with config:
	clip: 50
	decoder_learning_ratio: 1
	learning_rate: 0.00025
	optimizer: sgd
	teacher_forcing_ratio: 0.5


/Users/darien/miniforge3/envs/EnvKeras/bin/python: can't find '__main__' module in '/Users/darien/Library/Mobile Documents/com~apple~CloudDocs/05NYU/ADS/homework/homework05'


wandb: Agent Starting Run: tz4dl89d with config:
	clip: 50
	decoder_learning_ratio: 10
	learning_rate: 0.0001
	optimizer: sgd
	teacher_forcing_ratio: 1


/Users/darien/miniforge3/envs/EnvKeras/bin/python: can't find '__main__' module in '/Users/darien/Library/Mobile Documents/com~apple~CloudDocs/05NYU/ADS/homework/homework05'


wandb: Agent Starting Run: 16ztrpf9 with config:
	clip: 25
	decoder_learning_ratio: 5
	learning_rate: 0.00025
	optimizer: sgd
	teacher_forcing_ratio: 0


/Users/darien/miniforge3/envs/EnvKeras/bin/python: can't find '__main__' module in '/Users/darien/Library/Mobile Documents/com~apple~CloudDocs/05NYU/ADS/homework/homework05'
2024-04-07 07:14:35,319 - wandb.wandb_agent - ERROR - Detected 5 failed runs in a row, shutting down.


In [19]:
sweep = api.sweep(f"{project}/{sweep_id}")
# wandb sweep load-run-files .
best_run = sweep.best_run()



[34m[1mwandb[0m: Sorting runs by +summary_metrics.loss


In [41]:
api = wandb.Api(overrides={"entity": "darien-nouri", "project": "chatbot"})
sweep = api.sweep('sgdvrvn9')
runs = sorted(sweep.runs, key=lambda r: r.summary.get("train-loss", float("inf")))
best_hyperparameters = best_run.config
print(best_hyperparameters)
print(best_run.summary)
runs = sweep.runs

{'clip': 25, 'optimizer': 'sgd', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 5}
{}


In [29]:
runs = sweep.runs


{'clip': 25, 'optimizer': 'sgd', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 5}
{'clip': 50, 'optimizer': 'sgd', 'learning_rate': 0.0001, 'teacher_forcing_ratio': 1, 'decoder_learning_ratio': 10}
{'clip': 50, 'optimizer': 'sgd', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0.5, 'decoder_learning_ratio': 1}
{'clip': 25, 'optimizer': 'sgd', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 3}
{'clip': 50, 'optimizer': 'sgd', 'learning_rate': 0.001, 'teacher_forcing_ratio': 1, 'decoder_learning_ratio': 3}
{'clip': 50, 'optimizer': 'sgd', 'learning_rate': 0.0005, 'teacher_forcing_ratio': 0.5, 'decoder_learning_ratio': 3}
{'clip': 50, 'optimizer': 'sgd', 'learning_rate': 0.0001, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 5}
{'clip': 25, 'optimizer': 'sgd', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 10}
{'clip': 25, 'optimizer': 'sgd', 'learning_rate': 0.00025, 'teacher_for

In [44]:

wandb.login(relogin=True)

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/darien/.netrc


True

In [46]:

sweep_id = 'a1i0tefo'
api = wandb.Api(overrides={"entity": "dariennouri", "project": "chatbot-2"})
sweep = api.sweep(sweep_id)
runs = sorted(sweep.runs, key=lambda r: r.summary.get("train-loss", float("inf")))
best_run = runs[0]
best_hyperparameters = best_run.config
print(best_hyperparameters)
print(best_run.summary)
runs = sweep.runs


{'clip': 100, 'optimizer': 'adam', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 3}
{'_step': 3999, '_wandb': {'runtime': 102}, '_runtime': 102.82936096191406, 'iteration': 4000, '_timestamp': 1712491537.041005, 'loss': 0.6123574579264536}


In [50]:
run_data = [r.summary for r in runs if len(r.summary.keys()) > 0]


{'clip': 100, 'optimizer': 'adam', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 3} {'_step': 3999, '_wandb': {'runtime': 102}, '_runtime': 102.82936096191406, 'iteration': 4000, '_timestamp': 1712491537.041005, 'loss': 0.6123574579264536}
{'clip': 0, 'optimizer': 'sgd', 'learning_rate': 0.00025, 'teacher_forcing_ratio': 0, 'decoder_learning_ratio': 10} {'_runtime': 94.13869261741638, 'iteration': 4000, '_timestamp': 1712491428.0132327, 'loss': 0.3573152862364441, '_step': 3999, '_wandb': {'runtime': 94}}
{'clip': 100, 'optimizer': 'sgd', 'learning_rate': 0.001, 'teacher_forcing_ratio': 1, 'decoder_learning_ratio': 1} {'loss': 0.4165045227932763, '_step': 3999, '_wandb': {'runtime': 94}, '_runtime': 94.10815954208374, 'iteration': 4000, '_timestamp': 1712491326.4636605}
{'clip': 100, 'optimizer': 'adam', 'learning_rate': 0.0001, 'teacher_forcing_ratio': 0.5, 'decoder_learning_ratio': 10} {'loss': 0.5289357709867273, '_step': 3999, '_wandb': {'runtime':

NameError: name 'pd' is not defined

In [61]:
import pandas as pd
from rich import print as rprint
from pprint import pprint
for r in runs:
    pprint(r.config)

summaries = [r.summary for r in runs]
config = [r.config for r in runs]

results_df = pd.DataFrame(config)
results_df['loss'] = [s['loss'] for s in summaries]
results_df['runtime'] = [s['_wandb']['runtime'] for s in summaries]
results_df = results_df.sort_values('loss')
display(results_df)



{'clip': 100,
 'decoder_learning_ratio': 3,
 'learning_rate': 0.00025,
 'optimizer': 'adam',
 'teacher_forcing_ratio': 0}
{'clip': 0,
 'decoder_learning_ratio': 10,
 'learning_rate': 0.00025,
 'optimizer': 'sgd',
 'teacher_forcing_ratio': 0}
{'clip': 100,
 'decoder_learning_ratio': 1,
 'learning_rate': 0.001,
 'optimizer': 'sgd',
 'teacher_forcing_ratio': 1}
{'clip': 100,
 'decoder_learning_ratio': 10,
 'learning_rate': 0.0001,
 'optimizer': 'adam',
 'teacher_forcing_ratio': 0.5}
{'clip': 25,
 'decoder_learning_ratio': 5,
 'learning_rate': 0.0001,
 'optimizer': 'sgd',
 'teacher_forcing_ratio': 1}
{'clip': 100,
 'decoder_learning_ratio': 5,
 'learning_rate': 0.001,
 'optimizer': 'sgd',
 'teacher_forcing_ratio': 1}
{'clip': 25,
 'decoder_learning_ratio': 3,
 'learning_rate': 0.00025,
 'optimizer': 'adam',
 'teacher_forcing_ratio': 0.5}
{'clip': 100,
 'decoder_learning_ratio': 1,
 'learning_rate': 0.0005,
 'optimizer': 'sgd',
 'teacher_forcing_ratio': 0.5}
{'clip': 50,
 'decoder_learning_

Unnamed: 0,clip,optimizer,learning_rate,teacher_forcing_ratio,decoder_learning_ratio,loss,runtime
1,0,sgd,0.00025,0.0,10,0.357315,94
2,100,sgd,0.001,1.0,1,0.416505,94
3,100,adam,0.0001,0.5,10,0.528936,102
0,100,adam,0.00025,0.0,3,0.612357,102
4,25,sgd,0.0001,1.0,5,0.791215,94
6,25,adam,0.00025,0.5,3,0.888097,102
5,100,sgd,0.001,1.0,5,0.910652,94
7,100,sgd,0.0005,0.5,1,1.403808,94
8,50,adam,0.00025,1.0,3,1.859386,102
9,100,sgd,0.0005,0.0,10,2.778005,93


In [84]:
from IPython.display import IFrame
IFrame(src="https://wandb.ai/login", width=700,  height=500)



In [83]:
import wandb
from IPython.core.display import HTML
from IPython.display import IFrame
IFrame(src="https://wandb.ai/login", width=700,  height=500)



In [77]:
import wandb

wandb.log({"example": wandb.Html('<iframe src="https://wandb.ai/login" style="border: none; width :100%; height:500px" />')})


Error: You must call wandb.init() before wandb.log()

In [49]:
def reset_wandb_env():
    exclude = {
        "WANDB_PROJECT",
        "WANDB_ENTITY",
        "WANDB_API_KEY",
    }
    for k, v in os.environ.items():
        if k.startswith("WANDB_") and k not in exclude:
            del os.environ[k]
reset_wandb_env()

In [1]:
import wandb

wandb.login()
# wandb.init(entity="dariennouri", project="chatbot2", dir=".")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdariennouri[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:

api = wandb.Api()


sweep = api.sweep(f"dariennouri/chatbot/{sweep_id}")

runs = sorted(sweep.runs,
  key=lambda run: run.summary.get("val_acc", 0), reverse=True)
val_acc = runs[0].summary.get("val_acc", 0)
run0 = runs[0]

%env

NameError: name 'sweep_id' is not defined

In [None]:

print(f"Best run {runs[0].name} with {val_acc}% validation accuracy")

runs[0].file("model.h5").download(replace=True)
print("Best model saved to model-best.h5")