In [None]:
import numpy as np
import torch
import math
import random
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn
from sentence_transformers import SentenceTransformer
import sentencepiece as spm
from torch.utils.data import DataLoader
import re

In [None]:
"""
MacOSv1 , vocabsize = 1739
MacOSv1 , vocabsize = 1785
"""

In [None]:
def set_seed(seed=50):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    Args:
        seed: Integer seed handed to all three generators (default 50).
    """
    # Same order as before: random, then NumPy, then PyTorch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

In [None]:
set_seed()

In [None]:
# Use Apple's Metal (MPS) backend when it is available, otherwise fall back to the CPU.
# `torch.backends.mps.is_available()` is the documented availability check and is
# present on PyTorch releases that predate `torch.mps.is_available()`.
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

In [None]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
a = model.encode('This is a test sentence', convert_to_tensor=True)
print(a.shape)

In [None]:
# Absolute, machine-specific locations of the files this notebook reads and writes.
# NOTE(review): these only resolve on the original author's machine; consider deriving
# them from a single configurable project directory.
VOCAB_PATH = '/Users/ibrahimbaldediallo/Documents/Code/Jarvis_project/vocab/macos_vocab.txt'
DATA_PATH = '/Users/ibrahimbaldediallo/Documents/Code/Jarvis_project/dataset/dataset5-bis.txt'
TokenIZER_PATH = '/Users/ibrahimbaldediallo/Documents/Code/Jarvis_project/notebook/tokenizer3.model'
SPECIAL_KEYS_PATH = '/Users/ibrahimbaldediallo/Documents/Code/Jarvis_project/vocab/key_vocab.txt'
OUTPUT_PATH = "/Users/ibrahimbaldediallo/Documents/Code/Jarvis_project/vocab/test2.txt"

In [None]:
import re

# Keyboard key names (plus the "+" combinator) searched for inside quoted tokens
KEYWORDS = ["cmd", "ctrl", "shift", "alt", "option", "fn", "enter", "esc", "delete", "tab", "space", "capslock", "arrow", "+"]

def clean_quoted_tokens(text):
    """Remove the spaces inside single-quoted spans that mention a keyboard key.

    A quoted span is rewritten when its lower-cased content contains any entry of
    `KEYWORDS` as a substring; every other quoted span is returned untouched.

    Args:
        text: The string to process.

    Returns:
        The string with matching quoted spans compacted, e.g. "'cmd + space'"
        becomes "'cmd+space'".
    """
    def _compact(quoted):
        inner = quoted.group(1)
        lowered = inner.lower()
        for keyword in KEYWORDS:
            if keyword in lowered:
                return "'" + inner.replace(' ', '') + "'"
        # No key name detected: keep the original quoted text as is.
        return quoted.group(0)

    quoted_span = re.compile(r"'([^']*)'")
    return quoted_span.sub(_compact, text)


In [None]:
# Initialise the containers that the dataset-building code fills in
l = []        # only referenced by the disabled file-parsing code kept in the string literal below
X = []        # input instructions
Y = []        # target action strings, aligned with X
dataset = []  # (x, y) pairs
"""

# Lecture du fichier
with open(DATA_PATH, "r") as f:
    cleaned_lines = [clean_quoted_tokens(line) for line in f]
    for line in cleaned_lines:
        line = line.lower()
        l.append(line.strip())

for data in l:
    split_data = data.split(";")
    x = split_data[0]
    y = split_data[1]
    try:
        y = y.replace("'cmd + space'", "'cmd+space'")
        y = y.replace("[", "")
        y = y.replace("]", "")
        y = y.replace(",", "")
        y = y.replace("'", "")
        y = y.replace("[", "")
    except:
        pass

    dataset.append((x,y))
    X.append(x)
    Y.append(y)
"""

app_path = "/Users/ibrahimbaldediallo/Documents/Code/Jarvis_project/vocab/app_vocab.txt"
instruction_path = "/Users/ibrahimbaldediallo/Documents/Code/Jarvis_project/vocab/variation.txt"

# Instruction templates: the lines of the variation file that start with "-".
# Each template carries a `###` placeholder that is replaced by an application name.
instruction = []
with open(instruction_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if line.startswith("-"):
            instruction.append(line.replace("- ", ""))

# Characters that the previous implementation stripped from the target string
# (it built "['cmd+space', '<app>', 'enter']" and then removed the list syntax).
_LIST_SYNTAX = str.maketrans("", "", "[],'")

# One (instruction, action sequence) sample per application / template combination.
with open(app_path, "r", encoding="utf-8") as f:
    for app in f:
        app = app.strip()
        if not app:
            # A blank line would otherwise create samples with an empty application name.
            continue
        y = f"cmd+space {app.translate(_LIST_SYNTAX)} enter"
        for instruct in instruction:
            x = instruct.replace('###', app)
            dataset.append((x, y))
            X.append(x)
            Y.append(y)

print(dataset[:10])

In [None]:
print(dataset[0][1])


In [None]:
print(Y[0])
print(Y[0].split())

In [None]:
# Distinct whitespace-separated tokens of the inputs and targets, in first-seen order
# (tokens of x come before tokens of the matching y). `dict.fromkeys` de-duplicates
# with O(1) lookups while keeping insertion order, instead of scanning a growing list.
words = list(dict.fromkeys(
    word
    for x, y in zip(X, Y)
    for word in x.split() + y.split()
))

In [None]:
print(words)
print(len(words))

In [None]:
from collections import OrderedDict

def build_action_vocab(vocab_file, extra_words=None):
    """Build the token <-> id mappings used for the decoder vocabulary.

    Tokens are collected in this order and de-duplicated keeping the first
    occurrence: the four special tokens, the non-empty stripped lines of
    `vocab_file`, the notebook-level `words` list (when it exists), then
    `extra_words`.

    Args:
        vocab_file: Path of a UTF-8 text file with one token per line.
        extra_words: Optional iterable of additional tokens.

    Returns:
        A tuple `(action_to_id, id_to_action)`; `<PAD>`, `<BOS>`, `<EOS>` and
        `<UNK>` always receive ids 0, 1, 2 and 3.
    """
    # Read the vocabulary file, skipping empty lines
    with open(vocab_file, 'r', encoding='utf-8') as f:
        vocab_lines = [line.strip() for line in f if line.strip()]

    special_tokens = ["<PAD>", "<BOS>", "<EOS>", "<UNK>"]

    # `words` is defined by an earlier notebook cell. Look it up defensively so the
    # function no longer raises NameError when used (or tested) without that cell.
    dataset_words = globals().get("words") or []

    all_tokens = special_tokens + vocab_lines + list(dataset_words)
    if extra_words:
        all_tokens += list(extra_words)

    # Drop duplicates while preserving order
    all_tokens_unique = list(OrderedDict.fromkeys(all_tokens))

    # Build both lookup directions
    action_to_id = {token: idx for idx, token in enumerate(all_tokens_unique)}
    id_to_action = {idx: token for token, idx in action_to_id.items()}

    return action_to_id, id_to_action


In [None]:
action_to_id, id_to_action = build_action_vocab(VOCAB_PATH)
print(len(action_to_id))
print(action_to_id["safari"])
print(id_to_action[8])

In [None]:
vocab_size = len(action_to_id)
print(vocab_size)

In [None]:
"""
random.shuffle(X)
random.shuffle(Y)

X = X[:int(0.3*len(X))]
Y = Y[:int(0.3*len(Y))]
"""

In [None]:
print(len(X))
print(len(Y))

In [None]:
Y_idx_sentence = []
# Turn every target string into the list of vocabulary ids of its
# whitespace-separated tokens (a token missing from the vocabulary raises KeyError).
Y_idx = [[action_to_id[word] for word in s.split()] for s in Y]


In [None]:
print(len(Y_idx))

In [None]:
from torch.nn.utils.rnn import pad_sequence

PAD = action_to_id.get("<PAD>", 0)
BOS = action_to_id.get("<BOS>", 1)
EOS = action_to_id.get("<EOS>", 2)
UNK = action_to_id.get("<UNK>", 3)

# Encode all instructions with a single call: `encode` batches the sentences
# internally, which avoids one separate model invocation per instruction.
encoder_inputs = model.encode(X, convert_to_tensor=True).to(device)  # (N, embedding_dim)

# Teacher forcing: the decoder reads <BOS> + y and has to predict y + <EOS>
decoder_inputs = [torch.tensor([BOS] + y, dtype=torch.long) for y in Y_idx]
decoder_targets = [torch.tensor(y + [EOS], dtype=torch.long) for y in Y_idx]

# Pad every sequence to the length of the longest one
decoder_inputs_padded = pad_sequence(decoder_inputs, batch_first=True, padding_value=PAD)
decoder_targets_padded = pad_sequence(decoder_targets, batch_first=True, padding_value=PAD)

# Longest decoder sequence (inputs and targets always have the same length)
max_len = decoder_inputs_padded.size(1)

# One sample = (sentence embedding, (padded decoder input, padded decoder target))
data = [
    (embedding, (dec_in, dec_tgt))
    for embedding, dec_in, dec_tgt in zip(encoder_inputs, decoder_inputs_padded, decoder_targets_padded)
]



In [None]:
a = data[0]
b = data[1]
print(a[1][0].shape)
print(b[1][0].shape)

In [None]:
def collate_fn(batch):
    """Stack a list of samples into batched tensors.

    Args:
        batch: Sequence of samples shaped like
            `(encoder_tensor, (decoder_input_tensor, decoder_target_tensor))`.

    Returns:
        Dict with keys "encoder_input", "decoder_input" and "decoder_target",
        each holding the `torch.stack` of the corresponding per-sample tensors.
    """
    encoder_rows, decoder_in_rows, decoder_tgt_rows = [], [], []
    for sample in batch:
        decoder_pair = sample[1]
        encoder_rows.append(sample[0])
        decoder_in_rows.append(decoder_pair[0])
        decoder_tgt_rows.append(decoder_pair[1])

    stacked = {}
    stacked["encoder_input"] = torch.stack(encoder_rows)
    stacked["decoder_input"] = torch.stack(decoder_in_rows)
    stacked["decoder_target"] = torch.stack(decoder_tgt_rows)
    return stacked

In [None]:
from torch.utils.data import DataLoader


# DataLoader over the (embedding, (decoder_input, decoder_target)) samples
dataloader = DataLoader(
    data,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn,  # pass the function itself; the wrapping lambda added nothing
)

# Example of a single iteration: fetch one batch instead of walking the whole loader
batch = next(iter(dataloader))
encoder_input = batch["encoder_input"]        # (B, D)
decoder_input = batch["decoder_input"]        # (B, T)
decoder_target = batch["decoder_target"]      # (B, T)

In [None]:
def decode(sequences, id_to_action, stop_token="<EOS>"):
    """Convert sequences of token ids back into lists of token strings.

    Args:
        sequences: Iterable of id sequences; either nested Python lists or a
            2-D tensor/array of ids.
        id_to_action: Mapping from id to token string.
        stop_token: Decoding of a sequence stops at (and excludes) this token.

    Returns:
        One list of token strings per input sequence. Ids missing from
        `id_to_action` are rendered as "<UNK>".
    """
    decoded_sequences = []
    for sequence in sequences:
        decoded = []
        for idx in sequence:
            # Iterating a tensor yields 0-d tensors, which hash by identity and so
            # never match the integer keys of the mapping: unwrap them first.
            key = idx.item() if hasattr(idx, "item") else idx
            token = id_to_action.get(key, "<UNK>")
            if token == stop_token:
                break
            decoded.append(token)
        decoded_sequences.append(decoded)
    return decoded_sequences

In [None]:
decoded_targets = decode(batch["decoder_target"], id_to_action)
print(batch["decoder_target"].tolist())
print(decoded_targets)

In [None]:
for idx in batch["decoder_input"][0].tolist():
    print(idx, id_to_action.get(idx, "<UNK>"))

In [None]:
"""
specials_keys = []
with open(SPECIAL_KEYS_PATH, "r") as vocab:
    for line in vocab:
        if not line.startswith("#"):
            specials_keys.append(line.strip())

print(specials_keys)
"""

In [None]:
"""
w = []
try:
    with open(VOCAB_PATH, "r") as vocab, open(OUTPUT_PATH, "w") as output:
        for line in vocab:
            word = line.strip()  # Strip whitespace from the word
            if word in specials_keys:
                word = "#"
            if not line.startswith("#") and word != "#":
                w.append(word)
                output.write(word + "\n")  # Write the word to the output file followed by a newline

    print(f"Filtered words have been written to {OUTPUT_PATH}")
except FileNotFoundError:
    print(f"The file at {VOCAB_PATH} was not found.")
except Exception as e:
    print(f"An error occurred: {e}")
"""

In [None]:
"""
key_list = [
    # Commandes système
    "'cmd+space'", "'cmd+tab'", "'enter'", "'return'", "'tab'", "'esc'",
    # Apps
    "'safari'", "'chrome'", "'terminal'", "'finder'",
     # Symboles techniques (à traiter comme un seul token)
    "'[',", "', '", "']'", "' ▁'", "'['cmd+space', '", "'safari', '", "'enter']'",
    # Modificateurs
    "'cmd'", "'shift'", "'ctrl'", "'alt'", "'fn'"
]
"""

In [None]:
"""
phase1_listx = []
phase1_listy = []
with open(phase1, "r") as file:
    for line in file:
        if line.startswith("Input"):
            phase1_listx.append(line)
        elif line.startswith("Output"):
            phase1_listy.append(line)
        
print(len(phase1_listx))
print(len(phase1_listy))
"""

In [None]:
"""
spm.SentencePieceTrainer.train(
    input=OUTPUT_PATH,
    model_prefix='tokenizer3',
    model_type='bpe',
    vocab_size=1500,  # Légèrement augmenté
    user_defined_symbols=key_list,
    pad_id=3,
    treat_whitespace_as_suffix=True,  # Nouveau
    split_by_whitespace=False,  # Important
    remove_extra_whitespaces=False
)
# Load trained tokenizer
sp = spm.SentencePieceProcessor(model_file=TokenIZER_PATH)
"""


In [None]:
"""vocab_size = len(sp)
print(f"Vocab size: {vocab_size}")"""

In [None]:
"""
# Test detokenization
print(sp.Decode([2, 4,1452,26, 10]))  # Example output: "▁Open ▁Alacritty ▁"
"""

In [None]:
"""
special_tokens = ["<s>", "<PAD>", "</s>"]  # 0, 1, 2
special_tokens_values = [sp.bos_id(), sp.pad_id(), sp.eos_id()]
print(special_tokens[1])
"""

In [None]:
MAX_LEN = 32

In [None]:
"""x_tokenized = [" ".join(sp.encode_as_pieces(seq)) for seq in X]
y_tokenized = [" ".join(sp.encode_as_pieces(seq)) for seq in Y]
print(x_tokenized[0])
print(y_tokenized[0])
"""

In [None]:
"""
def pad_and_convert_to_tensor(sequences, max_len=MAX_LEN, return_tensor=True):
    
    Encode text with SentencePiece, then pad/truncate to fixed length.
    Ensures all output tensors have length max_len.
    
    pad_id = sp.pad_id()  

    padded_sequences = []
    if return_tensor:
        for seq in sequences:
            encoded = sp.Encode(seq, add_bos=True, add_eos=True, out_type=int)

            # Troncature
            encoded = encoded[:max_len - 1]

            # Padding
            while len(encoded) < max_len:
                encoded.append(pad_id)

            
            padded_sequences.append(torch.tensor(encoded, dtype=torch.long).to(device))
    else:
            for seq in sequences:
                tokens = seq.strip().split()

                # Truncate if too long
                tokens = tokens[:max_len - 1]

                # Add <END>
                tokens.append("</s>")

                # Pad if too short
                while len(tokens) < max_len:
                    tokens.append("<PAD>")


                padded_sequences.append(" ".join(tokens))

    return padded_sequences
"""

In [None]:
"""
x_train = pad_and_convert_to_tensor(x_tokenized, return_tensor=False)
y_train = pad_and_convert_to_tensor(Y, return_tensor=True)
print(X[:2])
print(x_train[:2])
print()
print(Y[:2])
print(y_train[:2])
"""

In [None]:
"""
training_data = list(zip(x_train[:int(0.9*len(x_train))], y_train[:int(0.9*len(y_train))]))
test_data = list(zip(x_train[int(0.9*len(x_train)):], y_train[int(0.9*len(y_train)):]))

print(len(training_data))
print(len(test_data))
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)
"""

In [None]:
"""
train_features, train_labels = next(iter(train_dataloader))
print(train_features[0])
print(train_labels[0].tolist())
print(sp.Decode(train_labels[0].tolist()))
"""

In [None]:

"""embeddings = model.encode(X)
embeddings = np.array(embeddings) #(n, 384)
#embeddings = np.permute_dims(embeddings, (1, 0)) # Should be (384, n) 
print(embeddings.shape)  
#print(embeddings)

projection = umap.UMAP(n_neighbors=5, n_components=3).fit_transform(embeddings[:20])
ax.scatter(projection[:, 0], projection[:, 1], projection[:, 2], marker='o')
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.show()"""

In [None]:
class ResidualFFN(nn.Module):
    """Feed-forward network: input projection, residual blocks, output projection.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Width used by the residual blocks.
        output_dim: Size of the last dimension of the output.
        num_blocks: Number of `ResidualBlock` layers (default 2).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_blocks=2):
        super().__init__()

        # Lift the input to the hidden width
        self.input_proj = nn.Linear(input_dim, hidden_dim)

        # Stack of residual blocks operating at the hidden width
        blocks = []
        for _ in range(num_blocks):
            blocks.append(ResidualBlock(hidden_dim))
        self.res_blocks = nn.ModuleList(blocks)

        # Map the hidden representation to the requested output width
        self.output_proj = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply the projections and residual blocks to `x` (last dim = input_dim)."""
        hidden = self.input_proj(x)
        for block in self.res_blocks:
            hidden = block(hidden)
        return self.output_proj(hidden)
        
class ResidualBlock(nn.Module):
    """Residual MLP block: `LayerNorm(x + MLP(x))`.

    The MLP expands to four times `dim`, applies GELU and dropout, then projects
    back to `dim`.

    Args:
        dim: Feature width of the input and output.
        dropout: Dropout probability inside the MLP (default 0.3).
    """

    def __init__(self, dim, dropout=0.3):
        super().__init__()
        expanded_dim = dim * 4
        self.layers = nn.Sequential(
            nn.Linear(dim, expanded_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(expanded_dim, dim),
        )
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        """Return the normalised sum of `x` and its MLP transformation."""
        residual_sum = x + self.layers(x)
        return self.norm(residual_sum)

In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a sequence-first tensor.

    Args:
        dim: Feature width of the tensors to encode (odd values are supported).
        max_len: Number of positions precomputed in the table (default 512).
    """

    def __init__(self, dim, max_len=512):
        super().__init__()
        pe = torch.zeros(max_len, dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd `dim` there is one cosine column fewer than sine columns, so
        # trim the frequencies; for an even `dim` the slice is the full vector.
        pe[:, 1::2] = torch.cos(position * div_term[: dim // 2])
        pe = pe.unsqueeze(1)  # (max_len, 1, dim)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return `x` plus the encodings of its first `x.size(0)` positions.

        Args:
            x: Tensor of shape (seq_len, batch_size, dim).

        Raises:
            ValueError: If `seq_len` exceeds the number of precomputed positions.
        """
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional table size {self.pe.size(0)}"
            )
        return x + self.pe[:seq_len]

class MacOSActionModel(nn.Module):
    """Decoder that turns a sentence embedding into a sequence of action token ids.

    The sentence embedding (width 384) is projected by a `ResidualFFN` to `dim`
    and used as a length-1 memory for a Transformer decoder over action tokens.

    Args:
        encoder: Object exposing `encode(texts, convert_to_tensor=True)`
            (a pretrained SentenceTransformer in this notebook).
        dim: Model width of the decoder.
        hidden: Hidden width of the ResidualFFN and of the decoder feed-forward.
        vocab_size: Number of action tokens.
        max_len: Number of positions available in the positional encoding.
    """

    def __init__(self, encoder, dim, hidden, vocab_size, max_len=128):
        super().__init__()
        self.encoder = encoder  # pretrained SentenceTransformer
        self.rffn = ResidualFFN(384, hidden, dim)
        self.embedding = nn.Embedding(vocab_size, dim)
        self.pos_encoding = PositionalEncoding(dim, max_len=max_len)
        # Kept as an attribute so the parameter names of saved checkpoints stay the same.
        self.decoder_layer = nn.TransformerDecoderLayer(d_model=dim, nhead=16, dim_feedforward=hidden, dropout=0.3)
        self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=2)
        self.final_projection = nn.Linear(dim, vocab_size)
        self.max_len = max_len
        self.dim = dim
        self.vocab_size = vocab_size

    def _encode_texts(self, texts):
        """Embed `texts` with the sentence encoder, on the device of this module."""
        if isinstance(texts, str):
            # A bare string would come back as a 1-D embedding without batch axis.
            texts = [texts]
        with torch.no_grad():
            x = self.encoder.encode(texts, convert_to_tensor=True)
        return x.to(self.embedding.weight.device)

    def _decode_logits(self, memory, token_ids):
        """Run the causal decoder; returns logits of shape (seq_len, batch_size, vocab_size)."""
        tgt = self.embedding(token_ids)  # (batch_size, seq_len, dim)
        tgt = tgt.permute(1, 0, 2)  # (seq_len, batch_size, dim)
        tgt = self.pos_encoding(tgt)
        # Causal mask: position i may only attend to positions <= i
        tgt_mask = nn.Transformer.generate_square_subsequent_mask(tgt.size(0)).to(tgt.device)
        z = self.transformer_decoder(tgt, memory, tgt_mask=tgt_mask)
        return self.final_projection(z)

    def forward(self, x_texts, tgt):
        """Compute logits from raw instruction strings.

        Args:
            x_texts: list of strings, len = batch_size
            tgt: tensor of token ids, shape (batch_size, seq_len)

        Returns:
            Logits of shape (batch_size, seq_len, vocab_size).
        """
        x = self._encode_texts(x_texts)  # (batch_size, 384)
        # Same length-1 memory as `forward_training`/`generate`: attending over
        # repeated copies of one vector gives the same attention output.
        return self.forward_training(x, tgt.to(x.device))

    def forward_training(self, x, tgt):
        """Compute logits from precomputed sentence embeddings.

        Args:
            x: sentence embeddings, shape (batch_size, 384)
            tgt: tensor of token ids, shape (batch_size, seq_len)

        Returns:
            Logits of shape (batch_size, seq_len, vocab_size).
        """
        # The projected embedding is the single memory slot: (1, batch_size, dim)
        memory = self.rffn(x).unsqueeze(0)
        logits = self._decode_logits(memory, tgt)  # (seq_len, batch_size, vocab_size)
        return logits.permute(1, 0, 2)

    @torch.no_grad()
    def generate(self, x_text:list[str], max_len=32, start_token_id=1, end_token_id=2):
        """Greedy decoding of action ids for a batch of instructions.

        Args:
            x_text: list of strings (a single string is treated as a batch of one).
            max_len: maximum number of tokens generated after the start token;
                capped at the size of the positional encoding table.
            start_token_id: id the sequences start with.
            end_token_id: id that ends a sequence.

        Returns:
            Tensor of shape (batch_size, generated_len) that starts with
            `start_token_id`. Once a row has produced `end_token_id`, all its
            later positions are filled with `end_token_id`.
        """
        # Decode in eval mode so dropout is off, then restore the previous mode.
        was_training = self.training
        self.eval()
        try:
            x = self._encode_texts(x_text)
            memory = self.rffn(x).unsqueeze(0)  # (1, batch_size, dim)

            batch_size = x.size(0)
            device = x.device

            generated = torch.full((batch_size, 1), start_token_id, dtype=torch.long, device=device)
            finished = torch.zeros(batch_size, dtype=torch.bool, device=device)

            # The decoder input must never be longer than the positional table.
            steps = min(max_len, self.max_len)
            for _ in range(steps):
                logits = self._decode_logits(memory, generated)
                # Greedy choice on the last time step -> (batch_size,)
                next_token = torch.argmax(logits[-1], dim=-1)
                # Rows that already ended keep emitting the end token.
                next_token = next_token.masked_fill(finished, end_token_id)

                generated = torch.cat([generated, next_token.unsqueeze(1)], dim=1)

                finished = finished | (next_token == end_token_id)
                if finished.all():
                    break

            return generated  # (batch_size, seq_len_generated)
        finally:
            self.train(was_training)

In [None]:
dim, hidden, = 512, 512

In [None]:
actor = MacOSActionModel(model, dim, hidden, vocab_size).to(device)

In [None]:
actor

In [None]:
"""
train_features, train_labels = next(iter(train_dataloader))
p = actor(train_features[:2], train_labels[:2])
print("target shape :", train_labels[0].shape)
print("Model output shape :", p.shape)
out = torch.argmax(p, dim=-1).squeeze(0).tolist()
print("Model output indices :", out)

# Convert the indices to words
decoded_words = sp.Decode(out)
print("Model output :",decoded_words)

p = actor.generate(["open safari", "open alacritty"], max_length=20, beam_size=5)
print("Model output shape :", p.shape)
print("Model output indices :", sp.Decode(p[0].tolist()))
print("Model output indices :", sp.Decode(p[1].tolist()))
"""


In [None]:
print(batch["encoder_input"].shape)
print(batch["decoder_target"][0])
print(batch["decoder_target"][:, 1:][0])
print(batch["decoder_input"][0])
print(batch["decoder_input"][:, :-1][0])

In [None]:
p = actor.forward_training(batch["encoder_input"].to(device), batch["decoder_target"].to(device))
print(p)
print(p.shape)

In [None]:
@torch.no_grad()
def evaluate_model(model, test_dataset):
    """Average cross-entropy loss of `model` over `test_dataset`.

    Two batch formats are accepted:
      * dict batches with keys "encoder_input", "decoder_input" and
        "decoder_target" (same format as the training loop), evaluated with
        `model.forward_training`;
      * legacy `(texts, token_ids)` pairs, where the ids are shifted to form
        decoder input / target and `model(texts, ...)` is called.

    Padding positions (id 0) are ignored. The model's train/eval mode is
    restored before returning.

    Args:
        model: The model to evaluate.
        test_dataset: Iterable of batches.

    Returns:
        Mean of the per-batch losses, or NaN when there are no batches.
    """
    was_training = model.training
    model.eval()
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    # Run on whatever device the model lives on instead of relying on a global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss = 0.0
    num_batches = 0
    try:
        for test_batch in test_dataset:
            if isinstance(test_batch, dict):
                tgt_output = test_batch["decoder_target"].to(run_device)
                output = model.forward_training(
                    test_batch["encoder_input"].to(run_device),
                    test_batch["decoder_input"].to(run_device),
                )
            else:
                test_batch_x, test_batch_y = test_batch
                test_batch_y = test_batch_y.to(run_device)
                tgt_input = test_batch_y[:, :-1]
                tgt_output = test_batch_y[:, 1:]
                output = model(test_batch_x, tgt_input)

            # Take the class count from the logits rather than a global vocab_size
            loss = criterion(output.reshape(-1, output.size(-1)), tgt_output.reshape(-1))
            total_loss += loss.item()
            num_batches += 1
    finally:
        model.train(was_training)

    return total_loss / num_batches if num_batches else float("nan")

In [None]:
def train_model(model, train_dataset, test_dataset, epochs, learning_rate):
    """Train `model` with teacher forcing and return the per-epoch training loss.

    Args:
        model: Model exposing `forward_training(encoder_input, decoder_input)`.
        train_dataset: Iterable of dict batches with keys "encoder_input",
            "decoder_input" and "decoder_target".
        test_dataset: Currently unused (validation is disabled).
        epochs: Number of passes over `train_dataset`.
        learning_rate: Initial AdamW learning rate, annealed with a cosine
            schedule over `epochs`.

    Returns:
        List with the mean batch loss of every epoch.
    """
    model = model.to(device)

    criterion = nn.CrossEntropyLoss(ignore_index=0)  # id 0 is ignored in the loss
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    epoch_losses = []

    for epoch in range(epochs):
        # Re-assert train mode every epoch in case something switched it off.
        model.train()
        total_loss = 0.0
        num_batches = 0

        for batch in train_dataset:
            encoder_input = batch["encoder_input"].to(device)
            # Already shifted when the dataset was built: input is <BOS> + y, target is y + <EOS>
            decoder_input = batch["decoder_input"].to(device)
            decoder_target = batch["decoder_target"].to(device)

            optimizer.zero_grad()
            output = model.forward_training(encoder_input, decoder_input)  # shape: (batch_size, seq_len, vocab_size)

            # Take the class count from the logits rather than a global vocab_size
            loss = criterion(output.reshape(-1, output.size(-1)), decoder_target.reshape(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # max(..., 1) avoids a ZeroDivisionError when the loader yields no batch
        avg_loss = total_loss / max(num_batches, 1)
        epoch_losses.append(avg_loss)

        scheduler.step()

        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_loss:.4f}")

    return epoch_losses

In [None]:
train_model(actor, dataloader, None, epochs=2, learning_rate=4e-5)  # best lr for now is 1e-4, 4e-5, 2e-5 avec 2 epochs

In [None]:
import json

# Save the model weights and both vocabulary mappings in the current working directory
torch.save(actor.state_dict(), 'pretrainded_actor.pth')

# NOTE(review): JSON object keys are always strings, so the integer ids of
# `id_to_action` come back as str keys after `json.load`; convert them with int()
# when reloading this file.
with open('id_to_action.json', 'w') as fp:
    json.dump(id_to_action, fp)
    
with open('action_to_id.json', 'w') as fp:
    json.dump(action_to_id, fp)

In [None]:

# Greedy-decode one instruction and print the generated ids
p = actor.generate(["open finder"], max_len=32)
print("Model output shape :", p.shape)
print(p)
#print("Model output indices :", clean_generation(sp.Decode(p[1].tolist())))
#print("Model output indices :", clean_generation(sp.Decode(p[-1].tolist())))
# Map ids to tokens, dropping ids 1, 2 and 3 (<BOS>, <EOS>, <UNK> in the vocabulary order)
print([id_to_action[n] for n in p[0].tolist() if n!=1 and n!=2 and n!=3])

In [None]:
actor.load_state_dict(torch.load('pretrainded_actor.pth'))
actor.eval()

In [None]:

# Greedy-decode one instruction with the reloaded weights and print the generated ids
p = actor.generate(["open weather"], max_len=32)
print("Model output shape :", p.shape)
print(p)
#print("Model output indices :", clean_generation(sp.Decode(p[1].tolist())))
#print("Model output indices :", clean_generation(sp.Decode(p[-1].tolist())))
# Map ids to tokens, dropping ids 1, 2 and 3 (<BOS>, <EOS>, <UNK> in the vocabulary order)
print([id_to_action[n] for n in p[0].tolist() if n!=1 and n!=2 and n!=3])