# 

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import Counter
import math
import numpy as np
import re
import os
import pandas as pd
import os
from zipfile import ZipFile, BadZipFile
import json
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances


os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

torch.manual_seed(23)

<torch._C.Generator at 0x79bf20473e10>

In [2]:
def check_gpu():
    if torch.cuda.is_available():
        print("CUDA está disponible.")
        print(f"Hay {torch.cuda.device_count()} GPU(s) disponible(s).")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    else:
        print("CUDA no está disponible. No hay GPU accesible.")

check_gpu()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

CUDA está disponible.
Hay 1 GPU(s) disponible(s).
GPU 0: NVIDIA GeForce RTX 2060


## TRANSFORMER ARCHITECTURE

In [3]:
MAX_SEQ_LEN = 128 # max num of words per phrase for translate

In [4]:
class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_seq_len = MAX_SEQ_LEN):
        super().__init__()
        self.pos_embed_matrix = torch.zeros(max_seq_len, d_model, device=device)
        token_pos = torch.arange(0, max_seq_len, dtype = torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() 
                             * (-math.log(10000.0)/d_model))
        self.pos_embed_matrix[:, 0::2] = torch.sin(token_pos * div_term)
        self.pos_embed_matrix[:, 1::2] = torch.cos(token_pos * div_term)
        self.pos_embed_matrix = self.pos_embed_matrix.unsqueeze(0).transpose(0,1)
        
    def forward(self, x):
#         print(self.pos_embed_matrix.shape)
#         print(x.shape)
        return x + self.pos_embed_matrix[:x.size(0), :]

class MultiHeadAttention(nn.Module):
    def __init__(self, d_model = 512, num_heads = 8):
        super().__init__()
        assert d_model % num_heads == 0, 'Embedding size not compatible with num heads'
        
        self.d_v = d_model // num_heads
        self.d_k = self.d_v
        self.num_heads = num_heads
        
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)
        
    def forward(self, Q, K, V, mask = None):
        batch_size = Q.size(0)
        '''
        Q, K, V -> [batch_size, seq_len, num_heads*d_k]
        after transpose Q, K, V -> [batch_size, num_heads, seq_len, d_k]
        '''
        Q = self.W_q(Q).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2 )
        K = self.W_k(K).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2 )
        V = self.W_v(V).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2 )
        
        weighted_values, attention = self.scale_dot_product(Q, K, V, mask)
        weighted_values = weighted_values.transpose(1, 2).contiguous().view(batch_size, -1, self.num_heads*self.d_k)
        weighted_values = self.W_o(weighted_values)
        
        return weighted_values, attention
        
        
    def scale_dot_product(self, Q, K, V, mask = None):
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        attention = F.softmax(scores, dim = -1)
        weighted_values = torch.matmul(attention, V)
        
        return weighted_values, attention
        

class PositionFeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super().__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        
    def forward(self, x):
        return self.linear2(F.relu(self.linear1(x)))
    
class EncoderSubLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout = 0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.ffn = PositionFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.droupout1 = nn.Dropout(dropout)
        self.droupout2 = nn.Dropout(dropout)
    
    def forward(self, x, mask = None):
        attention_score, _ = self.self_attn(x, x, x, mask)
        x = x + self.droupout1(attention_score)
        x = self.norm1(x)
        x = x + self.droupout2(self.ffn(x))
        return self.norm2(x)

class Encoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, dropout=0.1):
        super().__init__()
        self.layers = nn.ModuleList([EncoderSubLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.norm = nn.LayerNorm(d_model)
    def forward(self, x, mask=None):
        for layer in self.layers:
            x = layer(x, mask)
        return self.norm(x)

class DecoderSubLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.cross_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
        
    def forward(self, x, encoder_output, target_mask=None, encoder_mask=None):
        attention_score, _ = self.self_attn(x, x, x, target_mask)
        x = x + self.dropout1(attention_score)
        x = self.norm1(x)
        
        encoder_attn, _ = self.cross_attn(x, encoder_output, encoder_output, encoder_mask)
        x = x + self.dropout2(encoder_attn)
        x = self.norm2(x)
        
        ff_output = self.feed_forward(x)
        x = x + self.dropout3(ff_output)
        return self.norm3(x)
        
class Decoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, dropout=0.1):
        super().__init__()
        self.layers = nn.ModuleList([DecoderSubLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.norm = nn.LayerNorm(d_model)
        
    def forward(self, x, encoder_output, target_mask, encoder_mask):
        for layer in self.layers:
            x = layer(x, encoder_output, target_mask, encoder_mask)
        return self.norm(x)

In [5]:
class Transformer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers,
                 input_vocab_size, max_len=MAX_SEQ_LEN, dropout=0.1):
        super().__init__()
        self.encoder_embedding = nn.Embedding(input_vocab_size, d_model)
        self.pos_embedding = PositionalEmbedding(d_model, max_len)
        self.encoder = Encoder(d_model, num_heads, d_ff, num_layers, dropout)

        # Embeddings
        self._cached_source_embeddings = None
        
    def forward(self, source):
        # Encoder mask
        source_mask = self.mask(source)
        # Embedding and positional Encoding
        source = self.encoder_embedding(source) * math.sqrt(self.encoder_embedding.embedding_dim)
        self._cached_source_embeddings = source
        source = self.pos_embedding(source)
        # Encoder
        encoder_output = self.encoder(source, source_mask)
        
        return encoder_output
        
    def get_embeddings(self):
        if self._cached_source_embeddings is None:
            raise ValueError("Embeddings not computed yet. Call forward() first.")
        return self._cached_source_embeddings
    
    def mask(self, source):
        source_mask = (source != 0).unsqueeze(1).unsqueeze(2)
        return source_mask

## Simple test

In [6]:
seq_len_source = 10
seq_len_target = 10
batch_size = 2
input_vocab_size = 50
target_vocab_size = 50

source = torch.randint(1, input_vocab_size, (batch_size, seq_len_source))

d_model = 512
num_heads = 8
d_ff = 2048
num_layers = 6

model = Transformer(d_model, num_heads, d_ff, num_layers,
                   input_vocab_size, max_len=MAX_SEQ_LEN, dropout=0.1)

model = model.to(device)
source = source.to(device)

output = model(source)
#Expected output shape -> [batch, seq_len_target, target_vocab_size] i.e [2, 10, 50]
print(f'output.shape {output.shape}')
print(source)

output.shape torch.Size([2, 10, 512])
tensor([[39, 11,  6, 31, 11, 42, 49, 35, 10, 20],
        [24, 12, 35, 20, 30,  6, 23, 39, 48,  5]], device='cuda:0')


## DATA PREPROCESSING: Creation of build_vocab and company...

In [7]:
def build_vocab(scripts):
    blocks = [block for script in scripts for block in script.split() ]
    block_count = Counter(blocks)
    sorted_block_counts = sorted(block_count.items(), key=lambda x:x[1], reverse=True)
    block2idx = {block: idx for idx, (block, _) in enumerate(sorted_block_counts, 2)}
    block2idx['<pad>'] = 0
    block2idx['<unk>'] = 1
    idx2block = {idx: block for block, idx in block2idx.items()}
    return block2idx, idx2block


class TripletDataset(Dataset):
    def __init__(self, src_sentences, src_block2idx):
        self.src_sentences = src_sentences  # Anchor (source)
        self.src_block2idx = src_block2idx
        
    def __len__(self):
        return len(self.src_sentences)  # Usamos la longitud del conjunto positivo
    
    def __getitem__(self, idx):
        # Obtener las oraciones (anchor, positive, negative)
        src_sentence = self.src_sentences[idx]
        

        # Convertir cada oración en índices
        src_idxs = [self.src_block2idx.get(block, self.src_block2idx['<unk>']) for block in src_sentence.split()]
       

        # Retornar los tensores (anchor, positive, negative)
        return torch.tensor(src_idxs)

## TRAIN FUNCTIONS

In [8]:
def collate_fn(batch):
    src_batch = zip(*batch)
    src_batch = [torch.tensor(seq) for seq in src_batch] 

    #print(trg_batch)
    # Hacemos el padding sin truncar primero
    #trg_batch = torch.nn.utils.rnn.pad_sequence(trg_batch, batch_first=True, padding_value=0)
    #src_batch = torch.nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=0)
    #neg_batch = torch.nn.utils.rnn.pad_sequence(neg_batch, batch_first=True, padding_value=0)
    
    # Ahora truncamos las secuencias después del padding a MAX_SEQ_LEN
    #trg_batch = trg_batch[:, :MAX_SEQ_LEN]
    #src_batch = src_batch[:, :MAX_SEQ_LEN]
    #neg_batch = neg_batch[:, :MAX_SEQ_LEN]

    src_batch = [torch.nn.functional.pad(seq[:MAX_SEQ_LEN], (0, MAX_SEQ_LEN - len(seq[:MAX_SEQ_LEN])), value=0) for seq in src_batch]

    src_batch = torch.stack(src_batch)
    
    return src_batch

In [13]:
def train(model, dataloader, loss_function, optimiser, epochs):
    model.train()
    final_anchor_embeddings = []
    final_positive_embeddings = []
    final_negative_embeddings = []
    
    for epoch in range(epochs):
        total_loss = 0
        for i, (anchor_batch, positive_batch, negative_batch) in enumerate(dataloader):
            
            anchor_batch = anchor_batch.to(device)
            positive_batch = positive_batch.to(device)
            negative_batch = negative_batch.to(device)
            
            # Zero grads
            optimiser.zero_grad()

            # Forward para anchor, positive y negative
            anchor_embeddings = model(anchor_batch)
            positive_embeddings = model(positive_batch)
            negative_embeddings = model(negative_batch)
            
            # Almacenar los embeddings solo en el último epoch
            if epoch == epochs - 1:
                final_anchor_embeddings.append(anchor_embeddings.cpu().detach())
                final_positive_embeddings.append(positive_embeddings.cpu().detach())
                final_negative_embeddings.append(negative_embeddings.cpu().detach())


            

            # Calcular la pérdida de Triplet
            loss = loss_function(anchor_embeddings, positive_embeddings, negative_embeddings)

            # Backpropagation y actualización de parámetros
            loss.backward()
            optimiser.step()

            total_loss += loss.item()
            
        avg_loss = total_loss / len(dataloader)
        print(f'Epoch: {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}')
    
    # Concatenar los embeddings del último epoch
    final_anchor_embeddings = torch.cat(final_anchor_embeddings, dim=0)
    final_positive_embeddings = torch.cat(final_positive_embeddings, dim=0)
    final_negative_embeddings = torch.cat(final_negative_embeddings, dim=0)

    return final_anchor_embeddings, final_positive_embeddings, final_negative_embeddings


def evaluate(model, dataloader, reference_embeddings, loss_function=None):
    model = model.to(device)
    model.eval()  # Modo evaluación del modelo
    
    total_loss = 0

    with torch.no_grad():  # No calculamos gradientes en evaluación
        for i, project_batch in enumerate(dataloader):
            # Pasar el batch al dispositivo
            project_batch = project_batch.to(device)
            
            # Obtener embeddings del proyecto actual
            project_embeddings = model(project_batch)
            
    return project_embeddings

## **EVALUATION**

In [14]:
model = torch.load('action_global_scratch_triplet.pth').to(device)
train_src_embeddings = model.get_embeddings()
print("Source embeddings:", train_src_embeddings.shape)

Source embeddings: torch.Size([11, 128, 512])


In [15]:
class EvaluateModel():
    def __init__(self, project_path, model, train_src_embeddings):
        self.model = model
        self.train_src_embeds = train_src_embeddings
        print(self.train_src_embeds.shape)
        self.distances_cousine = []
        self.distances_euclidean = []
        self.project_path = project_path
        self.scripts = []
        self.dummy_target = self.scripts

        
        self.src_block2idx = {}
        self.src_idx2block = {}
        self.src_vocab_size = None

        # ---- EVALUATION EMBEDDINGS -----
        self.src_embeddings = None
        self.trg_embeddings = None
        self.neg_embeddigns = None
        # --------------------------------

        # Parameters for eval
        self.BATCH_SIZE = 16
        self.loss_function = nn.TripletMarginLoss(margin=1.0)
        self.optimiser = optim.Adam(model.parameters(), lr=0.0001)

        self.dataset = None
        self.dataloader = None

        # Distances
        self.dis_cos = None
        self.dis_eu = None
    
    def execute(self):
        self.locate_project()
        self.add_pads_to_scritps()
        self.build_vocabulary()

        # Create dataset and dataloader
        self.dataset = TripletDataset(self.scripts, self.src_block2idx)
        self.dataloader = DataLoader(self.dataset, batch_size=self.BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
        print("hemos llegado 1")
        # Eval model
        model.eval()
        print("hemos llegado 2")
        self.src_embeddings = evaluate(self.model, self.dataloader, self.loss_function)
        print("hemos llegado 3")
        self.calc_distances()

    def get_train_emb(self):
        train_src_embeddings, _ = self.model.get_embeddings()
        return train_src_embeddings

    def calc_distances(self):
        src_embeddings_global = self.src_embeddings.cpu().detach().numpy()
        trg_embeddings_global = self.train_src_embeds.cpu().detach().numpy()
        print("Shape trg_embeds: ", trg_embeddings_global.shape)
        print("Shape src_embeds: ", src_embeddings_global.shape)
            
        # evaluation embeddings
        src_embeddings_global_shape, trg_embeddings_global_shape = src_embeddings_global.shape, trg_embeddings_global.shape
        src_embeddings_global_new_dim = src_embeddings_global_shape[0] * src_embeddings_global_shape[1]
        trg_embeddings_global_new_dim = trg_embeddings_global_shape[0] * trg_embeddings_global_shape[1]
        src_embeddings_global, trg_embeddings_global = src_embeddings_global.reshape(src_embeddings_global_new_dim, 512), trg_embeddings_global.reshape(trg_embeddings_global_new_dim, 512)
    
        # Comparar embeddings de evaluación con los embeddings de entrenamiento
        cos_sim = cosine_similarity(src_embeddings_global, trg_embeddings_global)
        euclidean_dist = euclidean_distances(src_embeddings_global, trg_embeddings_global)

        self.dis_cos = cos_sim.mean()
        self.dis_eu = euclidean_dist.mean()

    def get_distances(self):
        return self.dis_cos, self.dis_eu

    def build_vocabulary(self):
        self.src_block2idx, self.src_idx2block = self.build_vocab()
        self.src_vocab_size = len(self.src_block2idx)
        self.trg_block2idx, self.trg_idx2block = self.src_block2idx, self.src_idx2block
        self.trg_vocab_size = len(self.trg_block2idx)

    def build_vocab(self):
        blocks = [block for script in self.scripts for block in script.split() ]
        block_count = Counter(blocks)
        sorted_block_counts = sorted(block_count.items(), key=lambda x:x[1], reverse=True)
        block2idx = {block: idx for idx, (block, _) in enumerate(sorted_block_counts, 2)}
        block2idx['<pad>'] = 0
        block2idx['<unk>'] = 1
        idx2block = {idx: block for block, idx in block2idx.items()}
        return block2idx, idx2block

    def load_json_project(self, path_projectsb3):
        try:
            zip_file = ZipFile(path_projectsb3, "r")
            json_project = json.loads(zip_file.open("project.json").read())
            return json_project
        except BadZipFile:
            print('Bad zipfile')

    def process(self, json_project):
        seq_num = 0
        dict_total_blocks = {}
        
        list_total_blocks = []
    
        for key, list_info in json_project.items():
            if key == "targets":
                for dict_target in list_info:
                    target_name = dict_target.get('name')
                    if target_name:
                        dict_total_blocks[target_name] = {}
                        dict_total_blocks[target_name][f'Seq_{seq_num}'] = []
                    blocks = dict_target.get('blocks')
                    if blocks:
                        for block_id, block_info in blocks.items():
                            if isinstance(block_info, dict):
                                topLevel = block_info.get('topLevel')
                                if topLevel:
                                    seq_num += 1
                                    dict_total_blocks[target_name][f'Seq_{seq_num}'] = []
                                opcode = block_info.get('opcode')
                                if opcode:
                                    
                                    dict_total_blocks[target_name][f'Seq_{seq_num}'].append(opcode)
                                list_total_blocks.append(block_info)
                                #dict_total_blocks[block_id] = block_info
        return dict_total_blocks

    def locate_project(self):
        #sb3_path = os.path.join('.','sb3_action_global',project)
        if os.path.isfile(self.project_path):
            #print(project)
            json_project = self.load_json_project(self.project_path)
            dict_total_blocks = self.process(json_project)
    
            for sprite, seqs in dict_total_blocks.items():
                for idx, block_list in seqs.items():
                    if block_list != []:
                        self.scripts.append(" ".join(block_list))
        else:
            raise("The project does not exists.")

    def add_pads_to_scritps(self):
        self.scripts = ['<sos> ' + script + ' <eos>' for script in self.scripts]


In [16]:
#sb3_project = os.path.join("./Beat the Robot.sb3")
sb3_project = os.path.join("./storytelling.sb3")

evaluation_obj = EvaluateModel(sb3_project, model, train_src_embeddings)
evaluation_obj.execute()

torch.Size([11, 128, 512])
hemos llegado 1
hemos llegado 2
hemos llegado 3
Shape trg_embeds:  (11, 128, 512)
Shape src_embeds:  (5, 128, 512)


In [17]:
dist_cos, dist_eu = evaluation_obj.get_distances()
print(f"Distancia coseno: {dist_cos}")
print(f"Distancia euclidea: {dist_eu}")

Distancia coseno: 0.12439966946840286
Distancia euclidea: 543.4035034179688


## EVALUATE EACH ONE OF GLOBAL PROJECT

In [18]:
metrics = pd.read_csv('metrics_attr.csv')

# Filter by Action Genre
metrics_action = metrics[(metrics['Main Genre'] == 'Action')]

# Check the filenames for collect
filenames_action = list(metrics_action['Name'])
# Create txt for made the shell script
with open('filenames_action_global.txt', 'w') as names:
     for name in filenames_action:
        names.write(name + '\n')

In [20]:
dist_cos_global = []
dist_eu_global = []
print(len(filenames_action))
evaluations = {}
for idx, project in enumerate(filenames_action):
    print(f"Project {idx}/{len(filenames_action)}")
    sb3_path = os.path.join('.','sb3_action_global',project)
    if os.path.isfile(sb3_path):
        #print(project)
        model = torch.load('action_global_scratch2.pth')
        train_src_embeddings = model.get_embeddings()
        
        #evaluations[project] = EvaluateModel(sb3_path, model, train_src_embeddings)
        #evaluations[project].execute()

        eval = EvaluateModel(sb3_path, model, train_src_embeddings)
        eval.execute()
        dist_cos, dist_eu = eval.get_distances()
        #dist_cos, dist_eu = evaluations[project].get_distances()
        dist_cos_global.append(dist_cos)
        dist_eu_global.append(dist_eu)



312
Project 0/312
torch.Size([11, 12, 512])
hemos llegado 1
hemos llegado 2
hemos llegado 3
Shape trg_embeds:  (11, 12, 512)
Shape src_embeds:  (5, 128, 512)
Project 1/312
torch.Size([11, 12, 512])
hemos llegado 1
hemos llegado 2
hemos llegado 3
Shape trg_embeds:  (11, 12, 512)
Shape src_embeds:  (7, 128, 512)
Project 2/312
torch.Size([11, 12, 512])
hemos llegado 1
hemos llegado 2
hemos llegado 3
Shape trg_embeds:  (11, 12, 512)
Shape src_embeds:  (4, 128, 512)
Project 3/312
torch.Size([11, 12, 512])
hemos llegado 1
hemos llegado 2
hemos llegado 3
Shape trg_embeds:  (11, 12, 512)
Shape src_embeds:  (3, 128, 512)
Project 4/312
torch.Size([11, 12, 512])
hemos llegado 1
hemos llegado 2
hemos llegado 3
Shape trg_embeds:  (11, 12, 512)
Shape src_embeds:  (7, 128, 512)
Project 5/312
torch.Size([11, 12, 512])
hemos llegado 1
hemos llegado 2
hemos llegado 3
Shape trg_embeds:  (11, 12, 512)
Shape src_embeds:  (4, 128, 512)
Project 6/312
torch.Size([11, 12, 512])
hemos llegado 1
hemos llegado 2


In [21]:
#print(dist_cos_global)


projects_simil = {project: similarity for project, similarity in zip(filenames_action, dist_cos_global)}
projects_simil = dict(sorted(projects_simil.items(), key=lambda item: item[1]))
print(projects_simil['Nora + Sage.sb3'])
print(projects_simil)

0.1389008
{'Polar Bear Fixes Climate Change.sb3': 0.08266613, 'David & Peter.sb3': 0.09001841, 'Gabe Umut-2.sb3': 0.09130949, 'Reducing Meat Consumption.sb3': 0.091474764, 'help the polar bear.sb3': 0.091627285, 'Brick Breaker remix.sb3': 0.092268914, 'climate change pong.sb3': 0.09252689, 'Climate Change Game_(2).sb3': 0.09269398, 'Greenhouse Gases Game.sb3': 0.09335795, 'Carbon Run.sb3': 0.0934551, 'Collect the CO2.sb3': 0.096609145, 'Reforestation and Deforestation- Saving this land.sb3': 0.09691623, 'Climate Change Catch Game.sb3': 0.09791252, 'Climate Control_(2).sb3': 0.09792567, 'Reducing Ocean Acidification.sb3': 0.09824653, 'Climate Change Project: Saving the penguin.sb3': 0.09891691, "Bryan and Mark's Game Project.sb3": 0.09915083, 'Super Carbon Bros.sb3': 0.09915173, 'Sammie and Alex Carbon Cycle Project.sb3': 0.10047329, 'Mark Jeremy.sb3': 0.10134343, 'gobo global harm.sb3': 0.10143384, 'Yuval and Rebecca Water cycle.sb3': 0.101533644, 'jabby.sb3': 0.101621166, "Franki and 

In [22]:
print(sum(dist_cos_global)/len(dist_cos_global))

0.1224679224709473


In [13]:
#sb3_project = os.path.join("./Beat the Robot.sb3")
model = torch.load('action_global_scratch2.pth')
train_src_embeddings, _ = model.get_embeddings()
sb3_project = os.path.join("./WindowsSimulator.sb3")

evaluation_obj = EvaluateModel(sb3_project, model, train_src_embeddings)
evaluation_obj.execute()

dist_cos, dist_eu = evaluation_obj.get_distances()
print(f"Distancia coseno: {dist_cos}")
print(f"Distancia euclidea: {dist_eu}")

torch.Size([11, 12, 512])


../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [58,0,0], thread: [96,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [58,0,0], thread: [97,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [58,0,0], thread: [98,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [58,0,0], thread: [99,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [58,0,0], thread: [100,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [58,0,0], thread: [101,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [58,0,0],

RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [12]:
#sb3_project = os.path.join("./Beat the Robot.sb3")
#model = torch.load('action_global_scratch2.pth')
#train_src_embeddings, _ = model.get_embeddings()
sb3_project = os.path.join("./piano.sb3")

evaluation_obj = EvaluateModel(sb3_project, model, train_src_embeddings)
evaluation_obj.execute()

dist_cos, dist_eu = evaluation_obj.get_distances()
print(f"Distancia coseno: {dist_cos}")
print(f"Distancia euclidea: {dist_eu}")

torch.Size([11, 12, 512])
Evaluation Completed
Shape trg_embeds:  (11, 12, 512)
Shape src_embeds:  (7, 36, 512)
Distancia coseno: 0.31856071949005127
Distancia euclidea: 492.714599609375
