In [56]:
import os
import numpy as np
import random
from collections import defaultdict
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import time

In [57]:
# Root of the XLA tile-level .npz data; each split lives in its own subdirectory.
tile_dir = '../data/npz_all/npz/tile/xla/'
train_dir, valid_dir, test_dir = (
    tile_dir + split for split in ('train/', 'valid/', 'test/')
)

# Report how many entries each split directory contains (train, valid, test).
for split_dir in (train_dir, valid_dir, test_dir):
    print(len(os.listdir(split_dir)))

5709
676
844


In [58]:
# Open one validation file as a sample to inspect; which file comes first
# depends on the order os.listdir happens to return.
data = np.load(valid_dir + os.listdir(valid_dir)[0])

In [59]:
# Show every array stored in the sample file alongside its shape.
for name, array in data.items():
    print(name, array.shape)

node_feat (27, 140)
node_opcode (27,)
edge_index (26, 2)
config_feat (1893, 24)
config_runtime (1893,)
config_runtime_normalizers (1893,)


In [60]:
# Average raw runtime across all configurations in the sample file.
data['config_runtime'].mean()

1760332.9302694136

In [131]:
def slowdown(y_true, y_pred, k):
    """Relative slowdown of the best runtime found among the top-k picks.

    Args:
        y_true: 1-D array-like of measured runtimes, one per configuration.
        y_pred: 1-D array-like of integer indices into ``y_true``, ordered so
            that the first ``k`` entries are the configurations to evaluate.
        k: Number of leading entries of ``y_pred`` to consider.

    Returns:
        ``min(y_true[y_pred[:k]]) / min(y_true) - 1``. The result is ``0``
        when the overall fastest configuration is among the first ``k`` picks.

    Raises:
        ValueError: From ``np.min`` when ``y_true`` is empty or when the first
            ``k`` picks select nothing.
    """
    # Accept plain Python sequences as well as NumPy arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    y_fastest = np.min(y_true)

    # Best true runtime among the k configurations ranked first.
    top_k_indices = y_pred[:k]
    best_of_top_k = np.min(y_true[top_k_indices])

    return best_of_top_k / y_fastest - 1


def speed_score(y_true, y_pred, k):
    """Return one minus the slowdown of the top-k picks (see ``slowdown``)."""
    penalty = slowdown(y_true, y_pred, k)
    return 1 - penalty

# print(speed_score(np.array([1.24, 0.231, 4.2, 2.01]), np.array([2, 0, 3, 1]), 1))

In [139]:
class LayoutDataset(Dataset):
    """Flat, index-addressable view over every trial in a list of ``.npz`` files.

    Each file is read for the keys ``node_feat``, ``node_opcode``,
    ``config_feat``, ``config_runtime`` and ``config_runtime_normalizers``.
    Global index ``i`` is mapped to a (file, trial) pair through the cumulative
    number of trials per file. Only the most recently accessed file is kept in
    memory, so consecutive indices from the same file avoid re-reading it.
    """

    def __init__(self, filenames):
        """Scan ``filenames`` once to record how many trials each file holds."""
        self.filenames = filenames
        # Single-entry cache: arrays of the most recently loaded file.
        self.current_file_data = None
        self.current_file_idx = -1
        self.trials_per_file = self.precompute_trials_per_file()
        # cumulative_trials[j] == number of trials in files 0..j inclusive.
        self.cumulative_trials = np.cumsum(self.trials_per_file)

    def precompute_trials_per_file(self):
        """Return a list with the number of trials stored in each file."""
        trials_per_file = []
        for filename in self.filenames:
            # NOTE(review): allow_pickle=True lets np.load unpickle object
            # arrays, which can execute arbitrary code; use trusted files only.
            with np.load(filename, allow_pickle=True) as data:
                trials_per_file.append(len(data['config_runtime']))
        return trials_per_file

    def load_file(self, filename):
        """Read every array of ``filename`` into the single-entry cache."""
        # Materialize the arrays inside the context manager so the archive's
        # file handle is closed instead of lingering until garbage collection.
        with np.load(filename, allow_pickle=True) as npz:
            self.current_file_data = {key: npz[key] for key in npz.files}

    def __len__(self):
        """Total number of trials across all files (0 when there are none)."""
        if len(self.cumulative_trials) == 0:
            return 0
        return int(self.cumulative_trials[-1])

    def __getitem__(self, idx):
        """Return the trial at global position ``idx``.

        Negative indices count from the end, as for Python sequences.

        Raises:
            IndexError: If ``idx`` falls outside ``[-len(self), len(self))``.
        """
        total = len(self)
        if idx < 0:
            idx += total
        if idx < 0 or idx >= total:
            raise IndexError("Index out of range")

        # side='right' sends an index equal to a cumulative count to the next file.
        file_idx = int(np.searchsorted(self.cumulative_trials, idx, side='right'))
        if file_idx != self.current_file_idx:
            self.load_file(self.filenames[file_idx])
            self.current_file_idx = file_idx

        # Offset of the trial inside its own file.
        if file_idx > 0:
            trial_idx = idx - int(self.cumulative_trials[file_idx - 1])
        else:
            trial_idx = idx
        return self.get_trial_data(self.current_file_data, file_idx, trial_idx)

    def get_trial_data(self, file_data, file_idx, trial_idx):
        """Build the tensors for one trial of an already loaded file.

        Returns:
            Tuple ``(config_feat, node_feat, config_runtime, file_idx, trial_idx)``:
            the trial's config feature row, the file's node features with the
            opcode appended as an extra trailing column, the runtime divided by
            its normalizer as a 1-element tensor, and the two indices as
            1-element tensors.
        """
        config_feat = torch.from_numpy(file_data['config_feat'][trial_idx])
        node_feat = torch.from_numpy(file_data['node_feat'])
        node_opcode = torch.from_numpy(file_data['node_opcode'])
        config_runtime = torch.tensor([file_data['config_runtime'][trial_idx] / file_data['config_runtime_normalizers'][trial_idx]])

        # Append the opcode as one more feature column per node.
        node_feat = torch.concat([node_feat, node_opcode.unsqueeze(1)], axis=1)

        return config_feat, node_feat, config_runtime, torch.tensor([file_idx]), torch.tensor([trial_idx])



def pad_sequence(sequences, batch_first=True, padding_value=-1):
    """Stack variable-length 2-D tensors into one padded 3-D batch.

    Args:
        sequences: Non-empty sequence of tensors shaped ``(length_i, width)``.
            The width and dtype are taken from the first tensor.
        batch_first: Accepted for signature compatibility but not consulted;
            the result is always laid out batch-first.
        padding_value: Value written to positions past each sequence's length.

    Returns:
        Tensor of shape ``(len(sequences), max_length, width)`` with the dtype
        of the first sequence.

    Raises:
        ValueError: From ``max`` when ``sequences`` is empty.
    """
    max_len = max(s.size(0) for s in sequences)
    first = sequences[0]
    # Allocate with the input dtype: torch.full infers an integer dtype from an
    # integer fill value, which would truncate float features on assignment.
    padded_batch = torch.full(
        (len(sequences), max_len, first.size(1)),
        padding_value,
        dtype=first.dtype,
    )
    for i, sequence in enumerate(sequences):
        padded_batch[i, :sequence.size(0)] = sequence
    return padded_batch

def custom_collate_fn(batch):
    """Collate per-trial 5-tuples into batched tensors.

    Fixed-size fields are stacked along a new leading dimension; the node
    feature tensors are padded to a common length with ``pad_sequence``.

    Returns:
        Tuple ``(config_feat, node_feat_padded, config_runtime, file_idxs, trial_idxs)``.
    """
    config_feats, node_feats, runtimes, file_ids, trial_ids = zip(*batch)

    # Fields with identical per-sample shapes can be stacked directly.
    stacked_config, stacked_runtime, stacked_files, stacked_trials = (
        torch.stack(column)
        for column in (config_feats, runtimes, file_ids, trial_ids)
    )

    # Node features vary in length between samples, so pad before batching.
    padded_nodes = pad_sequence(node_feats, batch_first=True)

    return stacked_config, padded_nodes, stacked_runtime, stacked_files, stacked_trials

class BufferedRandomSampler:
    """Yield ``0 .. data_source_length - 1`` shuffled inside consecutive windows.

    Indices are taken in ascending order, ``buffer_size`` at a time; each
    window is emitted in random order before the next window is drawn. Every
    index is produced exactly once per pass, and indices that are close
    together stay close together in the output.

    Args:
        data_source_length: Number of indices to produce per pass.
        buffer_size: Size of each shuffling window; must be at least 1.

    Raises:
        ValueError: If ``buffer_size`` is smaller than 1.
    """

    def __init__(self, data_source_length, buffer_size=200):
        if buffer_size < 1:
            raise ValueError("buffer_size must be at least 1")
        self.data_source_length = data_source_length
        self.buffer_size = buffer_size
        self.buffer = []
        self.index_iter = iter(range(data_source_length))

    def fill_buffer(self):
        """Top the buffer up to ``buffer_size`` indices, then shuffle it."""
        while len(self.buffer) < self.buffer_size:
            try:
                self.buffer.append(next(self.index_iter))
            except StopIteration:
                break  # source exhausted; keep whatever was collected
        # One shuffle per refill is enough: popping from a shuffled list
        # already yields its elements in uniformly random order.
        random.shuffle(self.buffer)

    def __len__(self):
        """Number of indices produced by one full pass."""
        return self.data_source_length

    def __iter__(self):
        # Restart from index 0 so the sampler can be reused for another pass
        # instead of being silently empty after the first one.
        self.buffer = []
        self.index_iter = iter(range(self.data_source_length))
        return self

    def __next__(self):
        if not self.buffer:
            self.fill_buffer()
            if not self.buffer:
                raise StopIteration
        return self.buffer.pop()


In [141]:
def random_model(node_feat, config_feat):
    """Baseline predictor: ignore both inputs and return a uniform draw from [0, 1)."""
    prediction = random.random()
    return prediction

# Paths of every file in the validation split.
filenames = [valid_dir + f for f in os.listdir(valid_dir)]

dataset = LayoutDataset(filenames=filenames)
print('finished dataset', len(dataset))
# print(dataset.cumulative_trials)
sampler = BufferedRandomSampler(len(dataset))
# Pass the sampler to the loader; it was previously built but never used, so
# the loader would have walked the dataset strictly in order.
dataloader = DataLoader(dataset, batch_size=128, sampler=sampler, collate_fn=custom_collate_fn)
model = random_model


# Time one sequential pass over every trial through Dataset.__getitem__.
start = time.time()
preds = defaultdict(list)
for i, data in enumerate(dataset):
    pass

print(time.time() - start)
#     # config_feat, node_feat, config_runtime, file_idx, trial_idx = data

#     # runtime = model(node_feat, config_feat)

#     # preds[file_idx].append({'runtime': runtime, 'pred': config_runtime.item()})

#     if i % 1000 == 0:
#         print(i)

# scores = []
# for file in preds.values():
#     file = sorted(file, key=lambda x: x['runtime'])
#     for i, d in enumerate(file):
#         d['idx'] = i
#     file = sorted(file, key=lambda x: x['pred'])
#     indices = np.array([f['idx'] for f in file])
#     runtimes = np.array([f['runtime'] for f in file])

#     print(file)

#     scores.append(speed_score(runtimes, indices, 3))
# print(np.mean(scores)) 

finished dataset 1042735
37.36007881164551


In [127]:
# Number of file paths held in `filenames`.
len(filenames)

676

In [128]:
class FullHeightConv(nn.Module):
    """2-D convolution whose kernel covers ``kernel_height`` rows of the input.

    When the input height equals ``kernel_height`` the kernel spans the full
    height, so the output height is 1 and the kernel only slides along the
    width axis.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of feature maps to produce.
        kernel_width: Kernel extent along the width axis.
        stride_width: Step along the width axis (the height stride is 1).
        kernel_height: Kernel extent along the height axis; defaults to 24.
    """

    def __init__(self, in_channels, out_channels, kernel_width, stride_width=1, kernel_height=24):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(kernel_height, kernel_width),
            stride=(1, stride_width),
        )

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        return self.conv(x)

# Example: map 1 input channel to 16 feature maps using a kernel that is
# 3 columns wide and moves one column at a time along the N (width) axis.
conv_layer = FullHeightConv(1, 16, kernel_width=3, stride_width=1)

# Dummy batch laid out as (batch_size=2, channels=1, height=24, width=N=50);
# N is arbitrary here.
input_tensor = torch.randn(2, 1, 24, 50)

# Run the layer on the dummy batch.
output = conv_layer(input_tensor)

# Show the shape of the resulting tensor.
print(output.shape)


torch.Size([2, 16, 1, 48])


In [23]:
# Walk every file and print a marker for each 1000th trial index (0, 1000, ...).
for file in filenames:
    # The context manager closes each archive as soon as its trial count is
    # known, instead of leaving one open handle per file.
    with np.load(file) as data:
        n_trials = len(data['config_runtime'])
    # Stepping by 1000 prints the same values as testing i % 1000 == 0.
    for i in range(0, n_trials, 1000):
        print(i)


0
1000
0
0
1000
0
0
0
0
0
0
0
0
0
1000
2000
3000
4000
5000
6000
7000
8000
0
0
0
1000
2000
3000
0
1000
0
0
0
1000
2000
3000
4000
5000
6000
0
0
0
0
0
0
0
0
0
0
0
0
1000
0
0
1000
0
0
0
0
1000
0
0
0
0
0
0
0
1000
2000
3000
4000
5000
6000
7000
0
0
1000
0
0
1000
2000
3000
4000
5000
6000
0
0
1000
0
0
0
1000
0
0
0
1000
2000
0
0
0
1000
2000
3000
4000
0
0
1000
2000
0
0
0
0
1000
2000
0
0
1000
2000
3000
4000
5000
0
0
1000
0
0
0
0
1000
0
0
1000
0
0
0
1000
2000
3000
0
1000
0
1000
2000
0
1000
0
1000
2000
3000
0
0
1000
0
1000
2000
3000
4000
5000
0
0
1000
2000
3000
4000
5000
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
0
1000
2000
0
0
0
0
0
0
0
0
0
1000
0
0
1000
0
1000
2000
3000
4000
5000
0
0
1000
2000
0
1000
2000
3000
4000
0
0
0
1000
2000
3000
4000
5000
6000
0
0
0
1000
2000
3000
4000
5000
6000
7000
8000
0
0
0
0
0
1000
0
1000
0
0
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
0
0
1000
0
0
0
1000
0
1000
2000
3000
4000
0
0
1000
2000
3000
4000
0
1000
2000
3000
4000
0
0
1000
0
0
0
0
0
1000
2000
3000
0
100