## A comparison of PyTorch, TVM, and AutoTVM inference timing for image classification with a small model (ResNet-18)

In [1]:
import torch
from torchvision import models
from torchvision.io import read_image
from torchvision import transforms
from torch import Tensor
from torch import nn
import torch.backends.cudnn as cudnn

import os
import random
import requests
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt

import timeit
import numpy as np


## Fetch the model
The model we are using is basic but functional. Since the goal here is to compare inference timing rather than to train a classifier, we use the pretrained ResNet-18.

In [2]:



def load_model_and_batch_size(batch_size):
    """Load the ImageNet-pretrained ResNet-18 in inference mode.

    Args:
        batch_size: Batch size to hand back to the caller; it is not used to
            build the model.

    Returns:
        A ``(model, batch_size)`` tuple: the torchvision ResNet-18 switched to
        eval mode, and ``batch_size`` exactly as it was passed in.
    """
    pretrained = models.resnet18(weights='ResNet18_Weights.IMAGENET1K_V1')
    # The value returned by eval() is what callers receive as the model.
    return pretrained.eval(), batch_size

def check_device():
    """Report whether CUDA is usable and return the matching torch device.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``. A one-line status message is printed
        in both cases.
    """
    if not torch.cuda.is_available():
        print("CUDA (GPU) is not available. Using CPU instead.")
        return torch.device("cpu")

    print("CUDA (GPU) is available.")
    return torch.device("cuda")

# Notebook-level batch size; later cells that take `batch_size` from the
# notebook scope read this value.
batch_size = 10
# Selects CUDA when available, otherwise the CPU (prints which one was chosen).
device = check_device()
model, batch_size = load_model_and_batch_size(batch_size)
# Keep the notebook-level model on the selected device.
model = model.to(device)

CUDA (GPU) is available.


## Load test images
Let's begin by creating some functions that convert an image to the input size ResNet-18 expects.

In [3]:
# Preprocessing pipeline applied to every PIL image before it is fed to the model.
transform = transforms.Compose([
     transforms.Resize(256),                    # resize step
     transforms.CenterCrop(224),                # keep the central 224x224 region
     transforms.ToTensor(),                     # PIL image -> tensor
     transforms.Normalize(                      # per-channel normalisation
     mean=[0.485, 0.456, 0.406],                # one mean per channel
     std=[0.229, 0.224, 0.225]                  # one std per channel
)])

In [4]:
def display_images_with_labels(imgs, labels, system):
    """Show a row of images, each titled with its label.

    Args:
        imgs: Sequence of per-image arrays in channel-first (C, H, W) layout,
            optionally with a leading batch dimension of size 1. Torch tensors
            for ``system="pytorch"``, NumPy-compatible arrays for
            ``system="tvm"``.
        labels: One title per image, in the same order as ``imgs``.
        system: ``"pytorch"`` or ``"tvm"``; selects how an image is moved to
            channel-last layout for plotting.

    Returns:
        None. When ``system`` is unknown, or there are no images, a message is
        printed and nothing is plotted.
    """
    # Validate up front: plotting a channel-first image would only fail later
    # inside imshow with a less helpful error.
    if system not in ("pytorch", "tvm"):
        print("Wrong System please select tvm or pytorch")
        return

    num_images = len(imgs)
    if num_images == 0:
        print("No images to display.")
        return

    # squeeze=False keeps `axes` two-dimensional even for a single image, so
    # indexing works for any number of images.
    fig, axes = plt.subplots(1, num_images, figsize=(12, 4), squeeze=False)  # Adjust figsize as needed
    row = axes[0]

    for i, (img, label) in enumerate(zip(imgs, labels)):
        img = img.squeeze(0)  # Remove the batch dimension if it exists
        if system == "pytorch":
            img = img.permute(1, 2, 0)  # (C, H, W) -> (H, W, C)
        else:
            img = np.transpose(img, (1, 2, 0))

        # Rescale pixel values to [0, 1]; a constant image stays at zero
        # instead of being divided by zero.
        img = img - img.min()
        peak = img.max()
        if peak > 0:
            img = img / peak

        row[i].imshow(img)
        row[i].axis('off')
        row[i].set_title(label, fontsize=10, pad=5)  # Display label on top of the image

    plt.show()
    
def load_random_images(batch_size, directory=None):
    """Pick random JPEG images, preprocess them and record which were chosen.

    Images are drawn with ``random.choice``, so the same file can be picked
    more than once. The chosen paths are written, one per line, to
    ``image_files.txt`` in the current working directory so that
    ``get_images()`` can reload the same batch.

    Args:
        batch_size: Number of images to draw.
        directory: Folder to search for ``*.JPEG`` files. Defaults to the
            dataset location this notebook was written against.

    Returns:
        A tensor stacking the ``batch_size`` preprocessed images, or ``None``
        (after printing a message) when the directory holds no ``.JPEG`` file.
    """
    if directory is None:
        directory = "/home1/public/misampson/dataset/ILSVRC2015/Data/DET/test"

    image_files = [f for f in os.listdir(directory) if f.endswith('.JPEG')]
    if not image_files:
        print("No image files found in the directory.")
        return None

    imgs = []
    chosen_image_files = []
    for _ in range(batch_size):
        img_path = os.path.join(directory, random.choice(image_files))
        chosen_image_files.append(img_path)
        # The context manager releases the file handle as soon as the pixels
        # have been copied by convert(); without it one handle per image stays
        # open until garbage collection.
        with Image.open(img_path) as img:
            # NOTE(review): the image is squashed to 224x224 here and then
            # resized/cropped again by `transform` - confirm the double resize
            # is intended.
            img_reshape = img.convert("RGB").resize((224, 224))
        imgs.append(transform(img_reshape))

    imgs = torch.stack(imgs)

    with open("image_files.txt", "w") as f:
        f.write("\n".join(chosen_image_files))

    return imgs

def get_images(file_path="image_files.txt"):
    """Reload and preprocess the images listed in a path file.

    Args:
        file_path: Text file with one image path per line (the format written
            by ``load_random_images``). Blank lines are ignored.

    Returns:
        A tensor stacking the preprocessed images in file order.

    Raises:
        ValueError: If the file lists no image paths.
    """
    with open(file_path, "r") as f:
        image_files = [line for line in f.read().splitlines() if line.strip()]

    if not image_files:
        # Fail with a clear message instead of an opaque torch.stack error.
        raise ValueError(f"No image paths listed in {file_path!r}.")

    imgs = []
    for image_file in image_files:
        # Close each file as soon as its pixels have been copied by convert().
        with Image.open(image_file) as img:
            # NOTE(review): the image is squashed to 224x224 here and then
            # resized/cropped again by `transform` - confirm the double resize
            # is intended.
            img_reshape = img.convert("RGB").resize((224, 224))
        imgs.append(transform(img_reshape))

    return torch.stack(imgs)

## Prepare the classes
Functions that map predicted class indices to human-readable class names.

In [5]:
def prediction_to_class(predictions):
    """Translate predicted class indices into human-readable class names.

    Two files are read from the current working directory on every call:
    ``imagenet_classes.txt``, whose line ``i`` holds the identifier of class
    index ``i``, and ``imagenet_synsets.txt``, whose lines are
    ``<identifier> <name>`` split on the first space.

    Args:
        predictions: Iterable of class indices.

    Returns:
        A list with one class name per prediction, in input order.
    """
    with open('imagenet_classes.txt') as classes_file:
        index_to_synset = [row.strip() for row in classes_file]

    with open('imagenet_synsets.txt') as synsets_file:
        fields = (row.strip().split(' ', 1) for row in synsets_file)
        synset_to_name = {pair[0]: pair[1] for pair in fields}

    return [synset_to_name[index_to_synset[idx]] for idx in predictions]


In [6]:
def timit(func, *args, **kwargs):
    """Time repeated calls of ``func(*args, **kwargs)`` and summarise them.

    Two identical ``timeit.repeat`` passes are run (10 repeats of 10 calls
    each). The first pass is a warm-up whose samples are discarded; the second
    is reported. Every sample is therefore the total time of 10 consecutive
    calls, not of a single call.

    Args:
        func: Callable to time.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        A dict with the ``"mean"``, ``"median"`` and ``"std"`` of the measured
        samples, in seconds per 10 calls. The summary is also printed.
    """
    timing_number = 10
    timing_repeat = 10

    def call():
        return func(*args, **kwargs)

    # Warm-up pass: run the same workload once and throw the samples away.
    timeit.repeat(call, repeat=timing_repeat, number=timing_number)
    timing_results = timeit.repeat(call, repeat=timing_repeat, number=timing_number)

    timing_summary = {
        "mean": sum(timing_results) / len(timing_results),
        # np.median averages the two middle samples when the count is even;
        # picking sorted(...)[n // 2] would report the upper-middle sample.
        "median": float(np.median(timing_results)),
        "std": np.std(timing_results),
    }

    print("Timing Summary:")
    print(timing_summary)
    return timing_summary


In [7]:
# def run_pytorch(imgs):
#     imgs=imgs.to(device)
#     output = model(imgs)
#     return output
    
# def process_pytorch(model, batch_size):
#     imgs = get_images()
#     print(imgs.shape)
#     labels = [] 
#     out = run_pytorch(imgs)  
#     print(out.shape)
#     for outputs in out:
#         _, indices = torch.topk(outputs, 1)
#         img_labels = prediction_to_class(indices) 
#         labels.append(img_labels)
#     return imgs, labels

# load_random_images(batch_size)
# imgs, labels = process_pytorch(model, batch_size)
# #display_images_with_labels(imgs, labels, "pytorch")

## TVM without autotuning




In [8]:
import tvm
from tvm import relay, autotvm
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
import tvm.contrib.graph_executor as runtime
from tvm.contrib import graph_executor
import tvm.runtime
import pickle

In [9]:
# Cache for the compiled TVM library and its input name; run_tvm() fills these
# in the first time it is called while they are None.
tvm_lib = None
tvm_inp_name = None
# Compilation target and execution device used by the TVM cells below.
target = tvm.target.Target("cuda")
dev = tvm.cuda(0)

In [10]:
def tvm_relay(batchsize):
    """Trace the notebook-level PyTorch ``model`` and compile it with Relay.

    The model is traced with a random input of shape
    ``[batchsize, 3, 224, 224]``, imported through the Relay PyTorch frontend
    under the input name ``"data"``, and built for the notebook-level
    ``target`` at ``opt_level=3``.

    Args:
        batchsize: Batch dimension the compiled library is specialised for.

    Returns:
        A ``(lib, input_name)`` tuple: the built library and the name under
        which inputs must be set.
    """
    input_shape = [batchsize, 3, 224, 224]
    input_name = "data"
    shape_list = [(input_name, input_shape)]
    # Create the example input on the same device the model is moved to; a
    # hard-coded 'cuda' tensor disagrees with the model when `device` is the CPU.
    input_data = torch.randn(input_shape, device=device)
    scripted_model = torch.jit.trace(model.to(device), input_data).eval()
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)
    return lib, input_name

In [11]:
def run_module(mod):
    """Execute ``mod`` once and hand the same object back.

    Args:
        mod: Any object exposing a zero-argument ``run()`` method.

    Returns:
        ``mod`` itself, so the call can be chained or passed to a timer.
    """
    # NOTE(review): if run() returns before the device has finished, timing
    # this function measures launch time only - confirm for the runtime in use.
    mod.run()
    return mod

def create_module(lib, imgs, input_name, batchsize):
    """Run one batch through a compiled TVM library and classify the output.

    Args:
        lib: Built library; its ``"default"`` entry is instantiated on the
            notebook-level ``dev``.
        imgs: Torch tensor holding the batch; it is copied to host memory and
            reshaped to ``(batchsize, 3, 224, 224)``.
        input_name: Name under which the batch is set on the module.
        batchsize: Number of images in ``imgs``.

    Returns:
        A ``(classes, module)`` tuple: the class names of the arg-max
        prediction per image, and the graph module that produced them.
    """
    module = graph_executor.GraphModule(lib["default"](dev))

    # Host-side float32 copy of the batch in the layout the module expects.
    host_batch = np.array(imgs.cpu()).reshape((batchsize, 3, 224, 224))
    module.set_input(input_name, tvm.nd.array(host_batch.astype("float32")))

    scores = run_module(module).get_output(0).asnumpy()
    classes = prediction_to_class(np.argmax(scores, axis=1))
    return classes, module


In [12]:
def run_tvm(imgs, batchsize):
    """Classify a batch with TVM, compiling the model on first use.

    The compiled library and its input name are cached in the notebook-level
    ``tvm_lib`` / ``tvm_inp_name``. The cache is only rebuilt while either is
    ``None``; it is not keyed by ``batchsize``, so callers that change the
    batch size must reset both globals first.

    Args:
        imgs: Torch tensor with the batch of preprocessed images.
        batchsize: Number of images in ``imgs``.

    Returns:
        A ``(imgs, classes, module)`` tuple: the batch moved to ``device``,
        the predicted class names, and the graph module that ran it.
    """
    global tvm_lib, tvm_inp_name

    imgs = imgs.to(device)
    cache_ready = tvm_lib is not None and tvm_inp_name is not None
    if not cache_ready:
        print("Loading relay for TVM module.")
        tvm_lib, tvm_inp_name = tvm_relay(batchsize)

    classes, module = create_module(tvm_lib, imgs, tvm_inp_name, batchsize)
    return imgs, classes, module

In [13]:
# load_random_images(batch_size)
# model, batch_size = load_model_and_batch_size(batch_size)
# imgs,classes, module=run_tvm(get_images(),batch_size)
# #display_images_with_labels(imgs, classes, "tvm")

In [14]:
# tvm_wo_autotune_time=timit(run_module,module)

In [15]:
import threading
import time
import numpy as np
import matplotlib.pyplot as plt

def execute_and_plot_tvm_timit():
    """Time untuned TVM inference for several batch sizes and plot the means.

    For each batch size the cached TVM library is dropped, a fresh random
    batch is drawn and recorded, the model is compiled and run once through
    ``run_tvm``, and ``run_module`` is then timed with ``timit``. Each mean is
    saved with ``save_mean_time``; afterwards the saved values are read back
    with ``get_timing_results`` and drawn with ``plot_timing_results``.

    Note:
        ``save_mean_time``, ``get_timing_results`` and ``plot_timing_results``
        are resolved when this function runs, and later cells of this notebook
        redefine all three, so the result depends on which definitions are
        current.
    """
    global tvm_lib, tvm_inp_name
    batch_sizes = [1, 10, 100, 200, 256]

    for batch_size in batch_sizes:
        # Drop the cached library so run_tvm() recompiles for this batch shape.
        tvm_lib = None
        tvm_inp_name = None
        load_random_images(batch_size)
        # The notebook-level model is what gets compiled, so it is not
        # reloaded here for every batch size.
        _, _, module = run_tvm(get_images(), batch_size)
        tvm_wo_autotune_time = timit(run_module, module)
        save_mean_time(batch_size, tvm_wo_autotune_time["mean"])

    # Retrieve timing results from the saved files and plot them.
    plot_timing_results(get_timing_results(batch_sizes))


def save_mean_time(batch_size, mean_time):
    """Write one mean timing to ``tvm_timing_batch_<batch_size>.txt``.

    The file is created in the current working directory and overwritten if
    it already exists; its whole content is ``str(mean_time)``.
    """
    out_name = f'tvm_timing_batch_{batch_size}.txt'
    with open(out_name, 'w') as out_file:
        out_file.write(str(mean_time))

def get_timing_results(batch_sizes):
    """Read back the mean timings stored as ``tvm_timing_batch_<size>.txt``.

    Args:
        batch_sizes: Batch sizes whose files should be read from the current
            working directory.

    Returns:
        A list of ``(batch_size, mean_time)`` tuples in the order given.
    """
    def _read_mean(size):
        # Each file holds a single float written as text.
        with open(f'tvm_timing_batch_{size}.txt', 'r') as timing_file:
            return float(timing_file.read())

    return [(size, _read_mean(size)) for size in batch_sizes]

def plot_timing_results(timing_results, title='TVM Mean Execution Time vs Batch Size'):
    """Draw a horizontal bar chart of mean execution time per batch size.

    Bars are ordered by mean time, with the fastest entry at the top.

    Args:
        timing_results: Iterable of ``(batch_size, mean_time)`` tuples;
            ``None`` entries are ignored.
        title: Figure title. The default names TVM because the timing files
            read by the neighbouring ``get_timing_results`` are the
            ``tvm_timing_batch_*`` ones.

    Returns:
        None. Prints a message instead of plotting when nothing is left to
        plot.
    """
    # Drop placeholders for runs that produced no measurement.
    timing_results = [result for result in timing_results if result is not None]

    if not timing_results:
        print("No timing results to plot.")
        return

    timing_results.sort(key=lambda x: x[1])  # Sort by mean time
    batch_sizes = [result[0] for result in timing_results]  # Extract batch sizes
    timing_means = [result[1] for result in timing_results]  # Extract timing results

    plt.figure(figsize=(10, 6))

    # Generate equally spaced y-axis ticks
    y_ticks = np.arange(len(batch_sizes))

    # One bar per entry, each with its own colour and legend label.
    colors = plt.cm.viridis(np.linspace(0, 1, len(batch_sizes)))
    for i, (mean, size) in enumerate(zip(timing_means, batch_sizes)):
        plt.barh(y_ticks[i], mean, color=colors[i], label=f'Batch Size {size}')

    # Set y-axis ticks and labels
    plt.yticks(y_ticks, batch_sizes)

    plt.title(title)
    plt.xlabel('Mean Execution Time (seconds)')
    plt.ylabel('Batch Size')
    plt.legend()
    plt.grid(axis='x', linestyle='--', alpha=0.7)  # Dashed vertical guide lines only
    plt.gca().invert_yaxis()  # Put the first (fastest) entry at the top
    plt.tight_layout()
    plt.show()

# Usage
#execute_and_plot_tvm_timit()


## Begin TVM steps




In [112]:
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
import tvm.contrib.graph_executor as runtime
import tvm.auto_scheduler as auto_scheduler
from tvm.autotvm.tuner import XGBTuner

import shutil

## Define Network
First we need to define the network using the Relay frontend API.
We can load some pre-defined networks from `tvm.relay.testing`.
We can also load models from MXNet, ONNX and TensorFlow.



In [113]:
def get_network(name, batch_size):
    """Build a Relay module and parameters for one of the supported networks.

    The element type is taken from the notebook-level ``dtype`` at call time.

    Args:
        name: ``"resnet-<layers>"``, ``"vgg-<layers>"``, ``"mobilenet"``,
            ``"squeezenet_v1.1"``, ``"inception_v3"`` or ``"mxnet"``.
        batch_size: Batch dimension of the generated workload.

    Returns:
        ``(mod, params, input_shape, output_shape)``.

    Raises:
        ValueError: If ``name`` matches none of the supported networks.
    """
    output_shape = (batch_size, 1000)
    input_shape = (batch_size, 3, 224, 224)

    if "resnet" in name or "vgg" in name:
        # The depth is the part after the first dash, e.g. "resnet-18" -> 18.
        depth = int(name.split("-")[1])
        workload = relay.testing.resnet if "resnet" in name else relay.testing.vgg
        mod, params = workload.get_workload(
            num_layers=depth, batch_size=batch_size, dtype=dtype
        )
    elif name == "mobilenet":
        mod, params = relay.testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)
    elif name == "squeezenet_v1.1":
        mod, params = relay.testing.squeezenet.get_workload(
            batch_size=batch_size, version="1.1", dtype=dtype
        )
    elif name == "inception_v3":
        # This is the only network here with a 299x299 input.
        input_shape = (batch_size, 3, 299, 299)
        mod, params = relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
    elif name == "mxnet":
        # Example of importing a pretrained MXNet model.
        from mxnet.gluon.model_zoo.vision import get_model

        block = get_model("resnet18_v1", pretrained=True)
        mod, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype)
        # Rebuild the main function with a softmax applied to its body.
        main_fn = mod["main"]
        with_softmax = relay.Function(
            main_fn.params, relay.nn.softmax(main_fn.body), None, main_fn.type_params, main_fn.attrs
        )
        mod = tvm.IRModule.from_expr(with_softmax)
    else:
        raise ValueError("Unsupported network: " + name)

    return mod, params, input_shape, output_shape

## Set Tuning Options
Before tuning, we apply some configurations.



In [146]:
#### DEVICE CONFIG ####
# Rebinds the notebook-level `target` that an earlier cell also assigns.
target = tvm.target.cuda()

#### TUNING OPTION ####
network = "resnet-18"
# Tuning records are stored in "<network>.log".
log_file = "%s.log" % network
# Read by get_network() when it builds the workload.
dtype = "float32"

# Keys match the keyword parameters of tune_tasks().
tuning_option = {
    "log_filename": log_file,
    "tuner": "ga",
    "n_trial": 2000,
    "early_stopping": 20,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=10, repeat=10, timeout=15, min_repeat_ms=150),
    ),
}

## Begin Tuning
Now we can extract tuning tasks from the network and begin tuning.
Here, we provide a simple utility function to tune a list of tasks.
This function is just an initial implementation which tunes them in sequential order.
We will introduce a more sophisticated tuning scheduler in the future.



In [147]:
import os
import tvm
from tvm import autotvm
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner

def _create_tuner(tuner, tsk):
    """Instantiate the AutoTVM tuner selected by the ``tuner`` string for ``tsk``."""
    xgb_variants = {
        "xgb": {"loss_type": "reg"},
        "xgb_knob": {"loss_type": "reg", "feature_type": "knob"},
        "xgb_itervar": {"loss_type": "reg", "feature_type": "itervar"},
        "xgb_curve": {"loss_type": "reg", "feature_type": "curve"},
        "xgb_rank": {"loss_type": "rank"},
        "xgb_rank_knob": {"loss_type": "rank", "feature_type": "knob"},
        "xgb_rank_itervar": {"loss_type": "rank", "feature_type": "itervar"},
        "xgb_rank_curve": {"loss_type": "rank", "feature_type": "curve"},
        "xgb_rank_binary": {"loss_type": "rank-binary"},
        "xgb_rank_binary_knob": {"loss_type": "rank-binary", "feature_type": "knob"},
        "xgb_rank_binary_itervar": {"loss_type": "rank-binary", "feature_type": "itervar"},
        "xgb_rank_binary_curve": {"loss_type": "rank-binary", "feature_type": "curve"},
    }
    if tuner in xgb_variants:
        return XGBTuner(tsk, **xgb_variants[tuner])
    if tuner == "ga":
        return GATuner(tsk, pop_size=100)
    if tuner == "random":
        return RandomTuner(tsk)
    if tuner == "gridsearch":
        return GridSearchTuner(tsk)
    raise ValueError("Invalid tuner: " + tuner)


def _make_tuning_callback(tsk, prefix, tsk_trial):
    """Build a tuner callback that prints details of every successful measurement."""
    # Advances once per callback invocation (one measured batch), which is the
    # number printed as the "Trial" counter.
    calls_seen = [0]

    def tuning_callback(_, inputs, results):
        calls_seen[0] += 1
        for inp, res in zip(inputs, results):
            if res.error_no != 0:
                continue

            config = inp.config
            timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(res.timestamp))
            # Throughput: operations of the task divided by the mean measured
            # time. Dividing the operation count by 1e9 alone gives a count,
            # not a rate.
            gflops = tsk.flop / np.mean(res.costs) / 1e9

            print(f"{prefix} Trial {calls_seen[0]}/{tsk_trial}...")
            print("    Configuration Options:")
            for k, v in config._entity_map.items():
                print(f"        {k}: {v}")

            lencost = len(res.costs)
            print(f"       Length is:", lencost)
            for cost_idx in range(lencost):
                print(f"    Time Cost[{cost_idx}]: {res.costs[cost_idx]:.6f} s")
            print(f"    FLOPs: {tsk.flop}")
            print(f"    All Cost: {res.all_cost:.6f} s")
            print(f"    Timestamp: {timestamp} ")
            print(f"    GFLOPS: {gflops:.2f}")

    return tuning_callback


def tune_tasks(
    tasks,
    measure_option,
    tuner="xgb_rank_itervar",
    n_trial=1000,
    early_stopping=20,
    log_filename="tuning.log",
    use_transfer_learning=False,
):
    """Tune AutoTVM tasks one after another and keep the best records.

    Every measurement is appended to ``<log_filename>.tmp``; once all tasks
    are done the best record per workload is written to ``log_filename`` and
    the temporary file is removed.

    Args:
        tasks: AutoTVM tasks to tune, in order.
        measure_option: Measurement configuration passed to each tuner.
        tuner: Tuner selector: one of the ``xgb*`` variants, ``"ga"``,
            ``"random"`` or ``"gridsearch"``.
        n_trial: Upper bound on trials per task; capped at the size of the
            task's config space.
        early_stopping: Early-stopping setting passed to each tuner.
        log_filename: Destination file for the best records.
        use_transfer_learning: When true, each tuner first loads the records
            gathered so far from the temporary log.

    Raises:
        ValueError: If ``tuner`` is not a known selector (raised when the
            first task is reached).
    """
    # Start from a clean temporary log.
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        print(f"\nStarting tuning for {prefix.strip()}")

        tuner_obj = _create_tuner(tuner, tsk)

        if use_transfer_learning and os.path.isfile(tmp_log_file):
            tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # Never ask for more trials than the config space holds.
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(
            n_trial=tsk_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
                _make_tuning_callback(tsk, prefix, tsk_trial),
            ],
        )

    # Without a temporary log (for example an empty task list) there is
    # nothing to pick from.
    if not os.path.isfile(tmp_log_file):
        print("No tuning records were written; nothing to select.")
        return

    # Pick the best records into the final log file.
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
    
    # print("Collected Scores (FLOPS) during tuning:")
    # print(monitor.trial_scores())
    # print("Timestamps of each trial:")
    # print(monitor.trial_timestamps())

# Uncomment and run the function as needed
# module = tune_and_evaluate(tuning_option, batch_size)


In [164]:
def tune_and_evaluate(tuning_opt, batch_size):
    """Inspect the first conv2d tuning task, then compile and benchmark the network.

    Steps, in order:
      1. Build the notebook-level ``network`` for ``batch_size`` and extract
         its ``nn.conv2d`` tuning tasks.
      2. Print the attributes of the first task and up to five entries of its
         config space.
      3. Build a single measure input (config index 0) with a ``LocalBuilder``
         and print the build results; the builder's temporary directory is
         copied to ``/tmp/tmp_b4_del`` before building.
      4. Compile the network under the records in
         ``tuning_opt['log_filename']`` and benchmark it on random input.

    The call to ``tune_tasks`` is commented out, so no tuning happens here;
    compilation relies on whatever the log file already contains.

    Args:
        tuning_opt: Dict of tuning options; only ``'log_filename'`` is read.
        batch_size: Batch dimension of the network to build.

    Returns:
        The graph module that was benchmarked.
    """
    # Extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, out_shape = get_network(network, batch_size)
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
    )

    # Print only the first task
    print("\nExtracted task:")
    task = tasks[0]
    print(f"Task 1/1")
    print(f"  Name: {task.name}")
    print(f"  Args: {task.args}")
    print(f"  Workload: {task.workload}")
    print(f"  Config Space Size: {len(task.config_space)}")
    print(f"  Target: {task.target}")
    print(f"  FLOPs: {task.flop}")
    print(f"  Function: {task.func}")
    print(f"  KWArgs: {task.kwargs}")
    
    # Print the first five items yielded by iterating the config space
    print("  Sample Configurations:")
    for j, config in enumerate(task.config_space):
        if j >= 5:
            break
        print(f"    Config {j + 1}: {config}")
    print("")
    
    # Use the config at index 0 for the single trial build below
    config = task.config_space.get(0)

    measure_input = autotvm.measure.MeasureInput(target=target, task=task, config=config)
    builder = autotvm.measure.LocalBuilder(timeout=10, n_parallel=None)
    # builder = autotvm.measure.LocalBuilder(timeout=10, n_parallel=None, build_func="ndk")
    # builder = autotvm.measure.LocalBuilder(timeout=10, n_parallel=None, build_func="stackvm")

    # Snapshot the builder's temporary directory before the build runs,
    # replacing any snapshot left by a previous call.
    tmp_b4_del = "/tmp/tmp_b4_del"
    if os.path.exists(tmp_b4_del):
        shutil.rmtree(tmp_b4_del)
    shutil.copytree(builder.tmp_dir, tmp_b4_del)
    print(f"Contents copied to {tmp_b4_del}")
    print(f"tmp_dir after init: {builder.tmp_dir}")

    # Make sure the builder has a build_kwargs attribute before building.
    if not hasattr(builder, 'build_kwargs'):
        builder.build_kwargs = {}
    
    print("\nBuilding measure input...")
    build_results = builder.build([measure_input])

    print(f"tmp_dir after build: {builder.tmp_dir}")
    
    # Print each result and probe it for a few attributes, reporting the ones
    # it does not have.
    print("\nBuild Results:")
    for result in build_results:
        print(result) 
        if hasattr(result, 'costs'):
            print(f"  Costs: {result.costs}")
        else:
            print("  Costs attribute not found.")
        if hasattr(result, 'error_no'):
            print(f"  Error No: {result.error_no}")
        else:
            print("  Error No attribute not found.")
        if hasattr(result, 'all_cost'):
            print(f"  All Cost: {result.all_cost}")
        else:
            print("  All Cost attribute not found.")
        if hasattr(result, 'timestamp'):
            print(f"  Timestamp: {result.timestamp}")
        else:
            print("  Timestamp attribute not found.")




    

    # Tuning the first task (currently disabled: the call below is commented out)
    print("Tuning...")
    # tune_tasks([task], **tuning_opt)

    # Compile kernels with history best records
    with autotvm.apply_history_best(tuning_opt['log_filename']):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

        # Create the graph module on the device and feed it random input
        dev = tvm.device(str(target), 0)
        module = runtime.GraphModule(lib["default"](dev))
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input("data", data_tvm)

        # Evaluate
        print("Evaluate inference time cost...")
        print(module.benchmark(dev, number=1, repeat=600))
        
    return module

# Uncomment and run the function as needed:
# module = tune_and_evaluate(tuning_option, batch_size)


In [165]:
# Run the inspect/build/compile/benchmark function for the notebook-level batch size.
module = tune_and_evaluate(tuning_option, batch_size)

Extract tasks...

Extracted task:
Task 1/1
  Name: conv2d_nchw.cuda
  Args: (('TENSOR', (10, 3, 224, 224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32')
  Workload: ('conv2d_nchw.cuda', ('TENSOR', (10, 3, 224, 224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32')
  Config Space Size: 79027200
  Target: cuda -keys=cuda,gpu -arch=sm_75 -max_num_threads=1024 -model=unknown -thread_warp_size=32
  FLOPs: 2360279040.0
  Function: <tvm.autotvm.task.task.TaskTemplate object at 0x7fdca75f7ad0>
  KWArgs: {}
  Sample Configurations:
    Config 1: tile_f
    Config 2: tile_y
    Config 3: tile_x
    Config 4: tile_rc
    Config 5: tile_ry

Contents copied to /tmp/tmp_b4_del
tmp_dir after init: /tmp/tmpgavdd29_

Building measure input...
tmp_dir after build: /tmp/tmpgtmp9x8i

Build Results:
BuildResult(filename='/tmp/tmpgtmp9x8i/tmp_func_87e227011be18206.tar', arg_info=(((10, 64, 112, 112), 'float32'), ((64, 3, 

In [129]:
def tvm_relay_with_file(network, batch_size, tuning_logs_dir=None):
    """Compile a network using the tuning log stored for its batch size.

    The log is looked up at
    ``<tuning_logs_dir>/longer-tune/log<batch_size>/resnet-18.log``.

    Args:
        network: Network name understood by ``get_network``.
        batch_size: Batch dimension; also selects the log sub-directory.
        tuning_logs_dir: Root folder of the tuning logs. Defaults to the
            location this notebook was written against.

    Returns:
        A graph module created from the compiled library on the target
        device. No input is set on it.
    """
    if tuning_logs_dir is None:
        tuning_logs_dir = "/home1/public/misampson/resnet-50/git/ITE-Forth-CARV/tuning-logs"
    logfile_path = os.path.join(tuning_logs_dir, "longer-tune", f"log{batch_size}", "resnet-18.log")

    # Make a missing log visible: timings labelled "autotuned" are only
    # meaningful if the records were actually found.
    if not os.path.isfile(logfile_path):
        print(f"Warning: tuning log not found: {logfile_path}")

    mod, params, input_shape, out_shape = get_network(network, batch_size)

    with autotvm.apply_history_best(logfile_path):
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target=target, params=params)

    # Load the compiled module onto the device
    dev = tvm.device(str(target), 0)
    module = runtime.GraphModule(lib["default"](dev))
    return module

# module = tvm_relay_with_file(network, batch_size)

In [None]:
import threading
import time
import numpy as np
import matplotlib.pyplot as plt

def execute_and_plot_autotune_timit():
    """Time the log-compiled TVM modules for several batch sizes and plot the means.

    For each batch size a module is compiled with ``tvm_relay_with_file``,
    ``run_module`` is timed on it with ``timit``, and the mean is stored with
    ``save_mean_time``. The stored means are then read back with
    ``get_timing_results`` and drawn with ``plot_timing_results``.
    """
    batch_sizes = [1, 10, 100, 200, 256]

    for size in batch_sizes:
        tuned_module = tvm_relay_with_file(network, size)
        summary = timit(run_module, tuned_module)
        save_mean_time(size, summary["mean"])

    # Read the stored means back from disk before plotting.
    plot_timing_results(get_timing_results(batch_sizes))


def save_mean_time(batch_size, mean_time):
    """Write one mean timing to ``autotune_timing_batch_<batch_size>.txt``.

    The file is created in the current working directory and overwritten if
    it already exists; its whole content is ``str(mean_time)``.
    """
    out_name = f'autotune_timing_batch_{batch_size}.txt'
    with open(out_name, 'w') as out_file:
        out_file.write(str(mean_time))

def get_timing_results(batch_sizes):
    """Read back the mean timings stored as ``autotune_timing_batch_<size>.txt``.

    Args:
        batch_sizes: Batch sizes whose files should be read from the current
            working directory.

    Returns:
        A list of ``(batch_size, mean_time)`` tuples in the order given.
    """
    def _read_mean(size):
        # Each file holds a single float written as text.
        with open(f'autotune_timing_batch_{size}.txt', 'r') as timing_file:
            return float(timing_file.read())

    return [(size, _read_mean(size)) for size in batch_sizes]

def plot_timing_results(timing_results, title='AutoTVM Mean Execution Time vs Batch Size'):
    """Draw a horizontal bar chart of mean execution time per batch size.

    Bars are ordered by mean time, with the fastest entry at the top.

    Args:
        timing_results: Iterable of ``(batch_size, mean_time)`` tuples;
            ``None`` entries are ignored.
        title: Figure title. The default names AutoTVM because the timing
            files read by the neighbouring ``get_timing_results`` are the
            ``autotune_timing_batch_*`` ones.

    Returns:
        None. Prints a message instead of plotting when nothing is left to
        plot.
    """
    # Drop placeholders for runs that produced no measurement.
    timing_results = [result for result in timing_results if result is not None]

    if not timing_results:
        print("No timing results to plot.")
        return

    timing_results.sort(key=lambda x: x[1])  # Sort by mean time
    batch_sizes = [result[0] for result in timing_results]  # Extract batch sizes
    timing_means = [result[1] for result in timing_results]  # Extract timing results

    plt.figure(figsize=(10, 6))

    # Generate equally spaced y-axis ticks
    y_ticks = np.arange(len(batch_sizes))

    # One bar per entry, each with its own colour and legend label.
    colors = plt.cm.viridis(np.linspace(0, 1, len(batch_sizes)))
    for i, (mean, size) in enumerate(zip(timing_means, batch_sizes)):
        plt.barh(y_ticks[i], mean, color=colors[i], label=f'Batch Size {size}')

    # Set y-axis ticks and labels
    plt.yticks(y_ticks, batch_sizes)

    plt.title(title)
    plt.xlabel('Mean Execution Time (seconds)')
    plt.ylabel('Batch Size')
    plt.legend()
    plt.grid(axis='x', linestyle='--', alpha=0.7)  # Dashed vertical guide lines only
    plt.gca().invert_yaxis()  # Put the first (fastest) entry at the top
    plt.tight_layout()
    plt.show()

# Time the log-compiled modules for every batch size and plot the stored means.
execute_and_plot_autotune_timit()


Timing Summary:
{'mean': 0.008999020233750344, 'median': 0.009150846395641565, 'std': 0.0001976378763779741}
Timing Summary:
{'mean': 0.046142653003335, 'median': 0.04624128295108676, 'std': 0.0002371395110089922}
Timing Summary:
{'mean': 0.3330501952208579, 'median': 0.33302059676498175, 'std': 0.0002002208966988406}
Timing Summary:
{'mean': 1.0582680746447295, 'median': 1.0577474259771407, 'std': 0.0022788268541794154}


In [None]:
# tvm_autotune_time=timit(run_module,module)

In [None]:
def plot_timestamps(time1, time2, time3):
    """Plot mean, median and std of up to three timing summaries side by side.

    Args:
        time1: Timing summary dict (keys ``"mean"``, ``"median"``, ``"std"``)
            drawn as 'Pytorch', or ``None`` to leave it out.
        time2: Same, drawn as 'TVM without tuning'.
        time3: Same, drawn as 'TVM autotune'.
    """
    def _unpack(summary):
        # A missing summary becomes three placeholders so the series is skipped.
        if summary is None:
            return [None, None, None]
        return [summary["mean"], summary["median"], summary["std"]]

    # Unpack every summary before any figure is created.
    series = [
        (_unpack(time1), 'Pytorch'),
        (_unpack(time2), 'TVM without tuning'),
        (_unpack(time3), 'TVM autotune'),
    ]

    labels = ['Mean', 'Median', 'Std']
    x = range(len(labels))
    width = 0.2

    fig, ax = plt.subplots()
    bars = []
    for slot, (values, name) in enumerate(series):
        if values[0] is None:
            continue
        # Each series keeps its own fixed slot within a group of bars.
        positions = [i + width * slot for i in x]
        bars.append(ax.bar(positions, values, width, label=name))

    ax.set_xlabel('Metrics')
    ax.set_ylabel('Time')
    ax.set_title('Classification Timing Comparison')
    ax.set_xticks([i + width for i in x])
    ax.set_xticklabels(labels)

    # A legend only makes sense when at least one series was drawn.
    if bars:
        ax.legend()

    plt.show()

In [None]:
# plot_timestamps(pytorch_time, tvm_wo_autotune_time, tvm_autotune_time)

In [None]:

def mean_timestamps(time1, time2, time3):
    """Plot the mean time of up to three timing summaries as annotated bars.

    Args:
        time1: Timing summary dict (key ``"mean"``) drawn as 'Pytorch', or
            ``None`` to leave it out.
        time2: Same, drawn as 'TVM without tuning'.
        time3: Same, drawn as 'TVM autotune'.
    """
    def parse_timestamp(timestamp):
        # A missing summary yields None so its bar is skipped.
        if timestamp is None:
            return None
        return timestamp["mean"]

    # Extract every mean before any figure is created.
    means = [parse_timestamp(summary) for summary in (time1, time2, time3)]
    labels = ['Pytorch', 'TVM without tuning', 'TVM autotune']

    x = range(len(labels))
    width = 0.5

    fig, ax = plt.subplots()
    bars = [
        ax.bar(x[pos], mean, width, label=labels[pos])
        for pos, mean in enumerate(means)
        if mean is not None
    ]

    ax.set_xlabel('Frameworks')
    ax.set_ylabel('Mean Time')
    ax.set_title('Mean Classification Time Comparison')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)

    # Only draw a legend when something was plotted, as plot_timestamps does.
    if bars:
        ax.legend()

    # Add the mean value on top of each bar
    for bar in bars:
        for b in bar:
            height = b.get_height()
            ax.annotate(f'{height:.4f}',
                        xy=(b.get_x() + b.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

    plt.show()

In [None]:
# mean_timestamps(pytorch_time, tvm_wo_autotune_time, tvm_autotune_time)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def get_timing_results(file_prefixes, batch_sizes):
    """Collect stored mean timings for several file prefixes.

    For every prefix, the file ``<prefix><batch_size>.txt`` is read for each
    batch size; files that do not exist are skipped.

    Args:
        file_prefixes: File-name prefixes, one per timing series.
        batch_sizes: Batch sizes to look up for every prefix.

    Returns:
        One list per prefix, each holding ``(batch_size, mean_time)`` tuples
        for the files that were found.
    """
    def _collect(prefix):
        found = []
        for size in batch_sizes:
            try:
                with open(f'{prefix}{size}.txt', 'r') as timing_file:
                    found.append((size, float(timing_file.read())))
            except FileNotFoundError:
                continue  # No measurement stored for this batch size
        return found

    return [_collect(prefix) for prefix in file_prefixes]

def plot_timing_results(timing_results_list, labels, model, dataset):
    """Draw grouped bars of mean execution time per batch size, one bar per series.

    Bars are positioned by batch size, so a series that lacks a measurement
    for some batch size leaves a gap there instead of shifting its remaining
    bars.

    Args:
        timing_results_list: One list of ``(batch_size, mean_time)`` tuples
            per series.
        labels: Series names, matched to ``timing_results_list`` by position.
            'PyTorch', 'TVM' and 'Autotune' have dedicated colours; any other
            label is drawn in black.
        model: Accepted for call-site compatibility; not used.
        dataset: Accepted for call-site compatibility; not used.

    Returns:
        None. Prints a message instead of plotting when there are no series,
        no labels, or no measurements at all.
    """
    if not timing_results_list or not labels:
        print("No timing results or labels provided.")
        return

    # Every batch size that appears in any series gets one slot on the x-axis.
    all_sizes = sorted({size for series in timing_results_list for size, _ in series})
    if not all_sizes:
        print("No timing results to plot.")
        return
    slot_of = {size: pos for pos, size in enumerate(all_sizes)}
    x_ticks = np.arange(len(all_sizes))
    bar_width = 0.2

    plt.figure(figsize=(12, 8))  # Adjust figsize if needed

    # Define colors for PyTorch, TVM, and autotune
    color_map = {'PyTorch': 'orange', 'TVM': 'lightblue', 'Autotune': 'darkblue'}

    # Plot vertical bars for mean timing results
    for i, (timing_results, label) in enumerate(zip(timing_results_list, labels)):
        color = color_map.get(label, 'black')
        for size, mean in timing_results:
            plt.bar(slot_of[size] + i * bar_width, mean, color=color, width=bar_width)

    # Centre each tick under its group of bars.
    n_series = min(len(timing_results_list), len(labels))
    plt.xticks(x_ticks + bar_width * (n_series - 1) / 2, all_sizes, fontsize=12)

    plt.title('Execution on Nvidia GPU 2080ti', fontsize=36)
    plt.xlabel('Batch Size', fontsize=24)
    plt.ylabel('Mean Execution Time (seconds)', fontsize=24)

    # Legend entries use the same colour lookup as the bars, so an unknown
    # label cannot raise here.
    custom_handles = [
        plt.Rectangle((0, 0), 1, 1, color=color_map.get(label, 'black')) for label in labels
    ]
    plt.legend(custom_handles, labels, loc='upper left', fontsize=12)

    plt.grid(axis='y', linestyle='--', alpha=0.7)

    plt.tight_layout()
    plt.show()

# Example usage
# One timing-file prefix per series; labels are matched to prefixes by position.
file_prefixes = ['pytorch_timing_batch_', 'tvm_timing_batch_', 'autotune_timing_batch_']
labels = ['PyTorch', 'TVM', 'Autotune']
batch_sizes = [1, 10, 100, 200, 256] 
# Files that do not exist are skipped by get_timing_results.
timing_results_list = get_timing_results(file_prefixes, batch_sizes)
plot_timing_results(timing_results_list, labels, model='ResNet-18', dataset='ImageNet')


In [27]:
# Scratch check: join a list of strings with an empty separator.
a = ["abc", "def", "tmp"]
"".join(a)

'abcdeftmp'