In [None]:
%load_ext autoreload
%autoreload 2
from notebook import *
# If you get a message about NUMEXPR_MAX_THREADS being set incorrectly, don't worry. It's not a problem.

#KEY include Namebox.ipynb

<div style=" font-size: 300% !important;
    margin-top: 1.5em;
    margin-bottom: 1.5em;
    font-weight: bold;
    line-height: 1.0;
    text-align:center;">Lab 4: The Memory Hierarchy (Part II) -- Demos</div>


# Temporal Locality

* How much spatial locality is there in this code?
* How much temporal locality is there? 
    * How big is the working set?
    * How much reuse is there?

In [None]:
# Demo "spatial1": passes the C++ source below to fiddle() as spatial1.cpp with
# function="stride", opt="-O1" and run=["moneta"].
# (presumably fiddle builds the file and runs it under the Moneta tracer -- confirm)
#
# What the embedded stride() does:
#   * wraps `data` in a tensor_t<uint32_t> constructed with dims (1024, 1, 1, 1);
#   * opens the "init" tag from t.data up to the address of the element at
#     index element_count();
#   * stores x into element x for x = 0..1023, and repeats that complete pass
#     1024 times (outer loop over i);
#   * closes the "init" tag and returns `data`.
# The `size` and `arg1` parameters are not used by this demo.
#
# `name` is deliberately left in the notebook namespace: the next cell uses it
# to build the trace path.
name="spatial1"
t = fiddle(f"{name}.cpp", function="stride", name=name,  run=["moneta"],opt="-O1",
code=r"""
#include"pin_tags.h"
#include"CNN/tensor_t.hpp"
#include"function_map.hpp"
#include<cstdint>

extern "C"
uint64_t* stride(uint64_t * data, uint64_t size, uint64_t arg1) {
    tensor_t<uint32_t> t(1024, 1,1,1, (uint32_t *)data);
    TAG_START("init", t.data, &t.as_vector(t.element_count()), true);

    for(uint i = 0; i < 1024; i++) {
        for(uint x = 0; x < 1024; x++) {
            t.get(x,0,0,0) = x;
        }
    }
    
    TAG_STOP("init");
    return data;
}

FUNCTION(one_array_1arg, stride);
""")

In [None]:
# Calls show_trace on "./<name>_0", restricted to the "init" tag.
# Depends on `name` from the preceding cell, so run that cell first.
# NOTE(review): later cells in this notebook pass "<name>_0.hdf5"; confirm that
# show_trace accepts the path without the extension.
show_trace(f"./{name}_0", show_tag=["init"])

* How much spatial locality is there in this code?
* How much temporal locality is there? 
    * How big is the working set?
    * How much reuse is there?

In [None]:
# Demo "spatial2": passes the C++ source below to fiddle() as spatial2.cpp with
# function="stride", opt="-O1" and run=["moneta"].
# (presumably fiddle builds the file and runs it under the Moneta tracer -- confirm)
#
# What the embedded stride() does:
#   * wraps `data` in a tensor_t<uint32_t> constructed with dims (1024, 1, 1, 1);
#   * opens the "init" tag from t.data up to the address of the element at
#     index element_count();
#   * stores x into element x for x = 0..1023, exactly once (a single pass);
#   * closes the "init" tag and returns `data`.
# The `size` and `arg1` parameters are not used by this demo.
#
# `name` is deliberately left in the notebook namespace: the next cell uses it
# to build the trace path.
name="spatial2"
t = fiddle(f"{name}.cpp", function="stride", name=name,  run=["moneta"],opt="-O1",
code=r"""
#include"pin_tags.h"
#include"CNN/tensor_t.hpp"
#include"function_map.hpp"
#include<cstdint>

extern "C"
uint64_t* stride(uint64_t * data, uint64_t size, uint64_t arg1) {
    tensor_t<uint32_t> t(1024, 1,1,1, (uint32_t *)data);
    TAG_START("init", t.data, &t.as_vector(t.element_count()), true);

    for(uint x = 0; x < 1024; x++) {
        t.get(x,0,0,0) = x;
    }
    
    TAG_STOP("init");
    return data;
}

FUNCTION(one_array_1arg, stride);
""")

In [None]:
# Calls show_trace on "./<name>_0", restricted to the "init" tag.
# Depends on `name` from the preceding cell, so run that cell first.
# NOTE(review): later cells in this notebook pass "<name>_0.hdf5"; confirm that
# show_trace accepts the path without the extension.
show_trace(f"./{name}_0", show_tag=["init"])

* How much spatial locality is there in this code?
* How much temporal locality is there? 
    * How big is the working set?
    * How much reuse is there?

In [None]:
# Demo "spatial3": passes the C++ source below to fiddle() as spatial3.cpp with
# function="stride", opt="-O1" and run=["moneta"].
# (presumably fiddle builds the file and runs it under the Moneta tracer -- confirm)
#
# What the embedded stride() does:
#   * wraps `data` in a tensor_t<uint32_t> constructed with dims (1024, 1, 1, 1);
#   * opens the "init" tag from t.data up to the address of the element at
#     index element_count();
#   * stores x into element x for x = 0, 8, 16, ... < 1024, i.e. a single pass
#     that touches every 8th element (128 stores);
#   * closes the "init" tag and returns `data`.
# The `size` and `arg1` parameters are not used by this demo.
#
# `name` is deliberately left in the notebook namespace: the next cell uses it
# to build the trace path.
name="spatial3"
t = fiddle(f"{name}.cpp", function="stride", name=name,  run=["moneta"],opt="-O1",
code=r"""
#include"pin_tags.h"
#include"CNN/tensor_t.hpp"
#include"function_map.hpp"
#include<cstdint>

extern "C"
uint64_t* stride(uint64_t * data, uint64_t size, uint64_t arg1) {
    tensor_t<uint32_t> t(1024, 1,1,1, (uint32_t *)data);
    TAG_START("init", t.data, &t.as_vector(t.element_count()), true);

    for(uint x = 0; x < 1024; x+=8) {
        t.get(x,0,0,0) = x;
    }
    
    TAG_STOP("init");
    return data;
}

FUNCTION(one_array_1arg, stride);
""")

In [None]:
# Calls show_trace on "./<name>_0", restricted to the "init" tag.
# Depends on `name` from the preceding cell, so run that cell first.
# NOTE(review): later cells in this notebook pass "<name>_0.hdf5"; confirm that
# show_trace accepts the path without the extension.
show_trace(f"./{name}_0", show_tag=["init"])

* How much spatial locality is there in this code?
* How much temporal locality is there? 
    * How big is the working set?
    * How much reuse is there?

# Miss Types, Locality, and the Data Structure Zoo

## Set

In [None]:
# Demo "spatial4" (std::set): passes the C++ source below to fiddle() as
# spatial4.cpp with function="working", analyze=False, opt="-O1",
# run=["moneta"] and the command line "--size 4096   --iters 1"
# (the f-string evaluates 4*1024).
# (presumably fiddle builds the file and runs it under the Moneta tracer -- confirm)
#
# What the embedded working() does:
#   * "build" tag: inserts `size` values drawn from fast_rand(&seed) (seed starts
#     at 1) into a heap-allocated std::set<uint64_t>; after each insert it looks
#     the value up again and grows the tag to cover that stored element.
#   * "search" tag: resets seed to 1 and looks up `size` values from
#     fast_rand(&seed) again, growing the tag around each element found.
#     The iterator is dereferenced without an end() check; this relies on the
#     search replaying the insert sequence (presumably fast_rand depends only
#     on the seed -- confirm).
#   * "delete" tag: destroys the set.
#   * returns `data` unchanged; `arg1` is not used.
#
# `name` is deliberately left in the notebook namespace: the next cell uses it
# to build the trace path.
name="spatial4"
t = fiddle(f"{name}.cpp", function="working", analyze=False, run=["moneta"], name=name,opt="-O1",
code=r"""
#include"pin_tags.h"
#include"function_map.hpp"
#include"archlab.hpp"
#include<set>
#include<cstdint>

extern "C"
uint64_t* working(uint64_t * data, uint64_t size, uint64_t arg1) {
    auto s = new std::set<uint64_t>();
    uint64_t seed = 1;

    TAG_START("build", (void*)-1, 0, true);
    for(uint x = 0; x < size; x++) {
        auto t = fast_rand(&seed);
        s->insert(t);
        auto a = s->find(t);
        TAG_GROW("build",  &(*a), &(*a)+ 1);
    }
    TAG_STOP("build");
    
    seed = 1;
    
    TAG_START("search", (void*)-1, 0, true);
    for(uint x = 0; x < size; x++) {
        auto a = s->find(fast_rand(&seed));
        TAG_GROW("search", &(*a), &(*a)+ 1);
    }
    TAG_STOP("search");

    TAG_START_ALL("delete", false);
    delete s;
    TAG_STOP("delete");
    return data;
}

FUNCTION(one_array_1arg, working);
""",
           cmdline=f"--size {4* 1024}   --iters 1")

In [None]:
# Calls show_trace on "./<name>_0.hdf5", restricted to the 'build' and 'search'
# tags, with the layer presets "misses-compulsory-all", "misses-all", "hits-all".
# Depends on `name` from the preceding cell, so run that cell first.
show_trace(f"./{name}_0.hdf5", show_tag=['build','search'], layer_preset=["misses-compulsory-all", "misses-all", "hits-all"])

* How much spatial locality is there in this code?
* How much temporal locality is there? 
    * How big is the working set?
    * How much reuse is there?
   

## Unordered Set

In [None]:
# Demo "spatial5" (std::unordered_set): passes the C++ source below to fiddle()
# as spatial5.cpp with function="working", analyze=False, opt="-O1",
# run=["moneta"] and the command line "--size 4096   --iters 1"
# (the f-string evaluates 4*1024).
# (presumably fiddle builds the file and runs it under the Moneta tracer -- confirm)
#
# What the embedded working() does:
#   * "build" tag: inserts `size` values drawn from fast_rand(&seed) (seed starts
#     at 1) into a heap-allocated std::unordered_set<uint64_t>; after each insert
#     it looks the value up again and grows the tag to cover that stored element.
#   * "search" tag: resets seed to 1 and looks up `size` values from
#     fast_rand(&seed) again, growing the tag around each element found.
#     The iterator is dereferenced without an end() check; this relies on the
#     search replaying the insert sequence (presumably fast_rand depends only
#     on the seed -- confirm).
#   * "delete" tag: destroys the set.
#   * returns `data` unchanged; `arg1` is not used.
#
# `name` is deliberately left in the notebook namespace: the next cell uses it
# to build the trace path.
name="spatial5"
t = fiddle(f"{name}.cpp", function="working", analyze=False, run=["moneta"], name=name,opt="-O1",
code=r"""
#include"pin_tags.h"
#include"function_map.hpp"
#include"archlab.hpp"
#include<unordered_set>
#include<cstdint>

extern "C"
uint64_t* working(uint64_t * data, uint64_t size, uint64_t arg1) {
    auto s = new std::unordered_set<uint64_t>();
    uint64_t seed = 1;

    TAG_START("build", (void*)-1, 0, true);
    for(uint x = 0; x < size; x++) {
        auto t = fast_rand(&seed);
        s->insert(t);
        auto a = s->find(t);
        TAG_GROW("build",  &(*a), &(*a)+ 1);
    }
    TAG_STOP("build");
    
    seed = 1;
    
    TAG_START("search", (void*)-1, 0, true);
    for(uint x = 0; x < size; x++) {
        auto a = s->find(fast_rand(&seed));
        TAG_GROW("search", &(*a), &(*a)+ 1);
    }
    TAG_STOP("search");

    TAG_START_ALL("delete", false);
    delete s;
    TAG_STOP("delete");
    return data;
}

FUNCTION(one_array_1arg, working);
""",
           cmdline=f"--size {4* 1024}   --iters 1")

In [None]:
# Calls show_trace on "./<name>_0.hdf5", restricted to the 'build' and 'search'
# tags, with the layer presets "misses-compulsory-all", "misses-all", "hits-all".
# Depends on `name` from the preceding cell, so run that cell first.
show_trace(f"./{name}_0.hdf5", show_tag=['build','search'], layer_preset=["misses-compulsory-all", "misses-all", "hits-all"])

## List

In [None]:
# Demo "spatial6" (std::list): passes the C++ source below to fiddle() as
# spatial6.cpp with function="working", analyze=False, opt="-O1",
# run=["moneta"] and the command line "--size 4096   --iters 1"
# (the f-string evaluates 4*1024).
# (presumably fiddle builds the file and runs it under the Moneta tracer -- confirm)
#
# What the embedded working() does:
#   * "build" tag: appends `size` values from fast_rand(&seed) (seed starts at 1)
#     to a heap-allocated std::list<uint64_t>, growing the tag around each newly
#     appended element. Still inside the same tag, it prints the list size to
#     stderr and sums every element ten times over, so the traversals are
#     recorded under "build" as well.
#   * "delete" tag: destroys the list.
#   * writes the sum to data[0] and returns `data`; `arg1` is not used.
#
# The source includes <iostream> explicitly because it uses std::cerr; without
# it the demo only compiles if one of the project headers happens to pull
# <iostream> in.
#
# `name` is deliberately left in the notebook namespace: the next cell uses it
# to build the trace path.
name="spatial6"
t = fiddle(f"{name}.cpp", function="working", analyze=False, run=["moneta"], name=name,opt="-O1",
code=r"""
#include"pin_tags.h"
#include"function_map.hpp"
#include"archlab.hpp"
#include<list>
#include<iostream>
#include<cstdint>

extern "C"
uint64_t* working(uint64_t * data, uint64_t size, uint64_t arg1) {
    auto s = new std::list<uint64_t>();
    uint64_t seed = 1;

    TAG_START("build", (void*)-1, 0, true);
    for(uint x = 0; x < size; x++) {
        s->push_back(fast_rand(&seed));
        auto a = &s->back();
        TAG_GROW("build",  &(*a), &(*a)+ 1);
    }
        
    uint64_t sum = 0;
    std::cerr << s->size() << "\n";
    for(int i = 0; i < 10; i++){
        for(auto &a: *s) sum += a;
    }
    TAG_STOP("build");

    TAG_START_ALL("delete", false);
    delete s;
    TAG_STOP("delete");
    data[0] = sum;
    return data;
}

FUNCTION(one_array_1arg, working);
""",
           cmdline=f"--size {4* 1024}   --iters 1")

In [None]:
# Calls show_trace on "./<name>_0.hdf5", restricted to the 'build' tag, with the
# layer presets "misses-compulsory-all", "misses-all", "hits-all".
# Depends on `name` from the preceding cell, so run that cell first.
show_trace(f"./{name}_0.hdf5", show_tag=['build'], layer_preset=["misses-compulsory-all", "misses-all", "hits-all"])

## Vector

In [None]:
# Demo "spatial7" (std::vector): passes the C++ source below to fiddle() as
# spatial7.cpp with function="working", analyze=False, opt="-O1",
# run=["moneta"] and the command line "--size 4096   --iters 1"
# (the f-string evaluates 4*1024).
# (presumably fiddle builds the file and runs it under the Moneta tracer -- confirm)
#
# What the embedded working() does:
#   * "build" tag: appends `size` values from fast_rand(&seed) (seed starts at 1)
#     to a heap-allocated std::vector<uint64_t>, growing the tag around each
#     newly appended element. Still inside the same tag, it prints the vector
#     size to stderr and sums every element ten times over, so the traversals
#     are recorded under "build" as well.
#   * "delete" tag: destroys the vector.
#   * writes the sum to data[0] and returns `data`; `arg1` is not used.
#
# The source includes <iostream> explicitly because it uses std::cerr; without
# it the demo only compiles if one of the project headers happens to pull
# <iostream> in.
#
# `name` is deliberately left in the notebook namespace: the next cell uses it
# to build the trace path.
name="spatial7"
t = fiddle(f"{name}.cpp", function="working", analyze=False, run=["moneta"], name=name,opt="-O1",
code=r"""
#include"pin_tags.h"
#include"function_map.hpp"
#include"archlab.hpp"
#include<vector>
#include<iostream>
#include<cstdint>

extern "C"
uint64_t* working(uint64_t * data, uint64_t size, uint64_t arg1) {
    auto s = new std::vector<uint64_t>();
    uint64_t seed = 1;

    TAG_START("build", (void*)-1, 0, true);
    for(uint x = 0; x < size; x++) {
        s->push_back(fast_rand(&seed));
        auto a = &s->back();
        TAG_GROW("build",  &(*a), &(*a)+ 1);
    }
        
    uint64_t sum = 0;
    std::cerr << s->size() << "\n";
    for(int i = 0; i < 10; i++){
        for(auto &a: *s) sum += a;
    }
    TAG_STOP("build");

    TAG_START_ALL("delete", false);
    delete s;
    TAG_STOP("delete");
    data[0] = sum;
    return data;
}

FUNCTION(one_array_1arg, working);
""",
           cmdline=f"--size {4* 1024}   --iters 1")

In [None]:
# Calls show_trace on "./<name>_0.hdf5", restricted to the 'build' tag, with the
# layer presets "misses-compulsory-all", "misses-all", "hits-all".
# Depends on `name` from the preceding cell, so run that cell first.
show_trace(f"./{name}_0.hdf5", show_tag=['build'], layer_preset=["misses-compulsory-all", "misses-all", "hits-all"])

# Image Stabilization