In [None]:
# Preamble: make ./src importable, then import the project modules

import os
import sys
import copy

# Remember where the notebook was launched from, and put "<cwd>/src" at the
# front of the import search path (presumably so the local modules imported
# after this point resolve from there -- confirm against the repo layout).
WORKING_DIRECTORY = os.getcwd()
_src_dir = os.path.join(os.path.abspath(WORKING_DIRECTORY), "src")
sys.path.insert(0, _src_dir)

import pandas as pd
import plots
from operations import Operation, OperationList
from timeloop import generate_timeloop_results_for_ops
from mappings import PotentialMultiLayerMapping, combine_solutions, combine_solutions_access_cache
from tensors import Tensor, Rank
from operations import Operation, OperationList

from util import *
import paretos as paretos

# Check to see where we are running timeloop-mapper from.
# shutil.which returns the resolved path of the executable, or None when the
# command cannot be found on PATH. Being the last expression of this cell, the
# returned value is what the notebook displays as the cell output.
import shutil
shutil.which("timeloop-mapper")

In [None]:
# Reproduce Fig 18 of the Orojenesis paper

# Make an intermediate = Tensor('A2'), then use
# this for both gemm1/gemm2 (enforce shared tensor and rank names).
# The very same Tensor object is passed as gemm1's output and gemm2's input.
int_A2 = Tensor("A2", [Rank("M", 32 * 1024), Rank("N", 16 * 1024)])
gemm1 = Operation(
    "32k_4k_16k",
    input_tensors=[
        Tensor("A1", [Rank("M", 32 * 1024), Rank("K", 4 * 1024)]),
        Tensor("B1", [Rank("N", 16 * 1024), Rank("K", 4 * 1024)]),
    ],
    output_tensors=[int_A2],
)
# NOTE(review): within gemm2 the rank name "N" is used with size 16k (on A2)
# and with size 4k (on B2 and D2), while B2's 16k rank is named "K". Confirm
# that auto_link_shared_ranks() links these ranks the way this setup intends.
gemm2 = Operation(
    "32k_16k_4k",
    input_tensors=[int_A2, Tensor("B2", [Rank("N", 4 * 1024), Rank("K", 16 * 1024)])],
    output_tensors=[Tensor("D2", [Rank("M", 32 * 1024), Rank("N", 4 * 1024)])],
)

operations = OperationList([gemm1, gemm2])

# Paper assumes fp16 everywhere.
# Plain loops instead of throwaway list comprehensions: these calls are made
# only for their side effects. The two passes are kept separate so the call
# order matches the previous behaviour (all set_precision, then all
# set_accum_precision).
for tensor in operations.tensors:
    tensor.set_precision(16)
for tensor in operations.tensors:
    tensor.set_accum_precision(16)

# Detect shared ranks
for operation in operations:
    operation.auto_link_shared_ranks()

# Label A2 as fusable
int_A2.is_fusable = True

# NOTE(review): this SVG string is computed and then discarded -- it is not the
# last expression of the cell, so the notebook does not display it. Kept as-is
# in case the call is relied on as a smoke test; move it to the end of its own
# cell (or wrap it in a display call) if the graph is meant to be shown.
operations.to_pydot().create_svg().decode()

# ====================================================================================================
# FUSED VERSUS UNFUSED
# ====================================================================================================


def _combine_and_select(initial, ops, enable_fusion):
    """Run ``combine_solutions`` once and extract its final solutions.

    Args:
        initial: List of ``PotentialMultiLayerMapping`` objects to start from.
        ops: The ``OperationList`` being mapped.
        enable_fusion: Forwarded unchanged to ``combine_solutions``.

    Returns:
        A 4-tuple ``(combined, util, accesses, final)``. The first three are
        the second, third and fourth values returned by ``combine_solutions``
        (its first return value is ignored); ``final`` is what
        ``PotentialMultiLayerMapping.get_final_fused_solutions`` returns for
        ``combined`` with ``op_names=[]`` and ``pareto_only=True``.
    """
    _, combined, util, accesses = combine_solutions(
        initial, ops, enable_fusion=enable_fusion
    )
    final = PotentialMultiLayerMapping.get_final_fused_solutions(
        combined, op_names=[], pareto_only=True
    )
    return combined, util, accesses, final


# Disable flipped tc for now
per_op_solutions = generate_timeloop_results_for_ops(operations, flipped_tc=[False])

# Starting point shared by both runs: a single multi-layer mapping built from
# one timeloop result per operation, keyed by operation name.
initial_solutions = [
    PotentialMultiLayerMapping(
        **{op.name: s for op, s in zip(operations, per_op_solutions)}
    )
]

# Run WITHOUT fusion; only the final solutions are kept from this run.
solutions_unfused = _combine_and_select(
    initial_solutions, operations, enable_fusion=False
)[-1]

# Run WITH fusion. `solutions`, `baseline_util` and `baseline_accesses` end up
# holding the values from this fusion-enabled run, exactly as before.
# NOTE(review): the "baseline_" names hint that the unfused values may have
# been intended here -- confirm before relying on them in later cells.
solutions, baseline_util, baseline_accesses, solutions_fused = _combine_and_select(
    initial_solutions, operations, enable_fusion=True
)

In [None]:
import oavesplots
import importlib
import plotly.express as px

# Re-import oavesplots so edits made to that module since the kernel started
# are picked up without a restart.
importlib.reload(oavesplots)

# Stack the first fused and first unfused result frames, tagging every row with
# the method it came from, then add the two columns reported in the paper.
# NOTE(review): the /8 and /16 divisors presumably convert bit counts into
# bytes and into 2-byte words respectively (going by the column labels) --
# confirm the units of 'Total Utilization' and 'Total Accesses'.
df = pd.concat(
    [
        solutions_fused[0].df.assign(Method="Fused"),
        solutions_unfused[0].df.assign(Method="Unfused"),
    ]
).assign(
    **{
        "Util (B)": lambda frame: frame["Total Utilization"] / 8,
        "Access (2B)": lambda frame: frame["Total Accesses"] / 16,
    }
)

# Build the hover columns from the operation list (rather than by filtering
# df.columns for 'Mapping') so that their order follows the operations.
mapping_col = [f"{op.name} Mapping" for op in operations]

fig = px.scatter(
    df,
    x="Util (B)",
    y="Access (2B)",
    log_x=True,
    log_y=True,
    color="Method",
    hover_data=mapping_col,
)

oavesplots.interactive_plot(fig)