In [1]:
import os
from pathlib import Path
import pickle
from fastfusion import Specification
from fastfusion.mapper.FFM.exploration.mapper_multi_einsum import get_sims
from fastfusion.mapper.FFM.joining.simexplore import join_sims
from fastfusion.mapper.FFM.exploration.mapping_filter_tags import get_one_split_tag, get_ffmt_tag


ARCH_DIR          = Path('architecture/')
WORKLOAD_DIR      = Path('workloads/')
MAPPINGS_SIMS_DIR = Path('results/sims/')
MAPPINGS_DATA_DIR = Path('results/data/')


def one_split_tagger(compatibility):
    """Tag `compatibility` via `get_one_split_tag`, passing "MainMemory" as its second argument."""
    memory_name = "MainMemory"
    return get_one_split_tag(compatibility, memory_name)


# Lookup table from the tagger names accepted by get_sims_with_cache to the
# callables that implement them.
NAME_TO_TAGGER = dict(one_split=one_split_tagger)


def get_experiment_name(tagger_name, arch_name: list[str], workload_name):
    """Build the dotted name used as the stem of the cache file names.

    The result is ``matmul8_mixed.<tagger>.<arch1+arch2+...>.<workload>``,
    where the architecture names are joined with ``+``. A `tagger_name` of
    None is rendered as the literal text ``None``.
    """
    arch_part = '+'.join(arch_name)
    fields = ('matmul8_mixed', f'{tagger_name}', arch_part, f'{workload_name}')
    return '.'.join(fields)


def _load_spec_and_architecture(arch_name, workload_name):
    """Build the Specification for one architecture/workload pair.

    Reads ``<arch_name>.arch.yaml`` from ARCH_DIR and
    ``<workload_name>.workload.yaml`` from WORKLOAD_DIR, calls
    ``estimate_energy_area()`` on the resulting spec, and returns the tuple
    ``(spec, flattened_architecture)``.
    """
    spec = Specification.from_yaml(
        ARCH_DIR / f'{arch_name}.arch.yaml',
        WORKLOAD_DIR / f'{workload_name}.workload.yaml',
    )
    spec.estimate_energy_area()
    return spec, spec.get_flattened_architecture()


def get_sims_with_cache(tagger_name=None,
                        refresh_cache=False,
                        arch: list[str]=['snowcat'],
                        workload='matmuls8_mixed'):
    """Return joined mappings for `workload` on `arch`, using pickle caches.

    Two cache levels are consulted unless `refresh_cache` is true:

    * the final joined result, in MAPPINGS_DATA_DIR, keyed by tagger, all
      architecture names and the workload;
    * the per-architecture SIMs, in MAPPINGS_SIMS_DIR, keyed by tagger, one
      architecture name and the workload.

    Anything that is not cached is computed with `get_sims` / `join_sims` and
    then written back to the corresponding pickle file.

    Args:
        tagger_name: Key into NAME_TO_TAGGER, or None to run without a tagger.
        refresh_cache: When true, ignore existing pickles and recompute.
        arch: Architecture names (YAML file stems under ARCH_DIR). The SIMs of
            all listed architectures are pooled per Einsum before joining.
            The default list is never mutated.
        workload: Workload name (YAML file stem under WORKLOAD_DIR).

    Returns:
        Whatever `join_sims` returns (or the unpickled copy of it).

    Raises:
        ValueError: If `arch` is empty.
        KeyError: If `tagger_name` is not None and not in NAME_TO_TAGGER.
    """
    if not arch:
        raise ValueError('arch must name at least one architecture')

    data_name = get_experiment_name(tagger_name, arch, workload)
    result_pickle_name = MAPPINGS_DATA_DIR / f'{data_name}.pkl'
    if result_pickle_name.is_file() and not refresh_cache:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # the cache directories at files this notebook wrote itself.
        with open(result_pickle_name, 'rb') as f:
            mappings = pickle.load(f)
        print(f'Loaded final results from cache {result_pickle_name}')
        return mappings

    tagger = None if tagger_name is None else NAME_TO_TAGGER[tagger_name]

    # `spec` / `flattened_architecture` are needed again by join_sims below.
    # Track which architecture they belong to so they can be (re)built when
    # the SIMs of the last architecture came from the cache.
    spec = None
    flattened_architecture = None
    spec_arch = None

    all_sims = []
    for a in arch:
        sims_name = get_experiment_name(tagger_name, [a], workload)
        sims_pickle_name = MAPPINGS_SIMS_DIR / f'{sims_name}.pkl'
        if sims_pickle_name.is_file() and not refresh_cache:
            with open(sims_pickle_name, 'rb') as f:
                sims = pickle.load(f)
            print(f'Loaded SIMs from {sims_pickle_name}')
            all_sims.append(sims)
            continue

        # Do not rebind `workload` here: it is the workload *name* and is
        # still needed for the file names of the remaining architectures.
        spec, flattened_architecture = _load_spec_and_architecture(a, workload)
        spec_arch = a
        # The second return value (decompression data) is currently unused
        # because decompression of the joined mappings is disabled.
        sims, _ = get_sims(spec, flattened_architecture, tagger=tagger)

        sims_pickle_name.parent.mkdir(parents=True, exist_ok=True)
        with open(sims_pickle_name, 'wb') as f:
            pickle.dump(sims, f)

        all_sims.append(sims)

    # Pool the SIMs of every architecture, Einsum by Einsum.
    combined_sims = {}
    for sims in all_sims:
        for einsum, sims_for_einsum in sims.items():
            combined_sims.setdefault(einsum, []).extend(sims_for_einsum)

    # join_sims is always given the spec of the last listed architecture,
    # which is what an uncached run passes. Build it now if the loop above
    # did not (e.g. because every SIM pickle was a cache hit).
    if spec_arch != arch[-1]:
        spec, flattened_architecture = _load_spec_and_architecture(arch[-1], workload)

    mappings = join_sims(combined_sims, spec, flattened_architecture, drop_valid_reservations=False)

    result_pickle_name.parent.mkdir(parents=True, exist_ok=True)
    with open(result_pickle_name, 'wb') as f:
        pickle.dump(mappings, f)
    print(f'Saved results to cache {result_pickle_name}')

    return mappings

# Run (or load from cache) the four experiment configurations used below.
mappings_one_split = get_sims_with_cache(tagger_name='one_split')
mappings = get_sims_with_cache(tagger_name=None)
mappings_tileflow = get_sims_with_cache(arch=['snowcat_even'], tagger_name='one_split')
# NOTE(review): the saved output of this cell ends with a ParseError
# ("name 'weight' is not defined") while processing snowcat_weight_tiled.
mappings_ffmt = get_sims_with_cache(
    arch=['snowcat_weight_tiled', 'snowcat_weight_untiled'],
)

# pr.disable()
# s = io.StringIO()
# ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
# ps.print_stats(30)  # Print top 30 time-consuming functions
# print(s.getvalue())

# TODO: Check for ranks not in the mapping and put them at the bottom
# TODO: What if there are no loops? 
# TODO: Set _must_exist for all backing storage nodes
# TODO: Constraint attacher
# TODO: Can't have tile size constraints on backing memory
# TODO: Einsum orders
# TODO: Copy Einsums
# TODO: Test dataflow constraints and order of storage nodes
# I'm doing the tile shape exploration now and I'm trying to understand this note. I think I understand what you're saying.
# Can I ask one thing from the constraint code? If the constraint is an equality, then just set the tile_shape attribute of the node (or factor or whatever is needed) to the value.
# The tile shape exploration assumes a particular mapspace (in most cases, tile shapes are factors of the full rank shape), so an equality may never be satisfied. E.g., if the constraint sets the tile shape equal to a non-factor value because you want a particular imperfect factorization, but that's never in the mapspace, then you'll get nothing.
# It's also a bit more efficient to just set the value and the explorer doesn't have to figure out the equality by trial-and-error. For other more complicated constraints, trial-and-error is better.

INFO        Loading yaml file architecture/snowcat.arch.yaml
INFO        Found top key variables in architecture/snowcat.arch.yaml
INFO        Found top key architecture in architecture/snowcat.arch.yaml
INFO        Loading yaml file workloads/matmuls8_mixed.workload.yaml
INFO        Found top key workload in workloads/matmuls8_mixed.workload.yaml
INFO        Calculated "0.5" = 0.5.
Generating storage and loop choices for Einsum Matmul1: 20it [00:00, 331.56it/s]
Generating storage and loop choices for Einsum Matmul2: 36it [00:00, 431.85it/s]
Generating storage and loop choices for Einsum Matmul3: 36it [00:00, 439.86it/s]
Generating storage and loop choices for Einsum Matmul4: 36it [00:00, 467.14it/s]
Generating storage and loop choices for Einsum Matmul5: 36it [00:00, 440.15it/s]
Generating storage and loop choices for Einsum Matmul6: 36it [00:00, 423.01it/s]
Generating storage and loop choices for Einsum Matmul7: 36it [00:00, 678.75it/s]
Generating storage and loop choices for Einsum 

SIM Matmul1 tensors: {'T1'}
SIM Matmul2 tensors: {'T2', 'T1'}
SIM Matmul3 tensors: {'T3', 'T2'}
SIM Matmul4 tensors: {'T3', 'T4'}
SIM Matmul5 tensors: {'T4', 'T5'}
SIM Matmul6 tensors: {'T6', 'T5'}
SIM Matmul7 tensors: {'T7', 'T6'}
SIM Matmul8 tensors: {'T7'}


Inital consolidate Matmul1: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 790/790 [00:01<00:00, 636.50it/s]
Inital consolidate Matmul2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1036/1036 [00:00<00:00, 1349.99it/s]
Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 220/220 [00:00<00:00, 510.07it/s]
Inital consolidate Matmul3: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1100/1100 [00:00<00:00, 1166.31it/s]
Grouping Partial Mappings: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Initial consolidate and group: 15.80 seconds

Einsum Matmul2 (2/8)
Consolidating: 0.00 seconds


Grouping Partial Mappings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 470.96it/s]


Combining: 0.34 seconds
Grouping: 0.00 seconds
Bucket merging: 0.01 seconds
Removed 0/220 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul1 <--> Matmul2: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 220/220 [00:00<00:00, 1006.71it/s]


Mapping merging: 0.53 seconds
Scaled runtime by 1.0. Runtime: 1.15
	Combining 181(99) x 391(211) -> 220
	Number of buckets for Einsum Matmul2: 220
	Number of mappings for Einsum Matmul2: 2696
	Mappings per group for Einsum Matmul2: 12.254545454545454

Einsum Matmul3 (3/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 13148.29it/s]


Combining: 0.07 seconds
Grouping: 0.00 seconds
Bucket merging: 0.02 seconds
Removed 0/351 (100.00% remaining)
Removing mappings that can't be combined later: 0.01 seconds


Merging mappings Matmul2 <--> Matmul3: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 351/351 [00:00<00:00, 955.84it/s]


Mapping merging: 0.81 seconds
Scaled runtime by 1.0. Runtime: 1.15
	Combining 210(114) x 420(226) -> 351
	Number of buckets for Einsum Matmul3: 351
	Number of mappings for Einsum Matmul3: 6214
	Mappings per group for Einsum Matmul3: 17.703703703703702

Einsum Matmul4 (4/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 799.15it/s]


Combining: 0.25 seconds
Grouping: 0.00 seconds
Bucket merging: 0.02 seconds
Removed 0/321 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul3 <--> Matmul4: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 321/321 [00:00<00:00, 628.97it/s]


Mapping merging: 0.76 seconds
Scaled runtime by 1.0. Runtime: 1.15
	Combining 210(114) x 391(211) -> 321
	Number of buckets for Einsum Matmul4: 321
	Number of mappings for Einsum Matmul4: 6940
	Mappings per group for Einsum Matmul4: 21.61993769470405

Einsum Matmul5 (5/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 101/101 [00:00<00:00, 176.55it/s]


Combining: 0.66 seconds
Grouping: 0.00 seconds
Bucket merging: 0.02 seconds
Removed 0/306 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul4 <--> Matmul5: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [00:00<00:00, 520.06it/s]


Mapping merging: 0.83 seconds
Scaled runtime by 1.0. Runtime: 1.15
	Combining 181(99) x 362(196) -> 306
	Number of buckets for Einsum Matmul5: 306
	Number of mappings for Einsum Matmul5: 8189
	Mappings per group for Einsum Matmul5: 26.76143790849673

Einsum Matmul6 (6/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 101/101 [00:00<00:00, 655.37it/s]


Combining: 0.25 seconds
Grouping: 0.00 seconds
Bucket merging: 0.02 seconds
Removed 0/336 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul5 <--> Matmul6: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 336/336 [00:00<00:00, 578.74it/s]


Mapping merging: 0.98 seconds
Scaled runtime by 1.0. Runtime: 1.15
	Combining 181(99) x 391(211) -> 336
	Number of buckets for Einsum Matmul6: 336
	Number of mappings for Einsum Matmul6: 10406
	Mappings per group for Einsum Matmul6: 30.970238095238095

Einsum Matmul7 (7/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 671.56it/s]


Combining: 0.31 seconds
Grouping: 0.00 seconds
Bucket merging: 0.02 seconds
Removed 0/351 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul6 <--> Matmul7: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 351/351 [00:00<00:00, 568.45it/s]


Mapping merging: 1.02 seconds
Scaled runtime by 1.0. Runtime: 1.15
	Combining 210(114) x 420(226) -> 351
	Number of buckets for Einsum Matmul7: 351
	Number of mappings for Einsum Matmul7: 10896
	Mappings per group for Einsum Matmul7: 31.042735042735043

Einsum Matmul8 (8/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 736.80it/s]


Combining: 0.31 seconds
Grouping: 0.00 seconds
Bucket merging: 0.02 seconds


Merging mappings Matmul7 <--> Matmul8: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 117/117 [00:00<00:00, 556.24it/s]


Mapping merging: 0.43 seconds
Scaled runtime by 1.0. Runtime: 1.91
	Combining 210(114) x 210(114) -> 117
	Number of buckets for Einsum Matmul8: 117
	Number of mappings for Einsum Matmul8: 2368
	Mappings per group for Einsum Matmul8: 20.23931623931624


Final consolidate: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 117/117 [00:00<00:00, 1398.07it/s]
Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 949.37it/s]
INFO        Loading yaml file architecture/snowcat.arch.yaml
INFO        Found top key variables in architecture/snowcat.arch.yaml
INFO        Found top key architecture in architecture/snowcat.arch.yaml
INFO        Loading yaml file workloads/matmuls8_mixed.workload.yaml
INFO        Found top key workload in workloads/matmuls8_mixed.workload.yaml



Initial consolidate and group: 15.80 seconds
Consolidating: 0.05 seconds
Combining: 2.18 seconds
Grouping: 0.01 seconds
Bucket merging: 0.14 seconds
Removing mappings that can't be combined later: 0.03 seconds
Mapping merging: 5.38 seconds

Total: 23.58 seconds

Saved results to cache results/data/matmul8_mixed.one_split.snowcat.matmuls8_mixed.pkl


Generating storage and loop choices for Einsum Matmul1: 20it [00:00, 557.45it/s]
Generating storage and loop choices for Einsum Matmul2: 36it [00:00, 926.92it/s]
Generating storage and loop choices for Einsum Matmul3: 36it [00:00, 973.43it/s]
Generating storage and loop choices for Einsum Matmul4: 36it [00:00, 894.65it/s]
Generating storage and loop choices for Einsum Matmul5: 36it [00:00, 831.62it/s]
Generating storage and loop choices for Einsum Matmul6: 36it [00:00, 813.59it/s]
Generating storage and loop choices for Einsum Matmul7: 36it [00:00, 753.84it/s]
Generating storage and loop choices for Einsum Matmul8: 18it [00:00, 757.51it/s]
Generating Partial Mappings: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 254/254 [00:08<00:00, 31.13it/s]
Grouping Partial Mappings for Matmul1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████

SIM Matmul1 tensors: {'T1'}
SIM Matmul2 tensors: {'T2', 'T1'}
SIM Matmul3 tensors: {'T3', 'T2'}
SIM Matmul4 tensors: {'T3', 'T4'}
SIM Matmul5 tensors: {'T4', 'T5'}
SIM Matmul6 tensors: {'T5', 'T6'}
SIM Matmul7 tensors: {'T6', 'T7'}
SIM Matmul8 tensors: {'T7'}


Inital consolidate Matmul1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 790/790 [00:00<00:00, 2236.11it/s]
Inital consolidate Matmul2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1924/1924 [00:00<00:00, 3176.25it/s]
Grouping Partial Mappings: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 429/429 [00:00<00:00, 1013.51it/s]
Inital consolidate Matmul3: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2052/2052 [00:00<00:00, 2851.26it/s]
Grouping Partial Mappings: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Initial consolidate and group: 17.18 seconds

Einsum Matmul2 (2/8)
Consolidating: 0.00 seconds


Grouping Partial Mappings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 489.24it/s]


Combining: 0.36 seconds
Grouping: 0.00 seconds
Bucket merging: 0.03 seconds
Removed 0/520 (100.00% remaining)
Removing mappings that can't be combined later: 0.01 seconds


Merging mappings Matmul1 <--> Matmul2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 520/520 [00:00<00:00, 733.41it/s]


Mapping merging: 1.04 seconds
Scaled runtime by 1.0. Runtime: 1.11
	Combining 181(99) x 699(372) -> 520
	Number of buckets for Einsum Matmul2: 520
	Number of mappings for Einsum Matmul2: 3730
	Mappings per group for Einsum Matmul2: 7.173076923076923

Einsum Matmul3 (3/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 114/114 [00:00<00:00, 826.86it/s]


Combining: 0.28 seconds
Grouping: 0.00 seconds
Bucket merging: 0.03 seconds
Removed 0/557 (100.00% remaining)
Removing mappings that can't be combined later: 0.01 seconds


Merging mappings Matmul2 <--> Matmul3: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 557/557 [00:01<00:00, 528.06it/s]


Mapping merging: 1.27 seconds
Scaled runtime by 1.0. Runtime: 1.11
	Combining 210(114) x 728(387) -> 557
	Number of buckets for Einsum Matmul3: 557
	Number of mappings for Einsum Matmul3: 9052
	Mappings per group for Einsum Matmul3: 16.251346499102333

Einsum Matmul4 (4/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 114/114 [00:00<00:00, 794.73it/s]


Combining: 0.29 seconds
Grouping: 0.00 seconds
Bucket merging: 0.03 seconds
Removed 0/520 (100.00% remaining)
Removing mappings that can't be combined later: 0.01 seconds


Merging mappings Matmul3 <--> Matmul4: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 520/520 [00:00<00:00, 560.20it/s]


Mapping merging: 1.24 seconds
Scaled runtime by 1.0. Runtime: 1.11
	Combining 210(114) x 656(350) -> 520
	Number of buckets for Einsum Matmul4: 520
	Number of mappings for Einsum Matmul4: 12135
	Mappings per group for Einsum Matmul4: 23.33653846153846

Einsum Matmul5 (5/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 501.08it/s]


Combining: 0.32 seconds
Grouping: 0.00 seconds
Bucket merging: 0.03 seconds
Removed 0/483 (100.00% remaining)
Removing mappings that can't be combined later: 0.01 seconds


Merging mappings Matmul4 <--> Matmul5: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 483/483 [00:00<00:00, 547.23it/s]


Mapping merging: 1.14 seconds
Scaled runtime by 1.0. Runtime: 1.11
	Combining 181(99) x 627(335) -> 483
	Number of buckets for Einsum Matmul5: 483
	Number of mappings for Einsum Matmul5: 16567
	Mappings per group for Einsum Matmul5: 34.30020703933747

Einsum Matmul6 (6/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 659.59it/s]


Combining: 0.31 seconds
Grouping: 0.00 seconds
Bucket merging: 0.03 seconds
Removed 0/520 (100.00% remaining)
Removing mappings that can't be combined later: 0.01 seconds


Merging mappings Matmul5 <--> Matmul6: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 520/520 [00:01<00:00, 439.76it/s]


Mapping merging: 1.61 seconds
Scaled runtime by 1.0. Runtime: 1.11
	Combining 181(99) x 699(372) -> 520
	Number of buckets for Einsum Matmul6: 520
	Number of mappings for Einsum Matmul6: 19084
	Mappings per group for Einsum Matmul6: 36.7

Einsum Matmul7 (7/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 114/114 [00:00<00:00, 575.24it/s]


Combining: 0.41 seconds
Grouping: 0.00 seconds
Bucket merging: 0.04 seconds
Removed 0/557 (100.00% remaining)
Removing mappings that can't be combined later: 0.01 seconds


Merging mappings Matmul6 <--> Matmul7: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 557/557 [00:01<00:00, 364.60it/s]


Mapping merging: 1.96 seconds
Scaled runtime by 1.0. Runtime: 1.11
	Combining 210(114) x 728(387) -> 557
	Number of buckets for Einsum Matmul7: 557
	Number of mappings for Einsum Matmul7: 19930
	Mappings per group for Einsum Matmul7: 35.78096947935368

Einsum Matmul8 (8/8)
Consolidating: 0.01 seconds


Grouping Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 114/114 [00:00<00:00, 533.30it/s]


Combining: 0.46 seconds
Grouping: 0.00 seconds
Bucket merging: 0.02 seconds


Merging mappings Matmul7 <--> Matmul8: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 114/114 [00:00<00:00, 515.36it/s]


Mapping merging: 1.10 seconds
Scaled runtime by 1.0. Runtime: 2.68
	Combining 210(114) x 210(114) -> 114
	Number of buckets for Einsum Matmul8: 114
	Number of mappings for Einsum Matmul8: 2445
	Mappings per group for Einsum Matmul8: 21.44736842105263


Final consolidate: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 114/114 [00:00<00:00, 1376.36it/s]
Grouping Partial Mappings: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1027.51it/s]
INFO        Loading yaml file architecture/snowcat_even.arch.yaml
INFO        Found top key variables in architecture/snowcat_even.arch.yaml
INFO        Found top key architecture in architecture/snowcat_even.arch.yaml
INFO        Loading yaml file workloads/matmuls8_mixed.workload.yaml
INFO        Found top key workload in workloads/matmuls8_mixed.workload.yaml



Initial consolidate and group: 17.18 seconds
Consolidating: 0.06 seconds
Combining: 2.43 seconds
Grouping: 0.01 seconds
Bucket merging: 0.20 seconds
Removing mappings that can't be combined later: 0.04 seconds
Mapping merging: 9.35 seconds

Total: 29.27 seconds

Saved results to cache results/data/matmul8_mixed.None.snowcat.matmuls8_mixed.pkl


Generating storage and loop choices for Einsum Matmul1: 2it [00:00, 348.99it/s]
Generating storage and loop choices for Einsum Matmul2: 1it [00:00, 101.68it/s]
Generating storage and loop choices for Einsum Matmul3: 1it [00:00, 157.54it/s]
Generating storage and loop choices for Einsum Matmul4: 1it [00:00, 198.27it/s]
Generating storage and loop choices for Einsum Matmul5: 1it [00:00, 192.50it/s]
Generating storage and loop choices for Einsum Matmul6: 1it [00:00, 141.59it/s]
Generating storage and loop choices for Einsum Matmul7: 1it [00:00, 204.88it/s]
Generating storage and loop choices for Einsum Matmul8: 1it [00:00, 154.17it/s]
Generating Partial Mappings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 15264.35it/s]


SIM Matmul1 tensors: {'T1'}
SIM Matmul2 tensors: {'T2', 'T1'}
SIM Matmul3 tensors: {'T3', 'T2'}
SIM Matmul4 tensors: {'T3', 'T4'}
SIM Matmul5 tensors: {'T4', 'T5'}
SIM Matmul6 tensors: {'T6', 'T5'}
SIM Matmul7 tensors: {'T7', 'T6'}
SIM Matmul8 tensors: {'T7'}


Inital consolidate Matmul1: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 4471.71it/s]
Inital consolidate Matmul2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 3319.92it/s]
Inital consolidate Matmul3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 5224.11it/s]
Inital consolidate Matmul4: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 10555.03it/s]
Inital consolidate Matmul5: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Initial consolidate and group: 0.34 seconds

Einsum Matmul2 (2/8)
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.00 seconds
Removed 0/8 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul1 <--> Matmul2: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 9393.74it/s]


Mapping merging: 0.03 seconds
Scaled runtime by 1.0. Runtime: 0.09
	Combining 97(57) x 7(8) -> 8
	Number of buckets for Einsum Matmul2: 8
	Number of mappings for Einsum Matmul2: 8
	Mappings per group for Einsum Matmul2: 1.0

Einsum Matmul3 (3/8)
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.00 seconds
Removed 0/8 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul2 <--> Matmul3: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 6807.55it/s]


Mapping merging: 0.04 seconds
Scaled runtime by 1.0. Runtime: 0.09
	Combining 7(8) x 7(8) -> 8
	Number of buckets for Einsum Matmul3: 8
	Number of mappings for Einsum Matmul3: 8
	Mappings per group for Einsum Matmul3: 1.0

Einsum Matmul4 (4/8)
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.00 seconds
Removed 0/8 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul3 <--> Matmul4: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 8774.69it/s]


Mapping merging: 0.04 seconds
Scaled runtime by 1.0. Runtime: 0.09
	Combining 7(8) x 7(8) -> 8
	Number of buckets for Einsum Matmul4: 8
	Number of mappings for Einsum Matmul4: 8
	Mappings per group for Einsum Matmul4: 1.0

Einsum Matmul5 (5/8)
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.00 seconds
Removed 0/8 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul4 <--> Matmul5: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 6129.78it/s]


Mapping merging: 0.03 seconds
Scaled runtime by 1.0. Runtime: 0.09
	Combining 7(8) x 7(8) -> 8
	Number of buckets for Einsum Matmul5: 8
	Number of mappings for Einsum Matmul5: 8
	Mappings per group for Einsum Matmul5: 1.0

Einsum Matmul6 (6/8)
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.00 seconds
Removed 0/8 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul5 <--> Matmul6: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 6905.63it/s]


Mapping merging: 0.03 seconds
Scaled runtime by 1.0. Runtime: 0.09
	Combining 7(8) x 7(8) -> 8
	Number of buckets for Einsum Matmul6: 8
	Number of mappings for Einsum Matmul6: 8
	Mappings per group for Einsum Matmul6: 1.0

Einsum Matmul7 (7/8)
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.00 seconds
Removed 0/8 (100.00% remaining)
Removing mappings that can't be combined later: 0.00 seconds


Merging mappings Matmul6 <--> Matmul7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 11586.48it/s]


Mapping merging: 0.02 seconds
Scaled runtime by 1.0. Runtime: 0.09
	Combining 7(8) x 7(8) -> 8
	Number of buckets for Einsum Matmul7: 8
	Number of mappings for Einsum Matmul7: 8
	Mappings per group for Einsum Matmul7: 1.0

Einsum Matmul8 (8/8)
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.00 seconds


Merging mappings Matmul7 <--> Matmul8: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 6645.76it/s]


Mapping merging: 0.03 seconds
Scaled runtime by 1.0. Runtime: 0.11
	Combining 7(8) x 112(64) -> 8
	Number of buckets for Einsum Matmul8: 8
	Number of mappings for Einsum Matmul8: 8
	Mappings per group for Einsum Matmul8: 1.0


Final consolidate: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 10994.24it/s]
Grouping Partial Mappings: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1032.57it/s]



Initial consolidate and group: 0.34 seconds
Consolidating: 0.00 seconds
Combining: 0.00 seconds
Grouping: 0.00 seconds
Bucket merging: 0.01 seconds
Removing mappings that can't be combined later: 0.00 seconds
Mapping merging: 0.21 seconds

Total: 0.56 seconds

Saved results to cache results/data/matmul8_mixed.one_split.snowcat_even.matmuls8_mixed.pkl


INFO        Loading yaml file architecture/snowcat_weight_tiled.arch.yaml
INFO        Found top key variables in architecture/snowcat_weight_tiled.arch.yaml
INFO        Found top key architecture in architecture/snowcat_weight_tiled.arch.yaml
INFO        Loading yaml file workloads/matmuls8_mixed.workload.yaml
INFO        Found top key workload in workloads/matmuls8_mixed.workload.yaml
Generating storage and loop choices for Einsum Matmul1: 0it [00:00, ?it/s]


ParseError: ParseError in : name 'weight' is not defined. Set expression: "~weight". Symbol table:
	Nothing: InvertibleSet(frozenset())
	All: InvertibleSet(frozenset({'T0', 'W0', 'T1'}))
	Inputs: InvertibleSet(frozenset({'T0', 'W0'}))
	Outputs: InvertibleSet(frozenset({'T1'}))
	Intermediates: InvertibleSet(frozenset({'T1'}))
	Shared: InvertibleSet(frozenset({'T1'}))
	T0: InvertibleSet(frozenset({'T0'}))
	W0: InvertibleSet(frozenset({'W0'}))
	T1: InvertibleSet(frozenset({'T1'}))
	m: InvertibleSet(frozenset({'m'}))
	n0: InvertibleSet(frozenset({'n0'}))
	n1: InvertibleSet(frozenset({'n1'}))
	MainMemory: InvertibleSet(frozenset({'T0', 'W0'}))
	GlobalBuffer: InvertibleSet(frozenset({'T0', 'W0', 'T1'}))

In [12]:
import copy
import re
from fastfusion.frontend import Workload
from fastfusion.frontend.mapping import Iteration, Mapping, Nested, Split, Storage
from fastfusion.visualization.interactive import plotly_show
from fastfusion.mapper.FFM.visualization import make_mapping

# Einsum names are read straight from the workload YAML so plotting does not
# depend on any Specification object.
einsum_names = Workload.from_yaml(
    WORKLOAD_DIR / 'matmuls8_mixed.workload.yaml'
).einsum_names

# Plot the 'snowcat_even' / one_split mappings: GlobalBuffer level-0 resource
# column against the energy metric column, with logscales enabled.
plotly_show(
    mappings_tileflow.data,
    "RESOURCE_GlobalBuffer_LEVEL_0",
    "metric_Energy",
    logscales=True,
    einsum_names=einsum_names,
)

INFO        Loading yaml file workloads/matmuls8_mixed.workload.yaml
INFO        Found top key workload in workloads/matmuls8_mixed.workload.yaml


VBox(children=(FigureWidget({
    'data': [{'line': {'shape': 'hv'},
              'marker': {'symbol': 'circl…

In [3]:
# Dump the nodes of the Matmul1 mapping for the last four rows of the
# one_split results, starting from the final row and walking backwards.
for i in range(1, 5):
    print(i)
    nodes = mappings_one_split.data.iloc[-i]['Matmul1___MAPPING'].nodes
    for node in nodes:
        print(node)

1
W0 in MainMemory
type=None tensor='W0' memory='MainMemory'
type=None tensor='W0' memory='MainMemory'
T0 in MainMemory
type=None tensor='T0' memory='MainMemory'
type=None tensor='T0' memory='MainMemory'
for n1 shape tileshape0
W0 in GlobalBuffer
type=None tensor='W0' memory='GlobalBuffer'
type=None tensor='W0' memory='GlobalBuffer'
for m shape tileshape1
T1 in GlobalBuffer
type=None tensor='T1' memory='GlobalBuffer'
type=None tensor='T1' memory='GlobalBuffer'
for m shape 1
for n0 shape 1
T0 in GlobalBuffer
type=None tensor='T0' memory='GlobalBuffer'
type=None tensor='T0' memory='GlobalBuffer'
for n1 shape 1
Einsum Matmul1
2
W0 in MainMemory
type=None tensor='W0' memory='MainMemory'
type=None tensor='W0' memory='MainMemory'
T0 in MainMemory
type=None tensor='T0' memory='MainMemory'
type=None tensor='T0' memory='MainMemory'
for n1 shape tileshape0
W0 in GlobalBuffer
type=None tensor='W0' memory='GlobalBuffer'
type=None tensor='W0' memory='GlobalBuffer'
for m shape tileshape1
T1 in Globa

In [5]:
# `spec` only exists as a local inside get_sims_with_cache, so referencing it
# here fails on a fresh kernel. Use `einsum_names`, which an earlier cell loads
# from the same matmuls8_mixed workload YAML.
plotly_show(mappings_one_split.data, "RESOURCE_GlobalBuffer_LEVEL_0", "metric_Energy", logscales=True, einsum_names=einsum_names)

VBox(children=(FigureWidget({
    'data': [{'line': {'shape': 'hv'},
              'marker': {'symbol': 'circl…