# Matrix Multiplication Analysis

In [105]:
from pprint import pprint

# Matrix dimension; used further down to derive the FLOP count.
MATRIX_SIZE = 32

# Stats dump to analyse (presumably GPGPU-Sim output, judging by its keys).
filename = "./sim_480/stats (q5).txt"

# Load the whole file once; every lookup below scans this in-memory list.
# Each entry keeps its trailing newline.
with open(filename) as f:
    lines = list(f)

In [55]:
def find_lines(pattern: str) -> "list[str]":
    """Return every loaded stats line that contains *pattern*.

    Matching is a plain, case-sensitive substring test (not a regex), so a
    short pattern also matches longer keys that contain it.

    Args:
        pattern: Substring to look for.

    Returns:
        The matching entries of the module-level ``lines`` list, in file
        order and unmodified (trailing newlines included). Empty when
        nothing matches.
    """
    # The annotation is quoted so the generic form stays valid on
    # interpreters that cannot subscript the built-in ``list`` at runtime.
    return [line for line in lines if pattern in line]

def find_line(pattern: str) -> "str | None":
    """Return the first loaded stats line that contains *pattern*.

    Matching is a plain, case-sensitive substring test (not a regex), so a
    short pattern also matches longer keys that contain it; only the
    earliest hit in file order is returned.

    Args:
        pattern: Substring to look for.

    Returns:
        The first matching entry of the module-level ``lines`` list,
        unmodified (trailing newline included), or ``None`` when no line
        matches. Callers that chain string methods onto the result will
        fail with ``AttributeError`` in that case.
    """
    return next((line for line in lines if pattern in line), None)

In [122]:
# A) Clock Cycles
# The cycle count is the field after "=" on the first line that mentions
# gpu_sim_cycle.
cycle_line = find_line("gpu_sim_cycle")
clock_cycles = int(cycle_line.strip().split("=")[1])

# Execution time (sim_cycles/core_frequency)
# The second whitespace-separated token of the clock-domain line is a
# colon-separated list; its first entry is taken as the core clock.
clock_domains = find_line("gpgpu_clock_domains").split()[1]
core_freq_mhz = float(clock_domains.split(":")[0])
core_freq = core_freq_mhz * 1e6  # clock is in MHz
execution_time = clock_cycles / core_freq


In [130]:
# B) # Instructions
# Simulation wall time in whole seconds: the integer right after the "(" on
# the gpgpu_simulation_time line ([:-1] drops the line's final character
# before the split).
time_sec = int(find_line("gpgpu_simulation_time").split("(")[1][:-1].split(" ")[0])
# Simulation rate: the integer right after "=" on the first matching line.
rate = int(find_line("gpgpu_simulation_rate").split("=")[1].strip().split(" ")[0])

# rate * time_sec only estimates the instruction count: both factors are
# whole numbers, so the product drifts from the true value whenever the
# reported rate was rounded (and is 0 for a sub-second run).
instructions = rate * time_sec

# Prefer the exact counter when the stats file provides one.
# NOTE(review): assumes a "gpu_sim_insn = <count>" line sits next to
# gpu_sim_cycle / gpu_ipc -- confirm against the stats file. If it is missing
# or malformed the estimate above is kept.
insn_line = find_line("gpu_sim_insn")
if insn_line is not None and insn_line.strip().startswith("gpu_sim_insn"):
    try:
        instructions = int(insn_line.split("=")[1])
    except (IndexError, ValueError):
        pass  # unexpected layout: fall back to rate * time_sec

In [131]:
# C) High IPC ?
# IPC is read straight from the stats file: the field after "=" on the first
# line that mentions gpu_ipc.
ipc_field = find_line("gpu_ipc").strip().split("=")[1]
IPC = float(ipc_field.strip())

In [132]:
# D) DRAM Bandwidth & L2 Cache Hit rate

# Peak off-chip DRAM bandwidth (bytes/s) =
#     gpgpu_n_mem * gpgpu_n_mem_per_ctrlr * gpgpu_dram_buswidth * DRAM clock * 2
# The last colon-separated entry of the clock-domain option is taken as the
# DRAM clock.
DRAM_Clock = float(find_line("gpgpu_clock_domains").split()[1].split(":")[-1]) * 1e6  # clock is in MHz
gpgpu_dram_buswidth = int(find_line("gpgpu_dram_buswidth").split()[1])  # gpgpu_dram_buswidth is in bytes

# "gpgpu_n_mem" is also a prefix of "gpgpu_n_mem_per_ctrlr", so a bare
# substring lookup returns whichever of the two lines comes first. Try the
# space-terminated name first and fall back to the bare name if that misses.
n_mem_line = find_line("gpgpu_n_mem ") or find_line("gpgpu_n_mem")
gpgpu_n_mem = int(n_mem_line.split()[1])
gpgpu_n_mem_per_ctrlr = int(find_line("gpgpu_n_mem_per_ctrlr").split()[1])

# Dividing by 1e9 turns bytes/s into GB/s.
DRAM_peak_bandwidth = (gpgpu_n_mem * gpgpu_n_mem_per_ctrlr * gpgpu_dram_buswidth * DRAM_Clock * 2) / 1e9  # GB/s

# Mean of every reported bw_util value (the field after the last "=" on each
# matching line). Raises ZeroDivisionError if the file has no such line.
average_utilization = [float(ut.strip().split("=")[-1]) for ut in find_lines("bw_util")]
average_utilization = sum(average_utilization) / len(average_utilization)

# Achieved bandwidth in GB/s: peak scaled by the mean utilization.
DRAM_bandwidth = DRAM_peak_bandwidth * average_utilization

# L2 Hit Rate

# The file reports a miss rate; convert it to a hit percentage rounded to
# two decimals.
L2_miss_rate = float(find_line("L2_total_cache_miss_rate").strip().split("=")[1])
L2_hit_rate = round((1.0 - L2_miss_rate) * 100, 2)

In [133]:
# E) FLOP/s ?
# 2 * MATRIX_SIZE operations are counted for each of the MATRIX_SIZE ** 2
# output elements; dividing by the execution time (seconds) gives FLOP/s.
flop_count = MATRIX_SIZE ** 2 * (2 * MATRIX_SIZE)
FLOPs = flop_count / execution_time

In [135]:
# Summary table: one metric per row, values right-aligned in a 20-character
# column. Rows are collected first and written with a single print call.
report_rows = (
    f"Cycles        : {clock_cycles:-20}",
    f"Instructions  : {instructions:-20}",
    f"IPC           : {IPC:-20}",
    f"DRAM BW       : {DRAM_bandwidth:-20} (GB/s)",
    f"DRAM BW       : {DRAM_bandwidth * 8:-20} (Gb/s)",  # bytes -> bits
    f"L2 Hit Rate   : {L2_hit_rate:-20} %",
    f"GFLOP/s       : {FLOPs/1e9:20}",
    f"Execution time: {execution_time*1000:20} (ms)",  # seconds -> ms
)
print(*report_rows, sep="\n")

Cycles        :                 5368
Instructions  :               154624
IPC           :              28.8048
DRAM BW       :           0.17526432 (GB/s)
DRAM BW       :           1.40211456 (Gb/s)
L2 Hit Rate   :                60.09 %
GFLOP/s       :    8.546050670640836
Execution time: 0.007668571428571427 (ms)
