# Locality Comparison

Define a matrix of size n x n and scale it by a constant factor, traversing its elements once in `row major` order and once in `column major` order. Compare the time each traversal takes and present the results as a table.

In [1]:
import time, random
import pandas as pd
from typing import List
from tqdm import tqdm
from copy import deepcopy

In [2]:
def get_square_matrix(n: int):
    """Build an ``n`` x ``n`` matrix filled with random integers.

    Every entry is drawn independently with ``random.randint(0, 11)``,
    i.e. from 0 to 11 inclusive. Entries are generated row by row.

    Args:
        n: Number of rows and columns.

    Returns:
        A list of ``n`` rows, each a list of ``n`` integers.
    """
    return [[random.randint(0, 11) for _col in range(n)] for _row in range(n)]

def scale_row_major(A: List, scaling_factor: int = 1):
    """Return a scaled copy of ``A``, visiting elements in row-major order.

    The input matrix is not modified; scaling is applied to a deep copy.

    Args:
        A: Matrix given as a list of rows. The number of columns is taken
            from the first row.
        scaling_factor: Multiplier applied to every element.

    Returns:
        A new matrix with every element multiplied by ``scaling_factor``.
        An empty matrix yields an empty copy.
    """
    # NOTE: the deep copy happens inside the call, so any caller timing this
    # function also measures the cost of the copy.
    scaled = deepcopy(A)
    if len(scaled) == 0:
        # Nothing to scale; also avoids indexing a first row that does not exist.
        return scaled
    n_rows, n_cols = len(scaled), len(scaled[0])
    for i in range(n_rows):
        for j in range(n_cols):
            # Row major: the inner loop walks along a single row.
            scaled[i][j] = scaled[i][j] * scaling_factor
    return scaled

def scale_column_major(A: List, scaling_factor: int = 1):
    """Return a scaled copy of ``A``, visiting elements in column-major order.

    The input matrix is not modified; scaling is applied to a deep copy.

    Args:
        A: Matrix given as a list of rows. The number of columns is taken
            from the first row.
        scaling_factor: Multiplier applied to every element.

    Returns:
        A new matrix with every element multiplied by ``scaling_factor``.
        An empty matrix yields an empty copy.
    """
    # NOTE: the deep copy happens inside the call, so any caller timing this
    # function also measures the cost of the copy.
    scaled = deepcopy(A)
    if len(scaled) == 0:
        # Nothing to scale; also avoids indexing a first row that does not exist.
        return scaled
    n_rows, n_cols = len(scaled), len(scaled[0])
    # Outer loop over columns, inner loop over rows. Bounding the row index by
    # the row count and the column index by the column count keeps the
    # traversal valid for non-square matrices as well.
    for col in range(n_cols):
        for row in range(n_rows):
            # Column major: the inner loop walks down a single column.
            scaled[row][col] = scaled[row][col] * scaling_factor
    return scaled

In [3]:
# Accumulates one [size, order, elapsed seconds] record per timed run;
# the records are later loaded into a DataFrame for inspection.
time_container = []
# Number of repetitions for every (size, order) pair; the repeated timings
# are averaged afterwards.
n_trials = 5

In [4]:
# Time both traversal orders on the same random matrix for every size.
# NOTE: each scale_* call deep-copies the matrix internally, so every
# measurement includes the cost of that copy.
sizes = [16, 32, 64, 128, 256, 512, 768, 1024, 1536, 2048, 3072, 4096]
# Row major is timed before column major within each trial, so records are
# appended in alternating Row Major / Column Major order.
orders = [("Row Major", scale_row_major), ("Column Major", scale_column_major)]
for size in tqdm(sizes, desc = f"Comparing for {len(sizes)} sizes"):
    A = get_square_matrix(size)
    for _trial in range(n_trials):
        for order, scale in orders:
            # perf_counter is a monotonic, high-resolution clock intended for
            # measuring short durations; time.time() follows the system clock
            # and can jump if that clock is adjusted.
            start = time.perf_counter()
            scale(A)
            time_elapsed = time.perf_counter() - start
            time_container.append([size, order, time_elapsed])

Comparing for 12 sizes: 100%|██████████| 12/12 [03:31<00:00, 17.62s/it]


In [5]:
# Load the raw timing records into a DataFrame: one row per timed run, with
# the matrix size, the traversal order label and the elapsed time.
df = pd.DataFrame(time_container, columns = ["Size", "Order", "Time"])
# Display the transposed frame so that each timed run becomes a column.
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,110,111,112,113,114,115,116,117,118,119
Size,16,16,16,16,16,16,16,16,16,16,...,4096,4096,4096,4096,4096,4096,4096,4096,4096,4096
Order,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,...,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major
Time,0.000663996,0.000629663,0.000621796,0.000652313,0.000616074,0.000611782,0.00121188,0.000624895,0.000615835,0.000649929,...,8.47738,8.76069,8.51824,8.80686,8.61461,8.91558,8.65599,8.84131,8.6127,8.91145


In [6]:
# Average the recorded times over all trials for each (Size, Order) pair
df.groupby(by = ["Size", "Order"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Time
Size,Order,Unnamed: 2_level_1
16,Column Major,0.000634
16,Row Major,0.000746
32,Column Major,0.001995
32,Row Major,0.002077
64,Column Major,0.002503
64,Row Major,0.002571
128,Column Major,0.008313
128,Row Major,0.008252
256,Column Major,0.03275
256,Row Major,0.032821
