# Locality Comparison

Define a matrix of size n x n and perform a simple scaling operation on it, traversing its elements once in `row major` order and once in `column major` order. Compare the time required by each traversal order in the form of a table.

In [1]:
import time, random
import pandas as pd
import numpy as np
from typing import List
from tqdm import tqdm
from copy import deepcopy

In [2]:
def get_square_matrix(n:int):
    """Return an ``n`` x ``n`` NumPy array of standard-normal random samples.

    The values come from NumPy's global (legacy) random state, so seeding it
    with ``np.random.seed`` makes the result repeatable.
    """
    return np.random.standard_normal(size = (n, n))

def scale_row_major(A, scaling_factor:int = 2):
    """Return a scaled copy of ``A``, visiting elements row by row.

    The input is deep-copied first, so the caller's matrix is left untouched.
    Elements are updated one at a time with explicit Python loops (outer loop
    over rows, inner loop over columns) because the traversal order itself is
    what this function is meant to exercise.

    Args:
        A: A rectangular 2-D container indexable as ``A[i][j]`` (e.g. a list
            of lists or a 2-D NumPy array). May be empty.
        scaling_factor: Multiplier applied to every element.

    Returns:
        A copy of ``A`` with every element multiplied by ``scaling_factor``.
    """
    A = deepcopy(A)
    r = len(A)
    # An empty matrix has no first row to measure; treat it as zero columns
    # instead of failing on A[0].
    c = len(A[0]) if r else 0
    for i in range(r):
        for j in range(c):
            # Row major: the column index varies fastest
            A[i][j] = A[i][j] * scaling_factor
    return A

def scale_column_major(A, scaling_factor:int = 2):
    """Return a scaled copy of ``A``, visiting elements column by column.

    The input is deep-copied first, so the caller's matrix is left untouched.
    Elements are updated one at a time with explicit Python loops (outer loop
    over columns, inner loop over rows) because the traversal order itself is
    what this function is meant to exercise.

    Args:
        A: A rectangular 2-D container indexable as ``A[i][j]`` (e.g. a list
            of lists or a 2-D NumPy array). May be empty or non-square.
        scaling_factor: Multiplier applied to every element.

    Returns:
        A copy of ``A`` with every element multiplied by ``scaling_factor``.
    """
    A = deepcopy(A)
    r = len(A)
    # An empty matrix has no first row to measure; treat it as zero columns
    # instead of failing on A[0].
    c = len(A[0]) if r else 0
    # The column loop is bounded by the column count and the row loop by the
    # row count, so non-square matrices are indexed within range. For square
    # input the visiting order is the same as looping rows-count times outside.
    for j in range(c):
        for i in range(r):
            # Column major: the row index varies fastest
            A[i][j] = A[i][j] * scaling_factor
    return A

In [3]:
# Each (size, order) combination is timed this many times;
# the repeated measurements are averaged at the end
n_trials = 5
# Collects one [size, order, elapsed time] record per timed run
time_container = list()

In [4]:
# Matrix sizes to benchmark. The progress-bar label is derived from this list
# so the reported count always matches the number of sizes actually run.
sizes = [64, 128, 256, 512, 768, 1024, 1536, 2048, 3072, 4096, 6000]
# (label, function) pairs, timed in this order on every trial
orderings = [("Row Major", scale_row_major), ("Column Major", scale_column_major)]

# Start from an empty container so re-running this cell does not pile
# duplicate measurements on top of an earlier run.
time_container.clear()

for size in tqdm(sizes, desc = f"Comparing for {len(sizes)} sizes"):
    # One matrix per size, shared by every trial and both traversal orders
    A = get_square_matrix(size)
    for _trial in range(n_trials):
        for order, scale_fn in orderings:
            # perf_counter is the clock intended for measuring short intervals;
            # the wall clock returned by time.time() can be adjusted mid-run.
            # The measured span covers the whole call, including the copy the
            # scaling function makes of A.
            start = time.perf_counter()
            scale_fn(A)
            time_elapsed = time.perf_counter() - start
            time_container.append([size, order, time_elapsed])

Comparing for 12 sizes: 100%|██████████| 11/11 [06:11<00:00, 33.74s/it]


In [5]:
# One row per timed run (size, traversal order, elapsed time); shown transposed
# so the individual trials can be scanned side by side
df = pd.DataFrame(data = time_container, columns = ["Size", "Order", "Time"])
df.transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,100,101,102,103,104,105,106,107,108,109
Size,64,64,64,64,64,64,64,64,64,64,...,6000,6000,6000,6000,6000,6000,6000,6000,6000,6000
Order,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,...,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major,Row Major,Column Major
Time,0.002228,0.002083,0.002092,0.002076,0.002102,0.002025,0.00201,0.002011,0.002075,0.002065,...,17.963508,18.64632,18.030059,18.594166,18.06867,19.197866,18.430285,18.845284,18.031836,18.732883


In [6]:
# Average the elapsed time over the trials for every (size, order) combination
df.groupby(["Size", "Order"])[["Time"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Time
Size,Order,Unnamed: 2_level_1
64,Column Major,0.002052
64,Row Major,0.002101
128,Column Major,0.008109
128,Row Major,0.007978
256,Column Major,0.03201
256,Row Major,0.032132
512,Column Major,0.131892
512,Row Major,0.129618
768,Column Major,0.296647
768,Row Major,0.295258
