In [10]:
PATH_OF_FOLDER:str = "/home/shared/val2"
PATH_OF_CONFIG_FILE:str = "/home/shared/imagenet_class_index.json"
MODEL_NAME:str = "vit_base_patch16_224"
NO_OF_CLASSES = 10
DEVICE_CUDA = "cuda:1"
DEVICE_CPU = "cpu"
MINIMUM_TOTAL_OCCURENCES = 80
SCALE = 5

#### Some variables to be reused

In [11]:
from timm.data import resolve_data_config
from torchvision.transforms import Compose
from timm.data.transforms_factory import create_transform
config:dict = resolve_data_config({}, model = MODEL_NAME)
TRANSFORMATION_COMPOSE :Compose = create_transform(**config)

In [12]:
from PIL import Image
from torch import Tensor
from cudf.core.dataframe import DataFrame
import cupy
import cudf

class HelperFuntions:
    
    @staticmethod
    def list_files_os_listdir(directory_path):
        """
        Lists all files in a given directory using os.listdir().
        Returns a list of full paths to the files.
        """
        files = []
        try:
            # Get all entries (files and directories)
            for item_name in os.listdir(directory_path):
                # Construct the full path
                item_path = os.path.join(directory_path, item_name)
                # Check if it's a file
                if os.path.isfile(item_path):
                    files.append(item_path)
        except FileNotFoundError:
            print(f"Error: Directory '{directory_path}' not found.")
        except PermissionError:
            print(f"Error: Permission denied for directory '{directory_path}'.")
        return files

    @staticmethod
    def get_input_tensor(file_name:str) -> Tensor:
        img = Image.open(file_name).convert("RGB")
        input_tensor = TRANSFORMATION_COMPOSE(img).to(device).unsqueeze(0)
        return input_tensor

    @staticmethod
    def multiply_two_matrices(matrix_a:Tensor, matrix_b:Tensor) -> Tensor:
        m, n = matrix_a.shape
        n2 , p = matrix_b.shape
        matrix_a = matrix_a.unsqueeze(2).expand(m, n, p)
        matrix_b = matrix_b.t().unsqueeze(0).expand(m, p, n).transpose(1, 2)
        result = matrix_a * matrix_b
        matrix_a = matrix_a.reshape(-1)
        matrix_b = matrix_b.reshape(-1)
        result = result.reshape(-1)
        ops = torch.stack([matrix_a, matrix_b, result], dim=1)
        del matrix_a, matrix_b, result
        return ops

    @staticmethod
    def find_common_operations(ops:Tensor) -> DataFrame:
        dlpack = torch.utils.dlpack.to_dlpack(ops)
        cupy_array = cupy.from_dlpack(dlpack)
        del dlpack, ops
        df = cudf.DataFrame({
            "a": cupy_array[:, 0],
            "b": cupy_array[:, 1],
            "r": cupy_array[:, 2],
        })
        df_copy = df.copy()
        del df, cupy_array
        # Round columns
        df_copy["rounded_r"] = df_copy["r"].round(SCALE)
        df_copy["a_rounded"] = df_copy["a"].round(SCALE)
        df_copy["b_rounded"] = df_copy["b"].round(SCALE)
        df_copy["count"] = cudf.Series(1, index=df_copy.index, dtype='uint32')
        
        # Compute operands
        df_copy["operand1"] = df_copy[["a_rounded", "b_rounded"]].min(axis=1)
        df_copy["operand2"] = df_copy[["a_rounded", "b_rounded"]].max(axis=1)

        # Group, aggregate, and create a new DataFrame
        new_df = (
            df_copy.groupby(["rounded_r", "operand1", "operand2"])
                   .agg({"count": "sum"})
                   .rename(columns={"count": "total_occurrences"})
                   .reset_index()
        )
        
        del df_copy
        
        new_df = new_df[new_df["total_occurrences"] >= MINIMUM_TOTAL_OCCURENCES]
        
        # Sort and limit
        return new_df.sort_values(
            by=["total_occurrences", "operand2"],
            ascending=[False, False]
        )

    def merge_two_dfs(df1:DataFrame, df2:DataFrame) -> DataFrame:
        columns_to_match = ['rounded_r', 'operand1', 'operand2']
        merged_gdf = df1.merge(df2, on=columns_to_match, how='outer', suffixes=('_df1', '_df2'))
        merged_gdf['total_occurrences_summed'] = merged_gdf['total_occurrences_df1'].fillna(0) + merged_gdf['total_occurrences_df2'].fillna(0)
        merged_gdf = merged_gdf.rename(columns={'total_occurrences_summed': 'total_occurrences'})
        columns_to_drop = ['total_occurrences_df1', 'total_occurrences_df2']
        merged_gdf = merged_gdf.drop(columns=columns_to_drop).sort_values(by = ["total_occurrences", "operand2"], ascending = [False, False])
        torch.cuda.empty_cache()
        return merged_gdf

        
        
        

### Getting Image paths with there classes

In [13]:
import json, random, os
available_options:dict = {}
with open(PATH_OF_CONFIG_FILE, 'r') as file:
    available_options = json.load(file)
# {882: ['n04517823', 'vacuum'], 910: ['n04597913', 'wooden_spoon'],}
CLASS_LIST = {i: available_options[str(i)] for i in random.sample(range(0, 1000), NO_OF_CLASSES)}


"""
{'n02096294': ['/home/shared/val2/n02096294/ILSVRC2012_val_00009052.JPEG'],
 'n02870880': ['/home/shared/val2/n02870880/ILSVRC2012_val_00009820.JPEG'],
"""
FILE_PATHS ={x[0]: HelperFuntions.list_files_os_listdir(os.path.join(PATH_OF_FOLDER, x[0])) for x in CLASS_LIST.values()}

## Setting up intial matrices

In [11]:
import torch
from timm import create_model
from torchvision.transforms import Compose

device = torch.device(DEVICE_CUDA if torch.cuda.is_available() else DEVICE_CPU)
model = create_model(MODEL_NAME, pretrained=True)
model = model.to(device)


matrix_a = model.patch_embed.proj.weight.view(768,-1)


In [15]:
import torch.nn.functional as F

def get_for_patch_embed(file_path:str) -> DataFrame:
    matrix_b = F.unfold(HelperFuntions.get_input_tensor(file_path), kernel_size = (16,16), stride = (16,16))[0]
    torch.cuda.empty_cache()

    df = HelperFuntions.find_common_operations(HelperFuntions.multiply_two_matrices(matrix_a, matrix_b))
    
    torch.cuda.empty_cache()
    return df


    

In [16]:

def get_for_class(index) -> DataFrame:
    class_selected = FILE_PATHS[list(FILE_PATHS.keys())[index]]
    
    accum :DataFrame = get_for_patch_embed(class_selected[0])
    for i in range(1, len(class_selected)):
        df = get_for_patch_embed(class_selected[i])
        accum = HelperFuntions.merge_two_dfs(df, accum)
        del df
    torch.cuda.empty_cache()
    return accum



In [19]:
accum = get_for_class(0)
for i in range(1, len(FILE_PATHS.keys())):
    df = get_for_class(i)
    accum = HelperFuntions.merge_two_dfs(accum, df)
    del df
torch.cuda.empty_cache()
accum


Unnamed: 0,rounded_r,operand1,operand2,total_occurrences
17664,0.00020,0.00009,2.24891,25380
17665,-0.00058,-0.00026,2.24891,24331
17666,0.00263,0.00117,2.24891,24262
17667,0.00236,0.00105,2.24891,23447
17668,-0.00184,-0.00082,2.24891,23046
...,...,...,...,...
2536576,0.05298,-1.47329,-0.03596,80
2536577,0.07405,-2.01821,-0.03669,80
2536580,0.05874,-1.59529,-0.03682,80
2536578,0.08657,-2.01821,-0.04289,80


In [12]:
model

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(