# Continual Learning Demonstration

CSCE 5280 - AI for Healthcare and Wearables

Professor: Dr. Mark Albert  

TA: Mr. Riyad Bin Rafiq


---


Students working on the project:

Fahmid Shahriar Iqbal,
Gayaetiri Chalasani,
Isaac Gregory,
Joseph Caldwell

In [39]:
# Imports
import kagglehub
import os
import numpy as np
from torchvision.transforms import v2
import torch
import pandas as pd
from torchvision import datasets, transforms

## Dataset

### Gathering Images and Labels

In [40]:
# Download the ASL alphabet dataset via kagglehub
# (NOTE: re-running is fine if the data is already downloaded)
dataset_root = kagglehub.dataset_download("grassknoted/asl-alphabet")

# The per-letter training folders live two levels below the download root
path = dataset_root + "/asl_alphabet_train/asl_alphabet_train"
print("Path to dataset files:", path)

Path to dataset files: /Users/isaacgregory/.cache/kagglehub/datasets/grassknoted/asl-alphabet/versions/1/asl_alphabet_train/asl_alphabet_train


Creating folder setup for loading into PyTorch later...

In [42]:
# Letters (one folder per letter) assigned to each task
task1_dirs = ["A", "B", "C", "D", "E", "F", "G", "H"]
task2_dirs = ["I", "K", "L", "M", "N", "O", "P", "Q"]
task3_dirs = ["R", "S", "T", "U", "V", "W", "X", "Y"]

# Grouping the letter folders under one sub-directory per task
# (This helps in loading to pytorch later)
#
# Every step is checked on its own, so re-running this cell after an
# interrupted or partial move finishes the job instead of skipping it or
# failing on a directory that already exists.
for task_name, letters in (("task1", task1_dirs), ("task2", task2_dirs), ("task3", task3_dirs)):
    task_root = os.path.join(path, task_name)
    os.makedirs(task_root, exist_ok=True)

    for letter in letters:
        src = os.path.join(path, letter)
        dst = os.path.join(task_root, letter)

        # Only move folders that are still at the dataset root
        if os.path.isdir(src) and not os.path.exists(dst):
            os.rename(src, dst)

Storing file paths and labels into dataframe...

(NOTE: this may prove to be unnecessary now that the images are loaded with `ImageFolder` in the sections below.)

In [None]:
# One list of (file_path, label) rows per task
task1_list = []
task2_list = []
task3_list = []

# Walking through folders
for dirpath, _, files in os.walk(path):
    # The label is the name of the folder that holds the images. Comparing the
    # whole folder name (rather than only its last character) avoids picking up
    # unrelated folders whose names merely end in one of the task letters.
    label = os.path.basename(dirpath)

    # Choosing the list this folder belongs to
    # (each letter except J and Z due to movement)
    if label in task1_dirs:
        rows = task1_list
    elif label in task2_dirs:
        rows = task2_list
    elif label in task3_dirs:
        rows = task3_list
    else:
        # Not a letter folder of any task (e.g. the task folders themselves)
        continue

    for filename in files:
        rows.append((os.path.join(dirpath, filename), label))

# Gathering data into dataframes
task1 = pd.DataFrame(task1_list, columns=['file_path', 'target'])
task2 = pd.DataFrame(task2_list, columns=['file_path', 'target'])
task3 = pd.DataFrame(task3_list, columns=['file_path', 'target'])

Checking the data...

In [None]:
# Show the distinct labels and the (rows, columns) shape of each task table
for task_frame in (task1, task2, task3):
    print(np.unique(task_frame['target']), task_frame.shape)

['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H'] (24000, 2)
['I' 'K' 'L' 'M' 'N' 'O' 'P' 'Q'] (24000, 2)
['R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y'] (24000, 2)


In [None]:
# Preview the first five rows of the task1 table
task1.head(n=5)

Unnamed: 0,file_path,target
0,/root/.cache/kagglehub/datasets/grassknoted/as...,C
1,/root/.cache/kagglehub/datasets/grassknoted/as...,C
2,/root/.cache/kagglehub/datasets/grassknoted/as...,C
3,/root/.cache/kagglehub/datasets/grassknoted/as...,C
4,/root/.cache/kagglehub/datasets/grassknoted/as...,C


In [None]:
# Preview the first five rows of the task2 table
task2.head(n=5)

Unnamed: 0,file_path,target
0,/root/.cache/kagglehub/datasets/grassknoted/as...,O
1,/root/.cache/kagglehub/datasets/grassknoted/as...,O
2,/root/.cache/kagglehub/datasets/grassknoted/as...,O
3,/root/.cache/kagglehub/datasets/grassknoted/as...,O
4,/root/.cache/kagglehub/datasets/grassknoted/as...,O


In [None]:
# Preview the first five rows of the task3 table
task3.head(n=5)

Unnamed: 0,file_path,target
0,/root/.cache/kagglehub/datasets/grassknoted/as...,X
1,/root/.cache/kagglehub/datasets/grassknoted/as...,X
2,/root/.cache/kagglehub/datasets/grassknoted/as...,X
3,/root/.cache/kagglehub/datasets/grassknoted/as...,X
4,/root/.cache/kagglehub/datasets/grassknoted/as...,X


In [None]:
# Print the number of files per letter, one table per task
for task_frame in (task1, task2, task3):
    print(task_frame.groupby('target').count())

        file_path
target           
A            3000
B            3000
C            3000
D            3000
E            3000
F            3000
G            3000
H            3000
        file_path
target           
I            3000
K            3000
L            3000
M            3000
N            3000
O            3000
P            3000
Q            3000
        file_path
target           
R            3000
S            3000
T            3000
U            3000
V            3000
W            3000
X            3000
Y            3000


### EDA

### Data Augmentation

In [43]:
# Fix PyTorch's global random seed so the random steps below are repeatable
torch.manual_seed(seed=1)

<torch._C.Generator at 0x116659f30>

Gathering the mean and standard deviation of each color channel for each task. These values are used later to normalize the images during transformation.

This had to be done locally, rather than on Colab, due to RAM usage.

In [71]:
def ColorbandDistribution(data_path):
    """Compute the per-channel mean and standard deviation of an image folder.

    The images are read with ``datasets.ImageFolder`` and converted with
    ``transforms.ToTensor()``. The statistics are accumulated one image at a
    time (running sum and sum of squares), so memory use stays constant
    instead of growing with the number of images.

    Args:
        data_path: Directory in the layout ``datasets.ImageFolder`` expects.

    Returns:
        A tuple ``(mean, std)`` of two 3-element lists of ``np.float32``, one
        entry per channel. ``std`` is the population standard deviation
        (the same convention as ``np.std``).

    Raises:
        ValueError: If the dataset yields no pixels.
    """
    temp_dataset = datasets.ImageFolder(data_path, transform=transforms.ToTensor())

    # float64 accumulators keep rounding error small over many pixels
    channel_sum = torch.zeros(3, dtype=torch.float64)
    channel_sq_sum = torch.zeros(3, dtype=torch.float64)
    pixel_count = 0

    for image, _ in temp_dataset:
        # Assuming 3 channels (for RGB); flatten each channel to one row
        pixels = image[:3].to(torch.float64).reshape(3, -1)
        channel_sum += pixels.sum(dim=1)
        channel_sq_sum += (pixels * pixels).sum(dim=1)
        pixel_count += pixels.shape[1]

    if pixel_count == 0:
        raise ValueError("No pixels found in dataset at: " + str(data_path))

    # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative rounding residue
    channel_mean = channel_sum / pixel_count
    channel_var = (channel_sq_sum / pixel_count - channel_mean * channel_mean).clamp(min=0)
    channel_std = channel_var.sqrt()

    mean = [np.float32(value) for value in channel_mean.tolist()]
    std = [np.float32(value) for value in channel_std.tolist()]

    return mean, std

In [72]:
# Channel statistics (mean, std) for the task1 images
task1_stats = ColorbandDistribution(f"{path}/task1")
print(task1_stats)

([np.float32(0.5132197), np.float32(0.4820249), np.float32(0.4969285)], [np.float32(0.24072976), np.float32(0.26737508), np.float32(0.27361518)])


Results...

Mean: [0.5132197, 0.4820249, 0.4969285]

STD:  [0.24072976, 0.26737508, 0.27361518]

In [73]:
# Channel statistics (mean, std) for the task2 images
task2_stats = ColorbandDistribution(f"{path}/task2")
print(task2_stats)

([np.float32(0.5258291), np.float32(0.5022421), np.float32(0.5179816)], [np.float32(0.228203), np.float32(0.25652444), np.float32(0.2656579)])


Results...

Mean: [0.5258291, 0.5022421, 0.5179816]

STD: [0.228203, 0.25652444, 0.2656579]

In [74]:
# Channel statistics (mean, std) for the task3 images
task3_stats = ColorbandDistribution(f"{path}/task3")
print(task3_stats)

([np.float32(0.517228), np.float32(0.50707084), np.float32(0.52376896)], [np.float32(0.2185833), np.float32(0.2464939), np.float32(0.2539093)])


Results...

Mean: [0.517228, 0.50707084, 0.52376896]

STD: [0.2185833, 0.2464939, 0.2539093]

In [66]:
def CreateTaskAugmentedDatset(data_path, mean, std, batch_size=32, color_jitter=None):
    """Build a shuffled DataLoader with the original and augmented images of one task.

    The folder is read twice with ``datasets.ImageFolder``: once with only
    tensor conversion and normalization, and once with random augmentations
    added. Both views are concatenated, so the loader serves two samples per
    image file.

    Args:
        data_path: Directory in the layout ``datasets.ImageFolder`` expects.
        mean: Per-channel means passed to ``v2.Normalize``.
        std: Per-channel standard deviations passed to ``v2.Normalize``.
        batch_size: Samples per batch (default 32, the previous fixed value).
        color_jitter: Keyword arguments for ``v2.ColorJitter``. ``None``
            selects a mild default; pass ``{}`` to turn color jitter off.

    Returns:
        A ``torch.utils.data.DataLoader`` (``shuffle=True``) over the
        concatenation of the plain and augmented datasets.
    """
    # v2.ColorJitter() called without arguments leaves the image unchanged,
    # so explicit strengths are needed for the augmentation to have an effect.
    if color_jitter is None:
        color_jitter = {"brightness": 0.2, "contrast": 0.2, "saturation": 0.2, "hue": 0.05}

    # Creating image transformations/augmentations
    task_transforms = v2.Compose([
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.RandomResizedCrop(size=(200, 200), antialias=True),
        v2.RandomRotation(degrees=90, expand=False),
        v2.ColorJitter(**color_jitter),
        v2.Normalize(mean=mean, std=std),
    ])

    # Setup for the non-transformed images
    task_standard = v2.Compose([
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=mean, std=std),
    ])

    # Getting original data and creating transformed data
    original_task = datasets.ImageFolder(data_path, transform=task_standard)
    transformed_task = datasets.ImageFolder(data_path, transform=task_transforms)

    # Combining the transformed and non-transformed images datasets
    combined_task = torch.utils.data.ConcatDataset([original_task, transformed_task])
    task = torch.utils.data.DataLoader(combined_task, batch_size=batch_size, shuffle=True)

    return task

In [67]:
# Build the task1 loader from its per-channel normalization statistics
task1_mean = [0.5132197, 0.4820249, 0.4969285]
task1_std = [0.24072976, 0.26737508, 0.27361518]
task1 = CreateTaskAugmentedDatset(f"{path}/task1", mean=task1_mean, std=task1_std)

In [68]:
# Read the class indices directly from the ImageFolder datasets inside the
# ConcatDataset. Iterating `task1.dataset` itself would load and transform
# every image just to look at its label.
# (assumes task1 is the loader returned by CreateTaskAugmentedDatset)
labels = [target for subset in task1.dataset.datasets for target in subset.targets]

print(np.unique(labels))

[0 1 2 3 4 5 6 7]


In [69]:
# Build the task2 loader from its per-channel normalization statistics
task2_mean = [0.5258291, 0.5022421, 0.5179816]
task2_std = [0.228203, 0.25652444, 0.2656579]
task2 = CreateTaskAugmentedDatset(f"{path}/task2", mean=task2_mean, std=task2_std)

In [70]:
# Build the task3 loader from its per-channel normalization statistics
task3_mean = [0.517228, 0.50707084, 0.52376896]
task3_std = [0.2185833, 0.2464939, 0.2539093]
task3 = CreateTaskAugmentedDatset(f"{path}/task3", mean=task3_mean, std=task3_std)