In [1]:
import torch
from torch import nn

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


Step 1 : Restructuring OS

In [2]:
import os

def restructure(dir, num_classes):
    for i in range(1, num_classes+1):
        dir_to_create = f".\\{dir}\\flower{str(i)}"
        
        if not os.path.isdir(dir_to_create):
            os.mkdir(dir_to_create)

    cwd = os.path.abspath(dir)

    for file in os.listdir(dir):
        if file.endswith(".jpg"):
            os.replace(f"{cwd}\\{file}", f"{cwd}\\flower{file.split("_")[0]}\\{file}")

restructure("train_data", 60)
restructure("val_data", 60)

In [3]:
import os
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
from torchvision import transforms

class PCAFeatureExtractor(object):
    def __init__(self, n_components=None):
        self.n_components = n_components
        self.pca = None

    def fit(self, images):
        # Flatten the images
        flat_images = [np.array(img).flatten() for img in images]
        X = np.array(flat_images)
        
        # Determine the number of components
        n_samples, n_features = X.shape
        max_components = min(n_samples, n_features) - 1  # Subtract 1 to be safe
        self.n_components = min(self.n_components or max_components, max_components)
        
        print(f"Fitting PCA with {self.n_components} components")
        
        # Fit PCA
        self.pca = PCA(n_components=self.n_components)
        self.pca.fit(X)

    def __call__(self, img):
        # Convert PIL Image to numpy array and flatten
        img_array = np.array(img).flatten()
        
        # Transform the image using the fitted PCA
        if self.pca is not None:
            features = self.pca.transform(img_array.reshape(1, -1))
            return features.flatten()
        else:
            raise ValueError("PCA has not been fitted. Call fit() method first.")

def load_images_from_folder(folder_path):
    images = []
    for filename in os.listdir(folder_path):
        img_path = os.path.join(folder_path, filename)
        try:
            with Image.open(img_path) as img:
                img = img.convert('RGB')  # Ensure all images are in RGB format
                img = img.resize((64, 64))  # Resize to 256x256
                images.append(img)
        except:
            print(f"Error loading image: {img_path}")
    return images

def get_pca_transform(main_folder, n_components=None):
    all_images = []
    
    # Iterate through all subfolders
    for class_folder in os.listdir(main_folder):
        class_path = os.path.join(main_folder, class_folder)
        if os.path.isdir(class_path):
            images = load_images_from_folder(class_path)
            all_images.extend(images)
    
    print(f"Total images loaded: {len(all_images)}")
    
    # Create and fit the PCA transform
    pca_transform = PCAFeatureExtractor(n_components=n_components)
    pca_transform.fit(all_images)
    
    # Create a composition of transforms
    transform = transforms.Compose([
        transforms.Resize((64, 64)),  # Ensure all images are 256x256
        transforms.ToTensor(),  # Convert to tensor
        transforms.Lambda(lambda x: Image.fromarray(x.mul(255).byte().numpy().transpose(1, 2, 0))),  # Convert back to PIL Image
        pca_transform  # Apply PCA feature extraction
    ])
    
    return transform

Step 2 : Load Dataset into torch Loader

In [4]:
import torch.utils.data
from torchvision import datasets, transforms

# transform = transforms.Compose([
#    transforms.Resize((64, 64)),  # Resize image to 64x64 to reduce computational load
#     transforms.Lambda(lambda img: pca_transform(img)),  # Apply PCA (after fitting)
#     transforms.Lambda(lambda x: (x - x.mean()) / x.std()),  # Standardize the data])
# ])

main_folder = "train_data"
pca_transform = get_pca_transform(main_folder=main_folder, n_components=5000)
transform= pca_transform

train_dataset = datasets.ImageFolder("train_data", transform=transform)
val_dataset = datasets.ImageFolder("val_data", transform=transform)

batch_size = 256
train_dl = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

Total images loaded: 3000
Fitting PCA with 2999 components


In [5]:
for batch, (X, y) in enumerate(train_dl):
    print(X.shape)
    print(batch)
    print(y)

torch.Size([256, 2999])
0
tensor([44, 52,  5, 37, 50,  0, 21,  5, 31, 25, 34, 38, 26,  0,  4, 31, 24, 36,
        18, 14,  2, 32, 53, 19, 57, 51, 25, 42, 41, 52, 23, 36,  3, 39, 33,  3,
         3,  5, 56, 30, 43, 41, 33, 29, 44, 27,  8, 54, 47, 51, 15, 13, 49, 18,
        32,  3, 21, 38, 41, 18, 22, 48,  7,  2, 28, 33, 32, 14, 54, 18, 33, 38,
        32,  8, 15, 43, 56, 59, 52, 25, 18, 56, 22, 51, 45,  2, 57, 48, 13, 37,
        51,  4, 24, 46, 53, 55, 31, 30, 50, 29, 50, 50, 51, 16, 28, 31, 17, 35,
        54, 16, 20, 56, 20,  5, 18,  1, 53,  0, 51, 13, 16, 30, 23, 42, 52, 29,
        19, 56, 24,  4, 59, 17, 39, 46,  8, 26,  5, 29, 17, 12, 29,  8, 12, 15,
        18, 32, 12, 37,  0, 37, 48, 28, 54, 38, 57, 11, 47,  1, 42, 12, 10,  4,
        14, 16, 33, 40, 39, 53, 20, 28, 26, 11, 46, 43, 19, 51, 38, 46, 13, 42,
        18, 41, 29, 55, 23,  8, 28, 57,  2, 33, 25,  1, 54,  1, 34,  7, 30, 37,
        44, 25, 40, 36, 38, 12, 52, 33, 45,  0, 47, 52,  5, 31, 56, 14, 43,  9,
        52, 39

Step 3 : Define Model

In [9]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(2999, 1024),
            nn.ReLU(),
            nn.Dropout(0.30),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.30),
            nn.Linear(256, 60),
        
        )

    def forward(self, X):
        X = self.flatten(X)
        logits = self.linear_relu_stack(X)
        return logits
    
model = Model().to(device)
print(model)

Model(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=2999, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=1024, out_features=256, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=256, out_features=60, bias=True)
  )
)


In [10]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X = X.float().to(device)
        # one_hot_y = torch.eye(60)[y].to(device)
        y = y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss, current = loss.item(), batch * batch_size + len(X)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X = X.float().to(device)
            # one_hot_y = torch.eye(60)[y].to(device)
            y = y.to(device)
            
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

Step 4 : Execution

In [11]:
import torch.optim

torch.manual_seed(42)

learning_rate = 1e-2
epochs = 20

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-2)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dl, model, loss_fn, optimizer)
    test_loop(val_dl, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 39.953613  [  256/ 3000]
loss: 204.710190  [  512/ 3000]
loss: 164.515533  [  768/ 3000]
loss: 67.131531  [ 1024/ 3000]
loss: 8.172914  [ 1280/ 3000]
loss: 4.112161  [ 1536/ 3000]
loss: 4.090486  [ 1792/ 3000]
loss: 4.095487  [ 2048/ 3000]
loss: 4.087248  [ 2304/ 3000]
loss: 4.228390  [ 2560/ 3000]
loss: 4.102129  [ 2816/ 3000]
loss: 4.096344  [ 3000/ 3000]
Test Error: 
 Accuracy: 1.7%, Avg loss: 4.096114 

Epoch 2
-------------------------------
loss: 4.095122  [  256/ 3000]
loss: 4.095288  [  512/ 3000]
loss: 4.097998  [  768/ 3000]
loss: 4.098407  [ 1024/ 3000]
loss: 4.090095  [ 1280/ 3000]
loss: 4.094711  [ 1536/ 3000]
loss: 4.093777  [ 1792/ 3000]
loss: 4.100707  [ 2048/ 3000]
loss: 4.095455  [ 2304/ 3000]
loss: 4.290407  [ 2560/ 3000]
loss: 4.094791  [ 2816/ 3000]
loss: 4.094044  [ 3000/ 3000]
Test Error: 
 Accuracy: 1.7%, Avg loss: 4.094940 

Epoch 3
-------------------------------


KeyboardInterrupt: 