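## Baseline Model
Flat (single-stage) classifier: a pretrained ResNet-18 (small) whose final layer is replaced by one new linear layer. A Local Binary Pattern (LBP) transform is also defined for optional preprocessing.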

### Imports

In [None]:
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from skimage import color
from skimage.feature import local_binary_pattern
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### LBP Transform

In [None]:
class LBPTransform:
    """Callable that converts an image into a 3-channel Local Binary Pattern map.

    Parameters
    ----------
    radius : int
        Radius passed to ``local_binary_pattern``.
    n_points : int or None
        Number of sampling points; when falsy it defaults to ``8 * radius``.
    method : str
        LBP method name passed to ``local_binary_pattern``.
    """

    def __init__(self, radius=3, n_points=None, method='uniform'):
        self.radius = radius
        self.n_points = n_points if n_points else 8 * radius
        self.method = method

    def __call__(self, img):
        """Compute the LBP map of ``img``.

        Parameters
        ----------
        img : PIL.Image.Image or numpy.ndarray
            A 3-D array is converted to grayscale with ``color.rgb2gray``;
            a 2-D array is used as the grayscale image directly.
            Floating-point data is assumed to lie in [0, 1]; integer data is
            assumed to already be in the 0-255 range.

        Returns
        -------
        numpy.ndarray
            Float array of shape ``(H, W, 3)``: the min-max normalised LBP map
            repeated along the last axis.
        """
        if isinstance(img, Image.Image):
            img = np.array(img)

        if len(img.shape) == 3:
            gray = color.rgb2gray(img)
        else:
            gray = img

        # Only floating-point data is rescaled to 0-255. Multiplying an image
        # that is already uint8 by 255 would wrap around and corrupt the pixels.
        if np.issubdtype(gray.dtype, np.floating):
            gray = (gray * 255).astype(np.uint8)
        else:
            gray = gray.astype(np.uint8)

        lbp = local_binary_pattern(gray, self.n_points, self.radius, self.method)

        # Min-max normalisation; the epsilon avoids 0/0 on a constant LBP map.
        lbp = (lbp - lbp.min()) / (lbp.max() - lbp.min() + 1e-7)

        # Replicate the single LBP channel so 3-channel consumers can use it.
        lbp_3 = np.stack([lbp, lbp, lbp], axis=-1)

        return lbp_3

### Make LBP CSV
Writes the flattened LBP features of every PNG image to a CSV file, one row per image.

In [None]:
def make_lbp_csv(input_folder, csv_path, lbp_transformer):
    """Write the flattened LBP features of every ``.png`` in a folder to a CSV.

    Parameters
    ----------
    input_folder : str
        Directory that is scanned (non-recursively) for files ending in ``.png``.
    csv_path : str
        Destination CSV file; it is overwritten if it already exists.
    lbp_transformer : callable
        Called with the RGB ``PIL.Image``; must return an object with
        ``.flatten()`` whose result has ``.tolist()`` (e.g. a NumPy array).

    The header row (``PGCname, pixel_0, pixel_1, ...``) is sized from the first
    image. Files are processed in sorted order so the output is deterministic.
    """
    img_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.png'))

    print(f"Processing {len(img_files)} images from {input_folder}...")
    # newline='' lets the csv module control line endings (avoids blank rows
    # on platforms that translate '\n').
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)

        header_written = False

        for fname in img_files:
            in_path = os.path.join(input_folder, fname)
            # Close the file handle as soon as the pixels have been converted.
            with Image.open(in_path) as src:
                img = src.convert('RGB')

            features = lbp_transformer(img).flatten()

            if not header_written:
                header = ['PGCname'] + [f'pixel_{i}' for i in range(len(features))]
                writer.writerow(header)
                header_written = True

            # NOTE(review): the id column holds the file name including '.png';
            # confirm this matches the ids expected by whatever reads the CSV.
            writer.writerow([fname] + features.tolist())

    print("LBP preprocessing complete.")

### Dataset Class
Class for processing data and combining images with labels

In [None]:
class PGCDataset(Dataset):
    """Dataset pairing ``<id>.png`` images in a folder with labels from a DataFrame.

    Parameters
    ----------
    labels_df : pandas.DataFrame
        Table containing at least ``id_col`` and ``label_col``.
    img_folder : str
        Directory holding one ``<id>.png`` per sample.
    id_col : str
        Column with the image id (file name without the ``.png`` extension).
    label_col : str
        Column with the class label; it is converted with ``int()``.
    transform : callable or None
        Optional transform applied to the RGB ``PIL.Image``.

    Rows whose id has no matching ``<id>.png`` in ``img_folder`` are dropped at
    construction time, and the remaining rows are re-indexed from 0.
    """

    def __init__(self, labels_df, img_folder, id_col='PGCname', label_col='T', transform=None):
        self.img_folder = img_folder
        self.id_col = id_col
        self.label_col = label_col
        self.transform = transform

        # Strip only the trailing extension so that every id in the set maps
        # back to an existing f"{id}.png" (str.replace would also remove
        # '.png' occurring in the middle of a file name).
        ext = '.png'
        available_imgs = {f[:-len(ext)] for f in os.listdir(img_folder)
                          if f.endswith(ext)}
        has_image = labels_df[id_col].isin(available_imgs)
        self.labels_df = labels_df[has_image].reset_index(drop=True)

        print(f"Dataset created with {len(self.labels_df)} imgs")

    def __len__(self):
        """Number of rows that have a matching image."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Return ``(image, label, image_id)`` for the row at position ``idx``.

        ``label`` is a 0-d ``torch.long`` tensor; ``image`` is the RGB image,
        after ``transform`` when one was given.
        """
        row = self.labels_df.iloc[idx]

        img_id = row[self.id_col]
        img_path = os.path.join(self.img_folder, f"{img_id}.png")
        # Convert inside the context manager so the file handle is released.
        with Image.open(img_path) as src:
            img = src.convert('RGB')

        label = torch.tensor(int(row[self.label_col]), dtype=torch.long)

        if self.transform:
            img = self.transform(img)

        return img, label, img_id

### Dataset creation

In [None]:
# --- Locations and column names ------------------------------------------
path = '/content/drive/Othercomputers/My laptop/Thesis/Galaxy-Classifier/'
img_folder = path + '/images'
lbp_img_folder = path + '/lbp_images'

id_col = 'PGCname'
label_col = 'T'

img_size = 224

# --- Labels --------------------------------------------------------------
labels_df = pd.read_csv(path + 'EFIGI_attributes.txt', sep=r'\s+', comment='#')

# Label codes are merged into coarser groups and finally renumbered to 0 - 8.
# The steps are applied strictly in this order, one replace() call per step.
_label_merge_steps = [
    # {-6: -4, -5: -4},                              # E (disabled)
    {-3: -2, -1: -2},                                # S0
    {0: 1, 2: 1},                                    # Sa
    {3: 4},                                          # Sb
    {5: 6},                                          # Sc
    {8: 7, 9: 7},                                    # Sd
    {10: 11},                                        # Irr
    {-6: 0, -5: 1, -4: 2, -2: 3, 1: 4, 4: 5, 11: 8}, # Adjust to 0 - 8
]
for _step in _label_merge_steps:
    labels_df[label_col] = labels_df[label_col].replace(_step)

# --- Stratified train / val / test split ---------------------------------
# 20 % of all rows go to test; 12.5 % of the remaining 80 % (= 10 % of all
# rows) go to validation.
train_df, test_df = train_test_split(
    labels_df,
    test_size=0.2,
    random_state=0,
    stratify=labels_df[label_col],
)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.125,
    random_state=0,
    stratify=train_df[label_col],
)

# use stratify sampling in training - write to csv file - - tocsv.pandas

# --- Image transforms ----------------------------------------------------
_norm_stats = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}

# Training: random rotation / flips, then the same resize + normalise as eval.
train_transform = transforms.Compose([
    transforms.RandomRotation(180),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(**_norm_stats),
])

# Validation / test: deterministic resize + normalise only.
test_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(**_norm_stats),
])

# Disabled LBP preprocessing, kept as an inert string literal.
# NOTE(review): it calls `make_lbp_images`, which is not defined in the visible
# part of this notebook (the helper defined above is `make_lbp_csv`).
"""lbp_params = {'radius': 3, 'n_points': 24, 'method': 'uniform'}

lbp_transform = LBPTransform(**lbp_params)

make_lbp_images(img_folder, lbp_img_folder, lbp_transform)
"""

# --- Datasets (all read the raw images in img_folder) --------------------
_dataset_kwargs = {'img_folder': img_folder, 'id_col': id_col, 'label_col': label_col}

train_dataset = PGCDataset(labels_df=train_df, transform=train_transform, **_dataset_kwargs)
val_dataset = PGCDataset(labels_df=val_df, transform=test_transform, **_dataset_kwargs)
test_dataset = PGCDataset(labels_df=test_df, transform=test_transform, **_dataset_kwargs)

Dataset created with 3120 imgs
Dataset created with 446 imgs
Dataset created with 892 imgs


### Data loader
loads data in batches

In [None]:
# "Balanced" per-class weights computed from the training labels.
labels = train_df[label_col].values
classes = np.unique(labels)
class_weights = compute_class_weight('balanced', classes=classes, y=labels)

# One weight per training row: np.unique returns the classes sorted, so
# searchsorted yields each label's position in `classes`.
sample_weights = class_weights[np.searchsorted(classes, labels)]
sample_weights = torch.from_numpy(sample_weights).float()

sampler = torch.utils.data.WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)
# NOTE(review): `sampler` is built but not passed to any loader below; the
# training loader uses plain shuffling (DataLoader does not accept a sampler
# together with shuffle=True).

# Training batches are shuffled; validation / test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

### Make model
using pretrained resnet18

In [None]:
def resnet_model(num_classes, freeze_backbone=True):
    """Build a ResNet-18 with ImageNet weights and a new ``num_classes``-way head.

    Parameters
    ----------
    num_classes : int
        Output size of the replacement fully connected layer.
    freeze_backbone : bool
        When True, every pretrained parameter has ``requires_grad`` set to
        False. The replacement ``fc`` layer is created afterwards, so it keeps
        the default ``requires_grad=True`` in either case.

    Returns
    -------
    torch.nn.Module
        The modified ResNet-18.
    """
    net = models.resnet18(weights='IMAGENET1K_V1')

    # Applies the flag to all pretrained parameters at once.
    net.requires_grad_(not freeze_backbone)

    # Swap the classification head for one sized to this task.
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net

## Train/test model methods

In [None]:
def train_one_epoch(model, dataloader, criterion, optimizer, device, scaler=None):
    """Run one optimisation pass over ``dataloader``.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; it is switched to training mode.
    dataloader : iterable
        Yields ``(images, labels, ids)`` batches and supports ``len()``.
    criterion : callable
        Loss taking ``(outputs, labels)``.
    optimizer : torch.optim.Optimizer
        Optimiser stepped once per batch.
    device : torch.device or str
        Device the batches are moved to.
    scaler : optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple
        ``(mean of the per-batch losses, accuracy in percent)``.

    Prints one ``.`` per batch as a progress indicator.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_imgs, batch_labels, _ids in dataloader:
        batch_imgs = batch_imgs.to(device, non_blocking=True)
        batch_labels = batch_labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        logits = model(batch_imgs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Bookkeeping for the epoch-level metrics.
        loss_sum += batch_loss.item()
        n_seen += batch_labels.size(0)
        guesses = logits.max(1).indices
        n_correct += guesses.eq(batch_labels).sum().item()
        print(".", end="")
    print("")

    return loss_sum / len(dataloader), 100.0 * n_correct / n_seen

def valid(model, dataloader, device, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate; it is switched to eval mode.
    dataloader : iterable
        Yields ``(X, y, ids)`` batches, supports ``len()`` and exposes
        ``.dataset`` (used for the sample count).
    device : torch.device or str
        Device the batches are moved to.
    loss_fn : callable
        Loss taking ``(pred, y)``.

    Returns
    -------
    tuple
        ``(mean of the per-batch losses, accuracy in percent,
        macro-averaged F1 in percent)``.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for X, y, _ in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            predicted = pred.argmax(1)

            test_loss += loss_fn(pred, y).item()
            correct += (predicted == y).type(torch.float).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    test_loss /= num_batches
    correct /= size

    # zero_division=0 scores a class with no predicted samples as 0 instead of
    # emitting a warning. (The keyword was previously misspelled, which makes
    # classification_report raise TypeError.)
    report = classification_report(all_labels, all_preds, output_dict=True, digits=4, zero_division=0)

    return test_loss, correct * 100, report['macro avg']['f1-score'] * 100


def test(model, dataloader, device, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for X, y, _ in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

            all_preds.extend(pred.argmax(1).cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    test_loss /= num_batches
    correct /= size

    print(classification_report(all_labels, all_preds, digits=4))

    return test_loss, correct * 100


## Train model

In [None]:
# --- Model and device ----------------------------------------------------
num_classes = labels_df[label_col].nunique()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet_model(num_classes=num_classes, freeze_backbone=False).to(device)
torch.backends.cudnn.benchmark = True
print("Using device:", device)

# --- Losses --------------------------------------------------------------
# Training loss is weighted with "balanced" class weights from the training
# split; the evaluation loss is unweighted.
class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_df[label_col]),
    y=train_df[label_col],
)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
train_criterion = nn.CrossEntropyLoss(weight=class_weights)

test_criterion = nn.CrossEntropyLoss()

# --- Optimiser and LR schedule -------------------------------------------
# Only parameters with requires_grad=True are handed to Adam.
optimizer = torch.optim.Adam(
    [p for p in model.parameters() if p.requires_grad],
    lr=0.001,
)

# The scheduler is stepped with the validation loss each epoch (mode='min').
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=3,
)

# Despite its name, best_acc tracks the best validation macro-F1 seen so far.
best_acc = 0.0

epochs = 50
for epoch in range(epochs):
    train_loss, train_correct = train_one_epoch(
        model, train_loader, train_criterion, optimizer, device
    )
    val_loss, val_correct, val_F1 = valid(model, val_loader, device, test_criterion)

    print(f"Epoch {epoch+1}/{epochs}")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_correct:.2f}%")
    print(f"  Val   Loss: {val_loss:.4f} | Val   Acc: {val_correct:.2f}% | Val   Macro F1: {val_F1:.2f}%")

    scheduler.step(val_loss)

    # Checkpoint only when the validation macro-F1 improves.
    if val_F1 <= best_acc:
        continue
    best_acc = val_F1
    torch.save(model.state_dict(), path + 'flat.pth')


Using device: cuda
.................................................


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/50
  Train Loss: 1.4781 | Train Acc: 46.38%
  Val   Loss: 1.5529 | Val   Acc: 42.60 | Val   Macro F1: 0.34059806450650554%
.................................................


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 2/50
  Train Loss: 1.2302 | Train Acc: 53.37%
  Val   Loss: 0.9999 | Val   Acc: 60.31 | Val   Macro F1: 0.448129252203908%
.................................................
Epoch 3/50
  Train Loss: 1.0190 | Train Acc: 58.40%
  Val   Loss: 0.9202 | Val   Acc: 63.68 | Val   Macro F1: 0.5781589241290483%
.................................................
Epoch 4/50
  Train Loss: 0.9290 | Train Acc: 61.83%
  Val   Loss: 1.6042 | Val   Acc: 43.05 | Val   Macro F1: 0.39344950383355926%
.................................................
Epoch 5/50
  Train Loss: 0.8486 | Train Acc: 64.07%
  Val   Loss: 0.8560 | Val   Acc: 63.23 | Val   Macro F1: 0.581138454229038%
.................................................


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 6/50
  Train Loss: 0.9890 | Train Acc: 58.59%
  Val   Loss: 1.3372 | Val   Acc: 47.76 | Val   Macro F1: 0.36994586484333936%
.................................................


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 7/50
  Train Loss: 0.9382 | Train Acc: 61.57%
  Val   Loss: 1.1424 | Val   Acc: 54.71 | Val   Macro F1: 0.4194351965545643%
.................................................
Epoch 8/50
  Train Loss: 0.9755 | Train Acc: 61.06%
  Val   Loss: 0.9232 | Val   Acc: 61.88 | Val   Macro F1: 0.5722954039542443%
.................................................
Epoch 9/50
  Train Loss: 0.7498 | Train Acc: 65.54%
  Val   Loss: 0.8867 | Val   Acc: 62.78 | Val   Macro F1: 0.515664617649547%
.................................................
Epoch 10/50
  Train Loss: 0.6516 | Train Acc: 70.48%
  Val   Loss: 0.7758 | Val   Acc: 68.39 | Val   Macro F1: 0.5971273271664019%
.................................................
Epoch 11/50
  Train Loss: 0.6116 | Train Acc: 70.06%
  Val   Loss: 0.7226 | Val   Acc: 70.40 | Val   Macro F1: 0.6478119635947975%
.................................................
Epoch 12/50
  Train Loss: 0.5876 | Train Acc: 71.83%
  Val   Loss: 0.7145 | Val   Acc: 69.73 | Val 

## Test model

In [None]:
# Setup: resnet 18, stratified sampling, raw image data (pixel matrix data).
#
# NOTE(review): the two lines below are disabled, so this cell evaluates the
# `model` object currently in memory rather than the weights saved to
# 'flat.pth'. Re-enable them to evaluate the saved checkpoint instead.
# model = resnet_model(num_classes=num_classes, freeze_backbone=False)
# model.load_state_dict(torch.load(path + 'flat.pth'))
model = model.to(device).eval()

test_loss, correct = test(model, test_loader, device, test_criterion)

              precision    recall  f1-score   support

           0     0.3333    0.7500    0.4615         4
           1     0.6949    0.9111    0.7885        45
           2     0.5833    0.7778    0.6667         9
           3     0.8571    0.6729    0.7539       107
           4     0.7023    0.6815    0.6917       135
           5     0.7302    0.6970    0.7132       198
           6     0.6646    0.7133    0.6881       150
           7     0.8563    0.8232    0.8394       181
           8     0.7808    0.9048    0.8382        63

    accuracy                         0.7466       892
   macro avg     0.6892    0.7702    0.7157       892
weighted avg     0.7543    0.7466    0.7473       892

