In [1]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m56.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [2]:
# Download dataset from Kaggle
!kaggle datasets download -d azouzmaroua/algeria-ultrasound-images-thyroid-dataset-auitd

Dataset URL: https://www.kaggle.com/datasets/azouzmaroua/algeria-ultrasound-images-thyroid-dataset-auitd
License(s): unknown
Downloading algeria-ultrasound-images-thyroid-dataset-auitd.zip to /content
 97% 100M/103M [00:05<00:00, 21.7MB/s] 
100% 103M/103M [00:05<00:00, 19.5MB/s]


In [22]:

import zipfile
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import glob
import numpy as np
import random
import torch_geometric.nn as pyg_nn
from torch_geometric.data import Data
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# ========== Reproducibility Setup ==========
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ========== Data Download ==========
!kaggle datasets download -d azouzmaroua/algeria-ultrasound-images-thyroid-dataset-auitd

# ========== Data Transformations ==========
train_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ========== Dataset & DataLoader ==========
class ThyroidDataset(Dataset):
    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = 2 if "normal thyroid" in img_path else (1 if "Malignant" in img_path else 0)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)

def load_dataset(root_dir):
    train_paths = glob.glob(os.path.join(root_dir, "train", "*", "*.jpg"))
    test_paths = glob.glob(os.path.join(root_dir, "test", "*", "*.jpg"))
    return train_paths, test_paths

# ========== Data Preparation ==========
batch_size = 32
zip_path = "/content/algeria-ultrasound-images-thyroid-dataset-auitd.zip"
extract_path = "/content/dataset_thyroid"

with zipfile.ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(extract_path)

dataset_root = os.path.join(extract_path, "dataset thyroid")
if not os.path.exists(dataset_root):
    dataset_root = extract_path

train_paths, test_paths = load_dataset(dataset_root)
train_dataset = ThyroidDataset(train_paths, transform=train_transforms)
test_dataset = ThyroidDataset(test_paths, transform=test_transforms)

# Initialize generator for reproducibility
def worker_init_fn(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

generator = torch.Generator()
generator.manual_seed(SEED)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    worker_init_fn=worker_init_fn,
    generator=generator,
    drop_last=True  # Ensures consistent batch sizes
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    worker_init_fn=worker_init_fn
)

# ========== Hybrid Model Architecture ==========
class HybridCNNGAT(nn.Module):
    def __init__(self):
        super(HybridCNNGAT, self).__init__()
        self.cnn = models.efficientnet_b4(weights="DEFAULT")
        for param in self.cnn.features[:-3].parameters():
            param.requires_grad = True
        self.cnn.classifier = nn.Identity()
        self.fc1 = nn.Linear(1792, 512)
        self.gat1 = pyg_nn.GATConv(512, 256, heads=4, concat=True, dropout=0.4)
        self.gat2 = pyg_nn.GATConv(256*4, 128, heads=4, concat=True, dropout=0.4)
        self.fc2 = nn.Linear(128*4, 3)

        # Initialize GAT layers with fixed seeds
        torch.manual_seed(SEED)
        self.gat1.reset_parameters()
        torch.manual_seed(SEED)
        self.gat2.reset_parameters()

    def forward(self, x, edge_index):
        cnn_features = self.cnn(x)
        x = torch.relu(self.fc1(cnn_features))
        x = self.gat1(x, edge_index)
        x = self.gat2(x, edge_index)
        return self.fc2(x)

# ========== Graph Construction ==========
def create_edge_index(num_nodes):
    edge_index = []
    for i in range(num_nodes):
        for j in range(i+1, min(i+3, num_nodes)):
            edge_index.append([i, j])
            edge_index.append([j, i])
    return torch.tensor(edge_index, dtype=torch.long).t().contiguous()

# ========== Training Setup ==========
model = HybridCNNGAT().to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)
criterion = nn.CrossEntropyLoss()

# ========== Training & Evaluation ==========
def train_model():
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        edge_index = create_edge_index(len(images)).to(device)

        optimizer.zero_grad()
        outputs = model(images, edge_index)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()

    scheduler.step()
    return total_loss/len(train_loader)

def evaluate_model():
    model.eval()
    preds, true_labels = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            edge_index = create_edge_index(len(images)).to(device)
            outputs = model(images, edge_index)
            preds.extend(outputs.argmax(dim=1).cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(true_labels, preds)
    precision = precision_score(true_labels, preds, average='weighted', zero_division=1)
    recall = recall_score(true_labels, preds, average='weighted', zero_division=1)
    f1 = f1_score(true_labels, preds, average='weighted', zero_division=1)
    return accuracy, precision, recall, f1

# ========== Main Execution ==========
num_epochs = 20
best_f1 = 0

for epoch in range(num_epochs):
    train_loss = train_model()
    acc, prec, rec, f1 = evaluate_model()

    if f1 > best_f1:
        best_f1 = f1
        torch.save(model.state_dict(), "best_model.pth")

# Load best model for final evaluation
model.load_state_dict(torch.load("best_model.pth"))
final_acc, final_prec, final_rec, final_f1 = evaluate_model()
print("\nFinal Evaluation with Best Model:")
print(f"Accuracy: {final_acc:.4f} | Precision: {final_prec:.4f}")
print(f"Recall: {final_rec:.4f} | F1 Score: {final_f1:.4f}")

Dataset URL: https://www.kaggle.com/datasets/azouzmaroua/algeria-ultrasound-images-thyroid-dataset-auitd
License(s): unknown
algeria-ultrasound-images-thyroid-dataset-auitd.zip: Skipping, found more recently modified local copy (use --force to force download)
Epoch 1/20
Train Loss: 1.0616
Test Metrics - Acc: 0.2869 | Prec: 0.8646 | Rec: 0.2869 | F1: 0.2621
Epoch 2/20
Train Loss: 1.0459
Test Metrics - Acc: 0.1838 | Prec: 0.8613 | Rec: 0.1838 | F1: 0.0666
Epoch 3/20
Train Loss: 0.9961
Test Metrics - Acc: 0.1838 | Prec: 0.8585 | Rec: 0.1838 | F1: 0.0662
Epoch 4/20
Train Loss: 0.9150
Test Metrics - Acc: 0.6490 | Prec: 0.8868 | Rec: 0.6490 | F1: 0.6906
Epoch 5/20
Train Loss: 0.9075
Test Metrics - Acc: 0.9192 | Prec: 0.9163 | Rec: 0.9192 | F1: 0.9172
Epoch 6/20
Train Loss: 0.9008
Test Metrics - Acc: 0.9220 | Prec: 0.9220 | Rec: 0.9220 | F1: 0.9220
Epoch 7/20
Train Loss: 0.8520
Test Metrics - Acc: 0.7660 | Prec: 0.9025 | Rec: 0.7660 | F1: 0.7945
Epoch 8/20
Train Loss: 0.8718
Test Metrics - Ac

  model.load_state_dict(torch.load("best_model.pth"))



Final Evaluation with Best Model:
Accuracy: 0.9220 | Precision: 0.9220
Recall: 0.9220 | F1 Score: 0.9220


In [None]:
import zipfile
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import glob
import numpy as np
import random
import torch_geometric.nn as pyg_nn
from torch_geometric.data import Data
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# ========== Reproducibility Setup ==========
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True  # Ensures deterministic algorithms
torch.backends.cudnn.benchmark = False    # Disables non-deterministic algorithms
os.environ['PYTHONHASHSEED'] = str(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ========== Data Download ==========
!kaggle datasets download -d azouzmaroua/algeria-ultrasound-images-thyroid-dataset-auitd

# ========== Data Transformations ==========
train_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ========== Dataset & DataLoader ==========
class ThyroidDataset(Dataset):
    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = 2 if "normal thyroid" in img_path else (1 if "Malignant" in img_path else 0)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)

def load_dataset(root_dir):
    train_paths = glob.glob(os.path.join(root_dir, "train", "*", "*.jpg"))
    test_paths = glob.glob(os.path.join(root_dir, "test", "*", "*.jpg"))
    return train_paths, test_paths

# ========== Data Preparation ==========
batch_size = 32
zip_path = "/content/algeria-ultrasound-images-thyroid-dataset-auitd.zip"
extract_path = "/content/dataset_thyroid"

with zipfile.ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(extract_path)

dataset_root = os.path.join(extract_path, "dataset thyroid")
if not os.path.exists(dataset_root):
    dataset_root = extract_path

train_paths, test_paths = load_dataset(dataset_root)
train_dataset = ThyroidDataset(train_paths, transform=train_transforms)
test_dataset = ThyroidDataset(test_paths, transform=test_transforms)

# Initialize generator for reproducibility
def worker_init_fn(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

generator = torch.Generator()
generator.manual_seed(SEED)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    worker_init_fn=worker_init_fn,
    generator=generator,
    drop_last=True  # Ensures consistent batch sizes
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    worker_init_fn=worker_init_fn
)

# ========== Hybrid Model Architecture ==========
class HybridCNNGAT(nn.Module):
    def __init__(self):
        super(HybridCNNGAT, self).__init__()
        self.cnn = models.efficientnet_b4(weights="DEFAULT")
        for param in self.cnn.features[:-3].parameters():
            param.requires_grad = True
        self.cnn.classifier = nn.Identity()
        self.fc1 = nn.Linear(1792, 512)
        self.gat1 = pyg_nn.GATConv(512, 256, heads=4, concat=True, dropout=0.4)
        self.gat2 = pyg_nn.GATConv(256*4, 128, heads=4, concat=True, dropout=0.4)
        self.fc2 = nn.Linear(128*4, 3)

        # Initialize GAT layers with fixed seeds
        torch.manual_seed(SEED)
        self.gat1.reset_parameters()
        torch.manual_seed(SEED)
        self.gat2.reset_parameters()

    def forward(self, x, edge_index):
        cnn_features = self.cnn(x)
        x = torch.relu(self.fc1(cnn_features))
        x = self.gat1(x, edge_index)
        x = self.gat2(x, edge_index)
        return self.fc2(x)

# ========== Graph Construction ==========
def create_edge_index(num_nodes):
    edge_index = []
    for i in range(num_nodes):
        for j in range(i+1, min(i+3, num_nodes)):
            edge_index.append([i, j])
            edge_index.append([j, i])
    return torch.tensor(edge_index, dtype=torch.long).t().contiguous()

# ========== Training Setup ==========
model = HybridCNNGAT().to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)
criterion = nn.CrossEntropyLoss()

# ========== Training & Evaluation ==========
def train_model():
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        edge_index = create_edge_index(len(images)).to(device)

        optimizer.zero_grad()
        outputs = model(images, edge_index)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()

    scheduler.step()
    return total_loss/len(train_loader)

def evaluate_model():
    model.eval()
    preds, true_labels = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            edge_index = create_edge_index(len(images)).to(device)
            outputs = model(images, edge_index)
            preds.extend(outputs.argmax(dim=1).cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(true_labels, preds)
    precision = precision_score(true_labels, preds, average='weighted', zero_division=1)
    recall = recall_score(true_labels, preds, average='weighted', zero_division=1)
    f1 = f1_score(true_labels, preds, average='weighted', zero_division=1)
    return accuracy, precision, recall, f1

# ========== Main Execution ==========
num_epochs = 20
best_f1 = 0

for epoch in range(num_epochs):
    train_loss = train_model()
    acc, prec, rec, f1 = evaluate_model()

    if f1 > best_f1:
        best_f1 = f1
        torch.save(model.state_dict(), "best_model.pth")

# Load best model for final evaluation
model.load_state_dict(torch.load("best_model.pth"))
final_acc, final_prec, final_rec, final_f1 = evaluate_model()
print("\nFinal Evaluation with Best Model:")
print(f"Accuracy: {final_acc:.4f} | Precision: {final_prec:.4f}")
print(f"Recall: {final_rec:.4f} | F1 Score: {final_f1:.4f}")
