In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
import matplotlib.pyplot as plt
from collections import Counter

In [2]:
# Experiment settings gathered in one place so they can be tuned without
# touching the code that consumes them.
CONFIG = dict(
    # Dataset root directory.
    data_path="/kaggle/input/teeth-disease-classification/Teeth_Dataset",
    # Number of output classes; should equal len(class_names).
    num_classes=7,
    batch_size=32,
    num_epochs=15,
    learning_rate=0.001,
    # Pre-trained models expect 224x224 images
    image_size=(224, 224),
    # Class names in label order.
    class_names=["CaS", "CoS", "Gum", "MC", "OC", "OLP", "OT"],
    # File name under which the trained weights are stored.
    model_save_path="dental_classifier_model.pth",
)

In [3]:

def get_transforms(image_size):
    """Build the preprocessing pipelines for training and for evaluation.

    Both pipelines resize to ``image_size``, convert to a tensor and normalise
    with the mean/std used by ImageNet-trained models. The training pipeline
    additionally applies random rotation, horizontal flip and colour jitter
    between the resize and the tensor conversion.

    Args:
        image_size: Target size handed to ``transforms.Resize``.

    Returns:
        Tuple ``(train_transforms, val_transforms)`` of ``transforms.Compose``.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    # Augmentations used for the training split only.
    augmentations = [
        transforms.RandomRotation(20),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
    ]

    def compose_pipeline(middle_steps):
        # Shared skeleton: resize -> (optional augmentation) -> tensor -> normalise.
        return transforms.Compose([
            transforms.Resize(image_size),
            *middle_steps,
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ])

    return compose_pipeline(augmentations), compose_pipeline([])

In [4]:
class DentalDataset(Dataset):
    """Image dataset laid out as one sub-directory of ``data_dir`` per class.

    Args:
        data_dir: Directory that contains one folder per class name.
        class_map: Mapping from class folder name to integer label.
        transform: Optional callable applied to each loaded PIL image.

    Raises:
        FileNotFoundError: If ``data_dir`` is not an existing directory.
    """

    # Lower-case file extensions accepted as images; anything else in a class
    # folder (metadata files, nested folders, ...) is ignored instead of
    # crashing later inside ``Image.open``.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")

    def __init__(self, data_dir, class_map, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.class_map = class_map
        self.image_paths, self.labels = self._load_dataset()

    def _load_dataset(self):
        """Scan the class folders and return parallel lists ``(paths, labels)``."""
        import warnings  # local import: only needed for the missing-folder notice

        if not os.path.isdir(self.data_dir):
            raise FileNotFoundError(f"Data directory not found: {self.data_dir}")

        image_paths = []
        labels = []
        for class_name, label_idx in self.class_map.items():
            class_dir = os.path.join(self.data_dir, class_name)
            if not os.path.isdir(class_dir):
                # Keep the original best-effort behaviour (skip), but make the
                # gap visible: a missing class silently skews training.
                warnings.warn(f"Class directory not found, skipping: {class_dir}")
                continue
            # os.listdir order is arbitrary; sort so indices are reproducible.
            for img_name in sorted(os.listdir(class_dir)):
                if not img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                img_path = os.path.join(class_dir, img_name)
                if not os.path.isfile(img_path):
                    continue
                image_paths.append(img_path)
                labels.append(label_idx)
        return image_paths, labels

    def __len__(self):
        """Number of images found across all class folders."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; image is converted to RGB."""
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        # Context manager closes the underlying file as soon as the pixel data
        # has been copied by convert().
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label


## Model Architecture (Transfer Learning)

In [5]:
def get_model(num_classes, pretrained=True):
    """Create a ResNet-50 whose backbone is frozen and whose head is new.

    Args:
        num_classes: Output size of the replacement fully connected layer.
        pretrained: When true, load the ``IMAGENET1K_V1`` weights; otherwise
            the network is built without pre-trained weights.

    Returns:
        The ResNet-50 module. Every parameter that existed before the head was
        swapped has ``requires_grad=False``; the new ``fc`` layer is trainable.
    """
    weights = models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
    backbone = models.resnet50(weights=weights)

    # Freeze first, so only the head created below remains trainable.
    for frozen_param in backbone.parameters():
        frozen_param.requires_grad = False

    # Swap the final classifier for one sized to this task; a freshly built
    # nn.Linear has trainable parameters.
    head_in_features = backbone.fc.in_features
    backbone.fc = nn.Linear(head_in_features, num_classes)

    return backbone

## Training and Evaluation Loop

In [8]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: Module to optimise (already on ``device``).
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimiser holding the parameters to update.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to.

    Returns:
        A list with one dict per epoch, containing ``epoch`` (1-based),
        ``train_loss`` (mean of the per-batch losses) and ``val_accuracy``
        (percent), so the curves can be inspected or plotted afterwards.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    print("\n--- Starting Model Training (Transfer Learning) ---")
    history = []
    for epoch in range(num_epochs):
        # NOTE(review): train() also puts BatchNorm layers in training mode, so
        # their running statistics keep updating even when the backbone's
        # parameters are frozen -- confirm this is intended.
        model.train()
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Fail with a clear message instead of a bare ZeroDivisionError.
        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot train on an empty dataset.")

        epoch_loss = running_loss / num_batches
        val_accuracy = evaluate_model(model, val_loader, device, None)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")
        history.append({
            "epoch": epoch + 1,
            "train_loss": epoch_loss,
            "val_accuracy": val_accuracy,
        })
    print("--- Finished Training ---")
    return history

def evaluate_model(model, data_loader, device, set_name="Test"):
    """Compute top-1 accuracy of ``model`` over ``data_loader``.

    The model is switched to eval mode and left in that mode on return.

    Args:
        model: Module producing per-class scores of shape ``(batch, classes)``.
        data_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.
        set_name: Label used in the printed summary; pass ``None`` (or any
            falsy value) to suppress printing.

    Returns:
        Accuracy in percent. ``0.0`` is returned when the loader yields no
        samples, instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Index of the highest score per row; no need for the legacy
            # ``.data`` accessor inside a no_grad block.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Guard against an empty loader, where accuracy is undefined.
    accuracy = 100 * correct / total if total else 0.0
    if set_name:
        print(f"Accuracy on the {set_name} set: {accuracy:.2f} %")
    return accuracy

In [7]:
# --- Device ---------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Seed PyTorch's RNG so shuffling, augmentation and the new head's
# initialisation are repeatable as far as PyTorch's generator is concerned.
# Falls back to 42 when CONFIG has no "seed" entry.
torch.manual_seed(CONFIG.get("seed", 42))

# --- Data -----------------------------------------------------------------
train_dir = os.path.join(CONFIG["data_path"], "Training")
val_dir = os.path.join(CONFIG["data_path"], "Validation")
test_dir = os.path.join(CONFIG["data_path"], "Testing")
train_transform, val_transform = get_transforms(CONFIG["image_size"])

class_map = {name: i for i, name in enumerate(CONFIG["class_names"])}
# Catch a mismatched config (or duplicate class names) before building the model.
assert len(class_map) == CONFIG["num_classes"], (
    "CONFIG['num_classes'] does not match the number of distinct CONFIG['class_names']"
)

# A missing data directory raises FileNotFoundError from DentalDataset. It is
# left to propagate: calling exit() inside a notebook would shut the kernel
# down and hide the traceback.
train_dataset = DentalDataset(train_dir, class_map, train_transform)
val_dataset = DentalDataset(val_dir, class_map, val_transform)
test_dataset = DentalDataset(test_dir, class_map, val_transform)

# Fail early and clearly when a split contains no samples.
for split_name, split_dataset in (
    ("training", train_dataset),
    ("validation", val_dataset),
    ("testing", test_dataset),
):
    if len(split_dataset) == 0:
        raise RuntimeError(f"No images found for the {split_name} split in {split_dataset.data_dir}")

train_loader = DataLoader(train_dataset, batch_size=CONFIG["batch_size"], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG["batch_size"], shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=CONFIG["batch_size"], shuffle=False)

print(f"Loaded {len(train_dataset)} images for training.")
print(f"Loaded {len(val_dataset)} images for validation.")
print(f"Loaded {len(test_dataset)} images for testing.")

# --- Model, optimiser, loss -------------------------------------------------
model = get_model(CONFIG["num_classes"]).to(device)

# We only want to train the parameters of the final layer
params_to_update = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(params_to_update, lr=CONFIG["learning_rate"])
criterion = nn.CrossEntropyLoss()

# --- Train ------------------------------------------------------------------
train_model(model, train_loader, val_loader, criterion, optimizer, CONFIG["num_epochs"], device)

# --- Final evaluation on the held-out test split ----------------------------
print("\n--- Final Evaluation ---")
evaluate_model(model, test_loader, device, "Test")

# --- Persist the trained weights (state_dict only) --------------------------
print(f"\nSaving model to {CONFIG['model_save_path']}...")
torch.save(model.state_dict(), CONFIG['model_save_path'])
print("Model saved successfully.")

Using device: cuda
Loaded 3087 images for training.
Loaded 1028 images for validation.
Loaded 1028 images for testing.


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 190MB/s]



--- Starting Model Training (Transfer Learning) ---
Epoch [1/15], Loss: 1.5233, Val Accuracy: 63.81%
Epoch [2/15], Loss: 1.1295, Val Accuracy: 64.59%
Epoch [3/15], Loss: 0.9966, Val Accuracy: 62.84%
Epoch [4/15], Loss: 0.9408, Val Accuracy: 69.26%
Epoch [5/15], Loss: 0.8641, Val Accuracy: 71.79%
Epoch [6/15], Loss: 0.8303, Val Accuracy: 66.15%
Epoch [7/15], Loss: 0.8304, Val Accuracy: 67.32%
Epoch [8/15], Loss: 0.7863, Val Accuracy: 75.00%
Epoch [9/15], Loss: 0.7454, Val Accuracy: 75.39%
Epoch [10/15], Loss: 0.7527, Val Accuracy: 71.40%
Epoch [11/15], Loss: 0.7129, Val Accuracy: 75.19%
Epoch [12/15], Loss: 0.7013, Val Accuracy: 76.85%
Epoch [13/15], Loss: 0.6775, Val Accuracy: 75.58%
Epoch [14/15], Loss: 0.6949, Val Accuracy: 75.19%
Epoch [15/15], Loss: 0.6748, Val Accuracy: 75.39%
--- Finished Training ---

--- Final Evaluation ---
Accuracy on the Test set: 76.85 %

Saving model to dental_classifier_model.pth...
Model saved successfully.
