In [9]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np
from PIL import Image
import logging

# Set up logging to track invalid files
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [10]:
# 1. Define paths and parameters
DATASET_PATH = "D:\\goats"  # Your dataset path
IMG_SIZE = (224, 224)  # Standard size for CNNs
BATCH_SIZE = 32
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
# 2. Define data transforms
train_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_test_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [12]:
# 3. Validate images
def verify_image(img_path):
    try:
        img = Image.open(img_path)
        img.verify()  # Verify image integrity
        img = Image.open(img_path)  # Re-open for format check
        img.convert("RGB")  # Ensure it can be converted to RGB
        return True
    except Exception as e:
        logging.error(f"Invalid image: {img_path}, Error: {str(e)}")
        return False

# 4. Load dataset and filter invalid images
dataset = ImageFolder(root=DATASET_PATH, transform=val_test_transform)
valid_samples = [(path, label) for path, label in dataset.samples if verify_image(path)]
if len(valid_samples) < len(dataset.samples):
    logging.warning(f"Filtered out {len(dataset.samples) - len(valid_samples)} invalid images")
dataset.samples = valid_samples

ERROR: Invalid image: D:\goats\healthy_goat\healthy goat_446.jpeg, Error: cannot identify image file 'D:\\goats\\healthy_goat\\healthy goat_446.jpeg'


In [16]:
# 5. Split dataset into train, validation, and test sets
labels = np.array([label for _, label in dataset.samples])
indices = np.arange(len(dataset))
train_idx, temp_idx, _, temp_labels = train_test_split(
    indices, labels, test_size=0.3, stratify=labels, random_state=42
)
val_idx, test_idx = train_test_split(
    temp_idx, test_size=0.5, stratify=temp_labels, random_state=42
)

# Create subsets for train, validation, and test
train_dataset = Subset(dataset, train_idx)
val_dataset = Subset(dataset, val_idx)
test_dataset = Subset(dataset, test_idx)

# Apply training transforms to train_dataset
train_dataset.dataset.transform = train_transform

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 6. Define the CNN model
class GoatCNN(nn.Module):
    def __init__(self):
        super(GoatCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 28 * 28, 128)  # Adjust based on image size
        self.fc2 = nn.Linear(128, 1)  # Binary classification
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(-1, 64 * 28 * 28)  # Flatten
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

In [17]:
# 7. Initialize model, loss function, and optimizer
model = GoatCNN().to(DEVICE)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# 8. Training loop
def train_model():
    best_val_loss = float('inf')
    for epoch in range(NUM_EPOCHS):
        # Training
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE).float().unsqueeze(1)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)
        train_loss /= len(train_loader.dataset)

        # Validation
        model.eval()
        val_loss = 0.0
        val_preds, val_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(DEVICE), labels.to(DEVICE).float().unsqueeze(1)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
                val_preds.extend(torch.sigmoid(outputs).cpu().numpy().flatten() > 0.5)
                val_labels.extend(labels.cpu().numpy().flatten())
        val_loss /= len(val_loader.dataset)
        val_accuracy = accuracy_score(val_labels, val_preds)

        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Train Loss: {train_loss:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "goat_classifier.pth")


In [15]:
# 9. Evaluate on test set
def evaluate_model():
    model.load_state_dict(torch.load("goat_classifier.pth"))
    model.eval()
    test_preds, test_labels = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE).float().unsqueeze(1)
            outputs = model(images)
            test_preds.extend(torch.sigmoid(outputs).cpu().numpy().flatten() > 0.5)
            test_labels.extend(labels.cpu().numpy().flatten())

    print("\nTest Set Evaluation:")
    print(f"Accuracy: {accuracy_score(test_labels, test_preds):.4f}")
    print("Classification Report:")
    print(classification_report(test_labels, test_preds, target_names=["Healthy", "Unhealthy"]))
    print("Confusion Matrix:")
    print(confusion_matrix(test_labels, test_preds))

# 10. Run training and evaluation
if __name__ == "__main__":
    print(f"Using device: {DEVICE}")
    print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}, Test samples: {len(test_dataset)}")
    train_model()
    evaluate_model()

Using device: cpu
Train samples: 648, Val samples: 139, Test samples: 140
Epoch 1/20, Train Loss: 0.8267, Val Loss: 0.6723, Val Accuracy: 0.5180
Epoch 2/20, Train Loss: 0.6857, Val Loss: 0.6731, Val Accuracy: 0.5827
Epoch 3/20, Train Loss: 0.6524, Val Loss: 0.6039, Val Accuracy: 0.6763
Epoch 4/20, Train Loss: 0.5988, Val Loss: 0.5497, Val Accuracy: 0.6978
Epoch 5/20, Train Loss: 0.5279, Val Loss: 0.6013, Val Accuracy: 0.6331
Epoch 6/20, Train Loss: 0.5481, Val Loss: 0.4535, Val Accuracy: 0.7698
Epoch 7/20, Train Loss: 0.4506, Val Loss: 0.3416, Val Accuracy: 0.8489
Epoch 8/20, Train Loss: 0.3886, Val Loss: 0.2966, Val Accuracy: 0.9065
Epoch 9/20, Train Loss: 0.3486, Val Loss: 0.2106, Val Accuracy: 0.9424
Epoch 10/20, Train Loss: 0.2937, Val Loss: 0.2691, Val Accuracy: 0.9209
Epoch 11/20, Train Loss: 0.2594, Val Loss: 0.1659, Val Accuracy: 0.9137
Epoch 12/20, Train Loss: 0.2597, Val Loss: 0.1848, Val Accuracy: 0.9353
Epoch 13/20, Train Loss: 0.2103, Val Loss: 0.1355, Val Accuracy: 0.9424