In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import cv2
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from PIL import Image
from datetime import datetime

output_file = 'Softmax_results.txt'
dataset_path = './removed_bg_dataset'

# Parameters
image_size = (256, 256)  # Resize images to reduce computation
batch_size = 32
epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Lists to store data
data = []
labels = []

# Load images from dataset. Every sub-directory of dataset_path is treated as
# one breed (class) and its name becomes the label of the images inside it.
# Directory listings are sorted because os.listdir returns entries in
# arbitrary order; a fixed sample order is required for the seeded
# train/test split below to be reproducible across machines and runs.
for breed in sorted(os.listdir(dataset_path)):
    breed_path = os.path.join(dataset_path, breed)
    if not os.path.isdir(breed_path):
        continue
    for img_name in sorted(os.listdir(breed_path)):
        img_path = os.path.join(breed_path, img_name)
        try:
            # Ensure the file is a valid image
            with Image.open(img_path) as img:
                img.verify()  # Check integrity

            # Read and preprocess the image. cv2.imread signals failure by
            # returning None instead of raising, so check it explicitly to
            # get a meaningful skip reason.
            image = cv2.imread(img_path)
            if image is None:
                raise ValueError("cv2.imread could not decode the file")
            image = cv2.resize(image, image_size)  # Resize image
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
            data.append(image.flatten())  # Flatten the image
            labels.append(breed)
        except Exception as e:
            # Best-effort loading: a bad file is skipped, but the reason is
            # reported so that systematic problems are not silently hidden.
            print(f"Skipping invalid image: {img_path} ({e})")

# Fail early with a clear message instead of an obscure error further down.
if not data:
    raise ValueError(f"No readable images found under {dataset_path}")

# Convert lists to NumPy arrays
data = np.array(data, dtype=np.float32) / 255.0  # Normalize pixel values
labels = np.array(labels)

# Encode class labels as integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Derive the number of classes from the data so the size of the model's output
# layer always matches the range of the encoded labels.
num_classes = len(label_encoder.classes_)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42, stratify=labels)

# Convert to PyTorch tensors
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.long), torch.tensor(y_test, dtype=torch.long)

# Create PyTorch Dataset and DataLoader
class ImageDataset(Dataset):
    """Index-aligned pairing of samples and labels.

    Item ``i`` is the tuple ``(data[i], labels[i])``; the dataset length is
    the length of ``data``.
    """

    def __init__(self, data, labels):
        self.data, self.labels = data, labels

    def __len__(self):
        sample_count = len(self.data)
        return sample_count

    def __getitem__(self, idx):
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target

# Wrap the tensors in datasets, then batch them. Only the training batches
# are shuffled; the test loader keeps the original sample order.
train_dataset, test_dataset = ImageDataset(X_train, y_train), ImageDataset(X_test, y_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define the Softmax model
class SoftmaxModel(nn.Module):
    """Three-layer fully connected classifier (input -> 512 -> 256 -> classes).

    ``forward`` returns raw logits; no softmax is applied inside the model.
    Dropout (p=0.5) is applied after the first hidden activation only.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        wide, narrow = 512, 256
        self.fc1 = nn.Linear(input_size, wide)
        self.dropout1 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(wide, narrow)
        self.fc3 = nn.Linear(narrow, num_classes)

    def forward(self, x):
        hidden = self.dropout1(F.relu(self.fc1(x)))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return logits

# Build the classifier; the input width is the number of features per sample.
model = SoftmaxModel(X_train.shape[1], num_classes).to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    correct = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # The criterion returns the mean loss of the batch. Weight it by the
        # batch size so the epoch figure is a true per-sample average that
        # does not grow with the number of batches (the last batch may be
        # smaller than batch_size).
        total_loss += loss.item() * batch_y.size(0)
        correct += (outputs.argmax(1) == batch_y).sum().item()

    avg_loss = total_loss / len(train_dataset)
    train_acc = correct / len(train_dataset)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {train_acc:.4f}")

# Evaluate the model on the held-out test set (dropout disabled, no gradients)
model.eval()
correct = 0
total = 0
all_preds = []
all_labels = []
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        outputs = model(batch_x)
        predicted = outputs.argmax(1)
        correct += (predicted == batch_y).sum().item()
        total += batch_y.size(0)
        # Collected on the CPU so scikit-learn can compute the F1 score.
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(batch_y.cpu().numpy())

test_acc = correct / total
f1 = f1_score(all_labels, all_preds, average='weighted')
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
results = f"Test Accuracy: {test_acc:.4f} for {dataset_path} \n F1 Score: {f1:.4f} Epochs: {epochs} Batch Size: {batch_size} Image Size: {image_size}"
print(results)

# Append (not overwrite) so results of successive runs accumulate in one file.
# The encoding is explicit so the report does not depend on the platform default.
with open(output_file, 'a', encoding='utf-8') as f:
    f.write(f"Report generated at: {current_time}\n{results}\n\n")


Epoch 1/100, Loss: 1782.2490, Accuracy: 0.1046
Epoch 2/100, Loss: 1726.5329, Accuracy: 0.0968
Epoch 3/100, Loss: 1726.8085, Accuracy: 0.1001
Epoch 4/100, Loss: 1728.0163, Accuracy: 0.1002
Epoch 5/100, Loss: 1727.1483, Accuracy: 0.0987
Epoch 6/100, Loss: 1725.6227, Accuracy: 0.1008
Epoch 7/100, Loss: 1722.6236, Accuracy: 0.1056
Epoch 8/100, Loss: 1721.2276, Accuracy: 0.1048
Epoch 9/100, Loss: 1718.6618, Accuracy: 0.1079
Epoch 10/100, Loss: 1717.4776, Accuracy: 0.1101
Epoch 11/100, Loss: 1714.4114, Accuracy: 0.1182
Epoch 12/100, Loss: 1716.2023, Accuracy: 0.1143
Epoch 13/100, Loss: 1715.0104, Accuracy: 0.1155
Epoch 14/100, Loss: 1715.1543, Accuracy: 0.1168
Epoch 15/100, Loss: 1714.4127, Accuracy: 0.1170
Epoch 16/100, Loss: 1711.8344, Accuracy: 0.1172
Epoch 17/100, Loss: 1713.0089, Accuracy: 0.1141
Epoch 18/100, Loss: 1713.6537, Accuracy: 0.1123
Epoch 19/100, Loss: 1712.8067, Accuracy: 0.1163
Epoch 20/100, Loss: 1714.2404, Accuracy: 0.1205
Epoch 21/100, Loss: 1710.6544, Accuracy: 0.1175
E