In [1]:
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)




Mounted at /content/gdrive


In [2]:
!cp /content/gdrive/MyDrive/images_mushrooms.zip /content/
!unzip images_mushrooms

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: images_mushrooms/batch_3/992465/992465_1.jpg  
   creating: images_mushrooms/batch_3/128516/
  inflating: images_mushrooms/batch_3/128516/128516_10.jpeg  
  inflating: images_mushrooms/batch_3/128516/128516_11.jpeg  
   creating: images_mushrooms/batch_3/209793/
  inflating: images_mushrooms/batch_3/209793/209793_9.jpeg  
   creating: images_mushrooms/batch_3/123175/
  inflating: images_mushrooms/batch_3/123175/123175_186.jpg  
  inflating: images_mushrooms/batch_3/123175/123175_160.jpg  
  inflating: images_mushrooms/batch_3/123175/123175_197.jpeg  
  inflating: images_mushrooms/batch_3/123175/123175_184.jpg  
  inflating: images_mushrooms/batch_3/123175/123175_183.jpg  
  inflating: images_mushrooms/batch_3/123175/123175_168.jpeg  
  inflating: images_mushrooms/batch_3/123175/123175_199.jpg  
  inflating: images_mushrooms/batch_3/123175/123175_170.jpeg  
  inflating: images_mushrooms/batch_3/123175/123175_1

In [3]:
!pip install colorama


Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6


In [4]:
import os
import shutil
from datetime import datetime
from colorama import Fore, Style

# Function for timestamped logging
def log(message, color=Fore.GREEN):
    print(f"{color}[{datetime.now().strftime('%H:%M:%S')}] {message}{Style.RESET_ALL}")

# Input and output directories
data_dir = "images_mushrooms"  # Original dataset with batches
train_dir = "to_train"  # Destination for merged dataset

# Ensure the output directory exists
os.makedirs(train_dir, exist_ok=True)

log("Merging all batches into unified class folders...", Fore.CYAN)

# Iterate over batch directories
for batch_folder in os.scandir(data_dir):
    if not batch_folder.is_dir():
        continue

    batch_path = batch_folder.path

    for class_folder in os.scandir(batch_path):
        if not class_folder.is_dir():
            continue

        class_path = class_folder.path
        dest_class_path = os.path.join(train_dir, class_folder.name)
        os.makedirs(dest_class_path, exist_ok=True)

        # Move all images into the unified class folder
        files = list(os.scandir(class_path))
        if files:
            log(f"Merging {len(files)} images from {batch_folder.name}/{class_folder.name} → {dest_class_path}", Fore.YELLOW)
            for file in files:
                src_path = file.path
                dst_path = os.path.join(dest_class_path, file.name)

                # Ensure filename uniqueness with minimal overhead
                base, ext = os.path.splitext(file.name)
                counter = 1
                while os.path.exists(dst_path):
                    dst_path = os.path.join(dest_class_path, f"{base}_{counter}{ext}")
                    counter += 1

                shutil.move(src_path, dst_path)

log("Batch merging complete!", Fore.GREEN)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[33m[15:37:45] Merging 10 images from batch_5/118140 → to_train/118140[0m
[33m[15:37:45] Merging 8 images from batch_5/49138 → to_train/49138[0m
[33m[15:37:45] Merging 2 images from batch_5/118400 → to_train/118400[0m
[33m[15:37:45] Merging 1 images from batch_5/510853 → to_train/510853[0m
[33m[15:37:45] Merging 2 images from batch_5/322931 → to_train/322931[0m
[33m[15:37:45] Merging 10 images from batch_5/901819 → to_train/901819[0m
[33m[15:37:45] Merging 1 images from batch_5/344533 → to_train/344533[0m
[33m[15:37:45] Merging 1 images from batch_5/462268 → to_train/462268[0m
[33m[15:37:45] Merging 2 images from batch_5/499719 → to_train/499719[0m
[33m[15:37:45] Merging 2 images from batch_5/48719 → to_train/48719[0m
[33m[15:37:45] Merging 1 images from batch_5/1590678 → to_train/1590678[0m
[33m[15:37:45] Merging 2 images from batch_5/383085 → to_train/383085[0m
[33m[15:37:45] Merging 1 images fr

In [9]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torchvision import models
from datetime import datetime
from tqdm import tqdm
from colorama import Fore, Style
from PIL import Image
# Function for timestamped logging
def log(message, color=Fore.GREEN):
    print(f"{color}[{datetime.now().strftime('%H:%M:%S')}] {message}{Style.RESET_ALL}")

# Data paths
train_dir = "to_train"

def check_images(directory):
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    img.verify()  # Verify the image integrity
            except (IOError, SyntaxError, OSError):
                print(f"Corrupted image detected and removed: {file_path}")
                os.remove(file_path)  # Remove the corrupted file

# Check your training dataset
check_images("to_train")

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load dataset
dataset = datasets.ImageFolder(root=train_dir, transform=transform)
log(f"Loaded dataset with {len(dataset)} images", Fore.GREEN)

# Split dataset
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

log(f"Train dataset size: {len(train_dataset)}, Test dataset size: {len(test_dataset)}", Fore.BLUE)

# Load EfficientNet model
log("Loading EfficientNet model...", Fore.CYAN)
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)

# Adjust output layer
num_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_features, len(dataset.classes))

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
log(f"Using device: {device}", Fore.MAGENTA)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Training loop with batch-averaged accuracy
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        running_corrects = 0
        running_total = 0

        log(f"Epoch {epoch+1}/{epochs} - Training Started", Fore.YELLOW)

        with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{epochs}", unit="batch") as pbar:
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                # Compute batch accuracy
                _, predicted = torch.max(outputs, 1)
                running_total += labels.size(0)
                running_corrects += (predicted == labels).sum().item()

                avg_loss = running_loss / (pbar.n + 1)
                avg_acc = 100 * running_corrects / running_total

                pbar.set_postfix(loss=f"{avg_loss:.4f}", acc=f"{avg_acc:.2f}%")
                pbar.update(1)  # Update progress bar

        log(f"Epoch {epoch+1} Completed - Avg Loss: {avg_loss:.4f}, Accuracy: {avg_acc:.2f}%", Fore.GREEN)

# Model evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    running_total = 0

    log("Evaluating Model...", Fore.CYAN)

    with torch.no_grad():
        with tqdm(total=len(test_loader), desc="Testing", unit="batch") as pbar:
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                running_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                running_total += labels.size(0)
                running_corrects += (predicted == labels).sum().item()

                avg_loss = running_loss / (pbar.n + 1)
                avg_acc = 100 * running_corrects / running_total

                pbar.set_postfix(loss=f"{avg_loss:.4f}", acc=f"{avg_acc:.2f}%")
                pbar.update(1)

    log(f"Test Accuracy: {avg_acc:.2f}%, Test Loss: {avg_loss:.4f}", Fore.CYAN)

# Start training
train_model(model, train_loader, criterion, optimizer, epochs=15)

# Evaluate after training
evaluate_model(model, test_loader)


[32m[15:42:03] Loaded dataset with 50382 images[0m
[34m[15:42:03] Train dataset size: 40305, Test dataset size: 10077[0m
[36m[15:42:03] Loading EfficientNet model...[0m
[35m[15:42:03] Using device: cuda[0m
[33m[15:42:03] Epoch 1/15 - Training Started[0m


Epoch 1/15:  11%|█         | 139/1260 [00:30<04:03,  4.61batch/s, acc=5.96%, loss=6.8821]


OSError: broken data stream when reading image file

In [None]:

# Define the filename for saving
model_path = "efficientnet_fungi_classifier.pth"

# Save model state dictionary
torch.save(model.state_dict(), model_path)

print(f"Model saved successfully as {model_path}")

# Ensure this is the same directory used during training
train_dir = "to_train"

# Load dataset using ImageFolder (same method used during training)
dataset = datasets.ImageFolder(root=train_dir)

# Get class names in the same order as the model was trained
class_names = dataset.classes  # This ensures correct class indexing

# Save class names to a file
class_names_path = "class_names.txt"

with open(class_names_path, "w") as f:
    for class_name in class_names:
        f.write(class_name + "\n")

print(f"✅ Class names saved to {class_names_path}")
print(f"Classes: {class_names}")



Model saved successfully as efficientnet_bird_classifier.pth


In [None]:
!cp /content/efficientnet_bird_classifier.pth /content/gdrive/MyDrive/


In [9]:
!rm -rf images_mushrooms/