<a href="https://colab.research.google.com/github/JustinGaj/verizon-1a-project-falcon/blob/main/cell_tower_mini_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Mini model for filtering out images that do not contain cell towers

In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="ta7eF8BgO0KGqSxF1rpZ")
project = rf.workspace("kalynb700").project("bird-nest-dataset-kpvdq-gc2uo-upzvj")
version = project.version(1)
dataset = version.download("yolov8")


Collecting roboflow
  Downloading roboflow-1.2.11-py3-none-any.whl.metadata (9.7 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pi-heif<2 (from roboflow)
  Downloading pi_heif-1.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.5 kB)
Collecting pillow-avif-plugin<2 (from roboflow)
  Downloading pillow_avif_plugin-1.5.2-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.2.11-py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading idna-3.7-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━

Downloading Dataset Version Zip in Bird-nest-dataset-1 to yolov8:: 100%|██████████| 156526/156526 [00:09<00:00, 15972.33it/s]





Extracting Dataset Version Zip to Bird-nest-dataset-1 in yolov8:: 100%|██████████| 1256/1256 [00:00<00:00, 2449.72it/s]


In [None]:
# Reload the best checkpoint written by training.
# NOTE: this cell relies on `torch` and `create_model` from the training cell
# further down, so that cell must have been executed first.
model = create_model(num_classes=2)
# map_location keeps the load working on a CPU-only runtime even though the
# checkpoint was saved from a CUDA model; the model itself stays on the CPU.
model.load_state_dict(torch.load('best_cell_tower_model.pth', map_location='cpu'))
model.eval()  # Set to evaluation mode

The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
import os
import shutil
import yaml
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tqdm import tqdm

class CellTowerDataset(Dataset):
    """Labelled image dataset used to train the cell tower classifier.

    Holds parallel sequences of image paths and labels. Images are opened on
    access, converted to RGB and passed through ``transform`` when one is set.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths, self.labels, self.transform = image_paths, labels, transform

    def __len__(self):
        # One sample per image path.
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        rgb_image = Image.open(self.image_paths[idx]).convert('RGB')
        target = self.labels[idx]
        sample = self.transform(rgb_image) if self.transform else rgb_image
        return sample, target

def parse_yolo_labels(label_path, cell_class_id, non_cell_class_ids):
    """Classify an image from the class ids found in its YOLO label file.

    Each non-blank line of the file is expected to start with an integer class
    id; the remaining tokens on the line are ignored here.

    Args:
        label_path: Path to the YOLO ``.txt`` annotation file.
        cell_class_id: Class id that marks a cell tower (may be ``None``).
        non_cell_class_ids: Iterable of class ids that mark non-cell towers.

    Returns:
        ``None`` if the file is missing or holds no annotations (empty or
        whitespace-only), otherwise ``'cell'``, ``'non_cell'`` or
        ``'unlabeled'``. A cell tower annotation takes priority over a
        non-cell annotation, which takes priority over any other class.

    Raises:
        ValueError: If the first token of an annotation line is not an integer.
    """
    if not os.path.exists(label_path):
        return None

    with open(label_path, 'r') as f:
        # Blank lines carry no annotation, so a file made only of whitespace
        # is treated exactly like an empty file.
        class_ids = {int(line.split()[0]) for line in f if line.strip()}

    if not class_ids:
        return None

    # Priority: cell tower > non-cell > other
    if cell_class_id in class_ids:
        return 'cell'
    if not class_ids.isdisjoint(non_cell_class_ids):
        return 'non_cell'
    return 'unlabeled'

def inspect_labels(data_dir, num_samples=10):
    """Print the raw text of up to ``num_samples`` label files from the train split.

    Looks in ``<data_dir>/train/labels``; prints a notice and returns if that
    directory does not exist. Files with no non-whitespace content are
    reported as ``EMPTY``.
    """
    labels_root = os.path.join(data_dir, 'train', 'labels')

    if os.path.exists(labels_root):
        txt_names = []
        for entry in os.listdir(labels_root):
            if entry.endswith('.txt'):
                txt_names.append(entry)

        print(f"\n=== INSPECTING {num_samples} LABEL FILES ===")
        for name in txt_names[:num_samples]:
            print(f"\nFile: {name}")
            with open(os.path.join(labels_root, name), 'r') as handle:
                text = handle.read().strip()
            if text:
                print(f"Content: {text}")
            else:
                print("Content: EMPTY")
    else:
        print(f"Labels directory not found: {labels_root}")

# File extensions (compared case-insensitively) that count as dataset images.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _list_image_files(images_dir):
    """Return the sorted image file names in ``images_dir`` (``[]`` if it does not exist)."""
    if not os.path.exists(images_dir):
        return []
    # os.listdir order is arbitrary; sorting keeps every downstream list reproducible.
    return sorted(
        name for name in os.listdir(images_dir)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    )


def filter_dataset(data_dir, class_names):
    """
    Filter dataset:
    - TRAIN: Only use labeled images (cell=1, non/pow=0) for training
    - VALID/TEST: Keep ALL images for prediction (will be labeled by model)

    Image files are listed in sorted order so repeated runs return identical
    lists.

    Args:
        data_dir: Root directory of the dataset (should contain train/valid/test)
        class_names: Dictionary mapping class IDs to names. The class named
            'cell' (case-insensitive) is the positive class; classes named
            'non' or 'pow' are the negative classes.

    Returns:
        Tuple ``(train_data, train_unlabeled, valid_images, test_images)``:
        - train_data: ``{'images': [paths], 'labels': [1 or 0]}`` for labeled
          training images (1 = has cell tower, 0 = no cell tower)
        - train_unlabeled: paths of training images with no usable label
          (missing/empty label file or only other classes)
        - valid_images / test_images: paths of every image in those splits
    """
    cell_class_id = None
    non_cell_class_ids = []

    # Find class IDs
    for class_id, name in class_names.items():
        lowered = name.lower()
        if lowered == 'cell':
            cell_class_id = class_id
        elif lowered in ('non', 'pow'):
            non_cell_class_ids.append(class_id)

    print(f"Cell tower class ID: {cell_class_id}")
    print(f"Non-cell tower class IDs: {non_cell_class_ids}")
    if cell_class_id is None:
        # Without a positive class every training image ends up negative or unlabeled.
        print("WARNING: no 'cell' class found in class_names; "
              "no image will be labeled as containing a cell tower")

    # Training data (labeled only) and leftovers from train (other classes)
    train_data = {'images': [], 'labels': []}
    train_unlabeled = []
    train_counts = {'cell': 0, 'non_cell': 0, 'unlabeled': 0}
    # Binary target assigned to each usable label type.
    target_for = {'cell': 1, 'non_cell': 0}

    # Process TRAIN split - filter for labeled data only
    train_images_dir = os.path.join(data_dir, 'train', 'images')
    train_labels_dir = os.path.join(data_dir, 'train', 'labels')
    train_files = _list_image_files(train_images_dir)

    for img_file in train_files:
        img_path = os.path.join(train_images_dir, img_file)
        label_path = os.path.join(train_labels_dir, os.path.splitext(img_file)[0] + '.txt')

        label_type = parse_yolo_labels(label_path, cell_class_id, non_cell_class_ids)

        if label_type in target_for:
            train_counts[label_type] += 1
            train_data['images'].append(img_path)
            train_data['labels'].append(target_for[label_type])
        else:
            # Covers both 'unlabeled' and None (missing or empty label file).
            train_counts['unlabeled'] += 1
            train_unlabeled.append(img_path)

    # Process VALID and TEST splits - keep ALL images for prediction
    valid_images_dir = os.path.join(data_dir, 'valid', 'images')
    valid_images = [os.path.join(valid_images_dir, name)
                    for name in _list_image_files(valid_images_dir)]

    test_images_dir = os.path.join(data_dir, 'test', 'images')
    test_images = [os.path.join(test_images_dir, name)
                   for name in _list_image_files(test_images_dir)]

    # Print detailed statistics
    print("\n=== DATASET STATISTICS ===")
    print(f"\nTRAIN (for training):")
    print(f"  Total images: {len(train_files)}")
    print(f"  Cell tower images (label=1): {train_counts['cell']}")
    print(f"  Non-cell tower images (label=0): {train_counts['non_cell']}")
    print(f"  Unlabeled (other classes): {train_counts['unlabeled']}")
    print(f"  → Training set size: {len(train_data['images'])}")

    print(f"\nVALID (for prediction):")
    print(f"  Total images: {len(valid_images)}")
    print(f"  → All will be predicted by model")

    print(f"\nTEST (for prediction):")
    print(f"  Total images: {len(test_images)}")
    print(f"  → All will be predicted by model")

    return train_data, train_unlabeled, valid_images, test_images

def get_transforms(is_training=True):
    """Build the torchvision preprocessing pipeline for one mode.

    Both modes resize to 224x224, convert to a tensor and normalize with fixed
    per-channel mean/std constants. Training mode additionally applies a
    random horizontal flip, a random rotation (15) and colour jitter between
    the resize and the tensor conversion.
    """
    steps = [transforms.Resize((224, 224))]

    if is_training:
        # Augmentations are only applied while training.
        steps += [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ]

    steps += [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)

def create_model(num_classes=2):
    """Create a ResNet50 classifier initialised from ImageNet weights.

    Args:
        num_classes: Number of output logits of the replacement head.

    Returns:
        A torchvision ResNet50 whose ``fc`` layer is replaced by a small
        dropout/linear head. All backbone parameters except the last few
        tensors are frozen.
    """
    # `pretrained=True` is deprecated since torchvision 0.13; this weights enum
    # is the documented equivalent and loads the same checkpoint.
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

    # Freeze early layers: everything except the last 15 parameter tensors.
    # At this point those 15 still include the original fc weight and bias,
    # which are discarded when the head is replaced below.
    for param in list(model.parameters())[:-15]:
        param.requires_grad = False

    # Replace final layer (newly created layers are trainable by default)
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_features, 256),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(256, num_classes)
    )

    return model

def train_model(model, train_loader, val_loader, num_epochs=20, device='cuda',
                save_path='best_cell_tower_model.pth'):
    """Train the classification model and checkpoint its weights.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: DataLoader yielding ``(images, labels)`` training batches.
        val_loader: DataLoader yielding ``(images, labels)`` validation
            batches. If its dataset is empty, validation is skipped and the
            weights are saved after every epoch instead.
        num_epochs: Number of passes over ``train_loader``.
        device: Device string passed to ``.to()``.
        save_path: Where the ``state_dict`` checkpoint is written.

    Returns:
        The model in its state after the final epoch. This is not necessarily
        the best checkpoint; load ``save_path`` for that.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.5, patience=3)

    model = model.to(device)
    # Start below any reachable accuracy so the first validated epoch always
    # writes a checkpoint. With 0.0 and a strict '>', a run whose accuracy
    # stayed at 0 would leave a stale or missing file for the caller to load.
    best_val_acc = float('-inf')
    has_validation = len(val_loader.dataset) > 0

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_preds = []
        train_labels = []

        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        for images, labels in pbar:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            preds = outputs.argmax(dim=1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(labels.cpu().numpy())

            pbar.set_postfix({'loss': loss.item()})

        # Mean of the per-batch losses
        train_loss /= len(train_loader)
        train_acc = accuracy_score(train_labels, train_preds)

        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        # Validation phase
        if has_validation:
            model.eval()
            val_loss = 0.0
            val_preds = []
            val_labels = []

            with torch.no_grad():
                for images, labels in tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Val]'):
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                    val_loss += loss.item()
                    preds = outputs.argmax(dim=1)
                    val_preds.extend(preds.cpu().numpy())
                    val_labels.extend(labels.cpu().numpy())

            val_loss /= len(val_loader)
            val_acc = accuracy_score(val_labels, val_preds)

            # The LR schedule follows validation loss, while the checkpoint
            # below is chosen by validation accuracy.
            scheduler.step(val_loss)
            print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), save_path)
                print(f'Best model saved with validation accuracy: {best_val_acc:.4f}')
        else:
            # No validation set: overwrite the checkpoint after every epoch
            torch.save(model.state_dict(), save_path)
            print('Model saved (no validation set)')

    return model

def predict_unlabeled(model, image_paths, device='cuda', batch_size=32):
    """Run the classifier over image files that have no labels.

    The images go through the non-training transform pipeline and are fed to
    ``model`` in order, in batches of ``batch_size``. The model is put in
    eval mode but is not moved to ``device`` here.

    Returns:
        ``(paths, predictions)``: the image paths in processing order and the
        predicted class index for each of them.
    """
    class UnlabeledDataset(Dataset):
        """Yields ``(image, path)`` pairs instead of ``(image, label)``."""

        def __init__(self, image_paths, transform):
            self.image_paths = image_paths
            self.transform = transform

        def __len__(self):
            return len(self.image_paths)

        def __getitem__(self, idx):
            source = self.image_paths[idx]
            picture = Image.open(source).convert('RGB')
            if self.transform:
                picture = self.transform(picture)
            return picture, source

    eval_transform = get_transforms(is_training=False)
    loader = DataLoader(
        UnlabeledDataset(image_paths, eval_transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=2,
    )

    model.eval()
    seen_paths, predicted = [], []

    with torch.no_grad():
        for batch, batch_paths in tqdm(loader, desc='Predicting images'):
            logits = model(batch.to(device))
            # Index of the largest logit per row is the predicted class.
            best = torch.max(logits, 1)[1]
            predicted.extend(best.cpu().numpy())
            seen_paths.extend(batch_paths)

    return seen_paths, predicted

def remove_non_cell_tower_images(image_paths, predictions, split_name):
    """Move images predicted as "no cell tower" out of a dataset split.

    Every image whose prediction equals 0 is moved, together with its YOLO
    label file if one exists, into
    ``<split_dir>/removed_non_cell_towers_<split_name>``. ``<split_dir>`` is
    the grandparent directory of the first image path. The label file is
    looked up as ``<image stem>.txt`` in the ``labels`` directory next to the
    image's own directory.

    Args:
        image_paths: Paths of the images that were predicted.
        predictions: Predicted class per image (0 = no cell tower), aligned
            with ``image_paths``.
        split_name: Split name used in the folder name and the printed report.
    """
    if len(image_paths) == 0:
        # Nothing to derive the split directory from; avoid indexing [0].
        print(f"\n{split_name.upper()} Cleanup: no images to process")
        return

    # Get the base directory (parent of the images folder)
    base_dir = os.path.dirname(os.path.dirname(image_paths[0]))
    removed_dir = os.path.join(base_dir, f'removed_non_cell_towers_{split_name}')
    os.makedirs(removed_dir, exist_ok=True)

    removed_count = 0
    kept_count = 0

    for img_path, pred in zip(image_paths, predictions):
        if pred != 0:  # Has a cell tower: leave in place
            kept_count += 1
            continue

        # Move the image to the removed directory
        images_dir, filename = os.path.split(img_path)
        shutil.move(img_path, os.path.join(removed_dir, filename))

        # Build the label path from path components rather than substring
        # replacement, so that only the final extension is swapped (correct
        # for names like "x.png.jpg" and upper-case ".JPG") and only the
        # image's own directory is mapped to "labels".
        label_name = os.path.splitext(filename)[0] + '.txt'
        label_path = os.path.join(os.path.dirname(images_dir), 'labels', label_name)
        if os.path.exists(label_path):
            shutil.move(label_path, os.path.join(removed_dir, label_name))

        removed_count += 1

    print(f"\n{split_name.upper()} Cleanup:")
    print(f"  Images kept (have cell towers): {kept_count}")
    print(f"  Images removed (no cell towers): {removed_count}")
    print(f"  Removed images saved to: {removed_dir}")


def clean_dataset_with_model(model_path, data_dir, device='cuda', batch_size=32):
    """
    Clean a dataset with a trained classifier by moving away non-cell tower images.

    Steps performed:
    1. Build the model and load the weights stored at ``model_path``
    2. Visit the train, valid and test splits under ``data_dir``
    3. Predict, for every image of a split, whether it shows a cell tower
    4. Move the images predicted as non-cell tower into a separate folder

    Splits whose ``images`` directory is missing or holds no images are
    skipped and keep all-zero statistics.

    Args:
        model_path (str): Path to the saved model weights (.pth file)
        data_dir (str): Root directory of the dataset (should contain train/valid/test folders)
        device (str): Device to run predictions on ('cuda' or 'cpu')
        batch_size (int): Batch size for predictions

    Returns:
        dict: Per-split ``{'total', 'kept', 'removed'}`` counts

    Example usage:
        >>> stats = clean_dataset_with_model(
        ...     model_path='best_cell_tower_model.pth',
        ...     data_dir='/path/to/new/dataset',
        ...     device='cuda',
        ...     batch_size=32
        ... )
    """
    rule = '=' * 60
    image_suffixes = ('.jpg', '.jpeg', '.png')

    # Banner
    for banner_line in (
        rule,
        'DATASET CLEANUP WITH PRE-TRAINED MODEL',
        rule,
        f'Model: {model_path}',
        f'Dataset: {data_dir}',
        f'Device: {device}',
        rule,
    ):
        print(banner_line)

    # Rebuild the architecture, then restore the trained weights onto `device`
    print('\nLoading model...')
    model = create_model(num_classes=2)
    state = torch.load(model_path, map_location=device)
    model.load_state_dict(state)
    model = model.to(device)
    model.eval()
    print('Model loaded successfully!')

    splits = ['train', 'valid', 'test']
    stats = {name: {'total': 0, 'kept': 0, 'removed': 0} for name in splits}

    for split in splits:
        print(f'\n{rule}')
        print(f'Processing {split.upper()} split')
        print(rule)

        images_dir = os.path.join(data_dir, split, 'images')
        if not os.path.exists(images_dir):
            print(f'  ⚠ Directory not found: {images_dir}')
            continue

        # Gather every image file of the split
        image_paths = [
            os.path.join(images_dir, entry)
            for entry in os.listdir(images_dir)
            if entry.lower().endswith(image_suffixes)
        ]
        if not image_paths:
            print(f'  ⚠ No images found in {split} split')
            continue

        split_stats = stats[split]
        split_stats['total'] = len(image_paths)
        print(f'  Found {len(image_paths)} images')

        print(f'  Predicting...')
        paths, predictions = predict_unlabeled(model, image_paths, device=device, batch_size=batch_size)

        # Tally the two predicted classes
        cell_count = len([p for p in predictions if p == 1])
        no_cell_count = len([p for p in predictions if p == 0])
        print(f'  - With cell towers: {cell_count}')
        print(f'  - Without cell towers: {no_cell_count}')

        print(f'  Cleaning up...')
        remove_non_cell_tower_images(paths, predictions, split)

        split_stats['kept'] = cell_count
        split_stats['removed'] = no_cell_count

    # Summary report
    print('\n' + rule)
    print('CLEANUP SUMMARY')
    print(rule)

    for split in splits:
        split_stats = stats[split]
        if split_stats['total'] > 0:
            print(f"\n{split.upper()}:")
            print(f"  Total: {split_stats['total']}")
            print(f"  Kept: {split_stats['kept']}")
            print(f"  Removed: {split_stats['removed']}")

    total_processed = sum(stats[split]['total'] for split in splits)
    total_kept = sum(stats[split]['kept'] for split in splits)
    total_removed = sum(stats[split]['removed'] for split in splits)

    print(f"\nOVERALL:")
    print(f"  Total processed: {total_processed}")
    print(f"  Total kept: {total_kept}")
    print(f"  Total removed: {total_removed}")
    if total_processed > 0:
        print(f"  Retention rate: {100*total_kept/total_processed:.1f}%")
    else:
        print("  N/A")

    print('\n' + rule)
    print('✓ CLEANUP COMPLETE!')
    print(rule)
    print('\nNon-cell tower images have been moved to separate folders.')
    print('Your train/valid/test folders now contain only images with cell towers.')

    return stats


def main():
    """Train the cell tower classifier, then use it to clean the dataset.

    Reads the class names from ``data.yaml``, builds a seeded 80/20
    train/validation split from the labeled training images, trains the
    model, and finally runs ``clean_dataset_with_model`` on the same dataset
    with the saved checkpoint.

    Returns:
        dict: The statistics returned by ``clean_dataset_with_model``.

    Raises:
        RuntimeError: If no labeled training image is found.
    """
    # Configuration
    DATA_DIR = '/content/Bird-nest-dataset-1'
    BATCH_SIZE = 32
    NUM_EPOCHS = 20
    SEED = 42
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Seed the RNGs so the split, shuffling and weight init are repeatable.
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    print(f'Using device: {DEVICE}')

    # Load dataset configuration
    yaml_path = os.path.join(DATA_DIR, 'data.yaml')
    with open(yaml_path, 'r') as f:
        data_config = yaml.safe_load(f)

    # Get class names. `names` may be a list (index = class id) or an
    # id -> name mapping; names are coerced to str so unquoted numeric names
    # in the YAML do not break later `.lower()` calls.
    names = data_config['names']
    if isinstance(names, dict):
        class_names = {int(class_id): str(name) for class_id, name in names.items()}
    else:
        class_names = {i: str(name) for i, name in enumerate(names)}
    print(f'Classes: {class_names}')

    # Inspect labels
    inspect_labels(DATA_DIR, num_samples=10)

    # Filter dataset
    print('\nFiltering dataset...')
    train_data, train_unlabeled, valid_images, test_images = filter_dataset(DATA_DIR, class_names)

    # Create validation set from training data
    print('\nCreating train/validation split from labeled training data...')

    # Sort the (path, label) pairs first so the seeded permutation selects the
    # same files regardless of directory listing order.
    labeled = sorted(zip(train_data['images'], train_data['labels']))
    all_train_images = [path for path, _ in labeled]
    all_train_labels = [label for _, label in labeled]

    total_size = len(all_train_images)
    if total_size == 0:
        raise RuntimeError(
            f'No labeled training images found under {DATA_DIR}; nothing to train on.')

    val_size = int(0.2 * total_size)
    train_size = total_size - val_size

    indices = np.random.default_rng(SEED).permutation(total_size)
    train_indices = indices[:train_size]
    val_indices = indices[train_size:]

    train_images = [all_train_images[i] for i in train_indices]
    train_labels = [all_train_labels[i] for i in train_indices]
    val_images_for_training = [all_train_images[i] for i in val_indices]
    val_labels_for_training = [all_train_labels[i] for i in val_indices]

    print(f'Split - train: {len(train_images)}, validation: {len(val_images_for_training)}')

    # Create datasets
    train_dataset = CellTowerDataset(
        train_images,
        train_labels,
        transform=get_transforms(is_training=True)
    )

    val_dataset = CellTowerDataset(
        val_images_for_training,
        val_labels_for_training,
        transform=get_transforms(is_training=False)
    )

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                            shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=2)

    # Create and train model
    print('\nCreating model...')
    model = create_model(num_classes=2)

    print('\nTraining model...')
    model = train_model(model, train_loader, val_loader,
                       num_epochs=NUM_EPOCHS, device=DEVICE)

    # Clean the dataset using the trained model (reloads the saved checkpoint)
    print('\n\nNow cleaning the dataset with the trained model...')
    stats = clean_dataset_with_model(
        model_path='best_cell_tower_model.pth',
        data_dir=DATA_DIR,
        device=DEVICE,
        batch_size=BATCH_SIZE
    )
    return stats


if __name__ == '__main__':
    main()

Using device: cuda
Classes: {0: '0', 1: '1', 2: '10', 3: '11', 4: '12', 5: '13', 6: '14', 7: '2', 8: '4', 9: '5', 10: '6', 11: '7', 12: '9', 13: 'cell', 14: 'non', 15: 'pow'}

=== INSPECTING 10 LABEL FILES ===

File: 1120_jpg.rf.079335c0f3740cde1de7091a60894a0a.txt
Content: 14 0.5006521739130435 0.5 0.9987137681159419 1
0 0.5244565217391305 0.26150121065375304 0.06702898550724638 0.12106537530266344

File: i243_jpg.rf.d5bdb91a05a51dea0e8d4f0f15ae614a.txt
Content: 0 0.5556640625 0.4652777777777778 0.078125 0.09375
13 0.3908203125 0.6590277777777778 0.683984375 0.6819444444444445

File: 419aa31cf7fc9e6d_jpg.rf.03441b9fcfe71db41901fea09bf86550.txt
Content: 0 0.48031496062992124 0.5078947368421053 0.6929133858267716 0.8157894736842105
14 0.4686220472440945 0.5247894736842105 0.937244094488189 0.950421052631579

File: 16468-1_jpg.rf.b5fe2f3d73a9510033bc402e0cf2f22e.txt
Content: 7 0.215625 0.1421875 0.0828125 0.0671875
7 0.21171875 0.31484375 0.0828125 0.0953125
7 0.62109375 0.58125 0.05625 

Epoch 1/20 [Train]: 100%|██████████| 13/13 [00:17<00:00,  1.36s/it, loss=0.189]



Epoch 1/20
Train Loss: 0.4119, Train Acc: 0.8350


Epoch 1/20 [Val]: 100%|██████████| 4/4 [00:03<00:00,  1.23it/s]


Val Loss: 0.1051, Val Acc: 0.9596
Best model saved with validation accuracy: 0.9596


Epoch 2/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.42it/s, loss=0.013]



Epoch 2/20
Train Loss: 0.1070, Train Acc: 0.9625


Epoch 2/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.76it/s]


Val Loss: 0.2195, Val Acc: 0.9495


Epoch 3/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.41it/s, loss=0.00251]



Epoch 3/20
Train Loss: 0.0489, Train Acc: 0.9850


Epoch 3/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.80it/s]


Val Loss: 0.3104, Val Acc: 0.9394


Epoch 4/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.42it/s, loss=0.000739]



Epoch 4/20
Train Loss: 0.0197, Train Acc: 0.9900


Epoch 4/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.82it/s]


Val Loss: 0.1731, Val Acc: 0.9596


Epoch 5/20 [Train]: 100%|██████████| 13/13 [00:08<00:00,  1.47it/s, loss=0.0646]



Epoch 5/20
Train Loss: 0.0183, Train Acc: 0.9925


Epoch 5/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Val Loss: 0.1512, Val Acc: 0.9697
Best model saved with validation accuracy: 0.9697


Epoch 6/20 [Train]: 100%|██████████| 13/13 [00:08<00:00,  1.57it/s, loss=0.00396]



Epoch 6/20
Train Loss: 0.0155, Train Acc: 0.9975


Epoch 6/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.39it/s]


Val Loss: 0.2043, Val Acc: 0.9394


Epoch 7/20 [Train]: 100%|██████████| 13/13 [00:08<00:00,  1.59it/s, loss=0.00247]



Epoch 7/20
Train Loss: 0.0161, Train Acc: 0.9900


Epoch 7/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.38it/s]


Val Loss: 0.2568, Val Acc: 0.9293


Epoch 8/20 [Train]: 100%|██████████| 13/13 [00:08<00:00,  1.56it/s, loss=0.000376]



Epoch 8/20
Train Loss: 0.0102, Train Acc: 0.9950


Epoch 8/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.78it/s]


Val Loss: 0.1864, Val Acc: 0.9495


Epoch 9/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.32it/s, loss=0.000212]



Epoch 9/20
Train Loss: 0.0137, Train Acc: 0.9925


Epoch 9/20 [Val]: 100%|██████████| 4/4 [00:03<00:00,  1.28it/s]


Val Loss: 0.2349, Val Acc: 0.9495


Epoch 10/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.39it/s, loss=0.0489]



Epoch 10/20
Train Loss: 0.0047, Train Acc: 0.9975


Epoch 10/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.81it/s]


Val Loss: 0.2696, Val Acc: 0.9495


Epoch 11/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.39it/s, loss=7.63e-5]



Epoch 11/20
Train Loss: 0.0078, Train Acc: 0.9975


Epoch 11/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.81it/s]


Val Loss: 0.2581, Val Acc: 0.9394


Epoch 12/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.39it/s, loss=0.00042]



Epoch 12/20
Train Loss: 0.0057, Train Acc: 0.9975


Epoch 12/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.79it/s]


Val Loss: 0.1765, Val Acc: 0.9394


Epoch 13/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.43it/s, loss=0.000813]



Epoch 13/20
Train Loss: 0.0120, Train Acc: 0.9925


Epoch 13/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.77it/s]


Val Loss: 0.1379, Val Acc: 0.9495


Epoch 14/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.39it/s, loss=0.000838]



Epoch 14/20
Train Loss: 0.0190, Train Acc: 0.9900


Epoch 14/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.79it/s]


Val Loss: 0.1304, Val Acc: 0.9596


Epoch 15/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.44it/s, loss=0.00202]



Epoch 15/20
Train Loss: 0.0019, Train Acc: 1.0000


Epoch 15/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.70it/s]


Val Loss: 0.1229, Val Acc: 0.9596


Epoch 16/20 [Train]: 100%|██████████| 13/13 [00:08<00:00,  1.57it/s, loss=0.00136]



Epoch 16/20
Train Loss: 0.0092, Train Acc: 0.9975


Epoch 16/20 [Val]: 100%|██████████| 4/4 [00:03<00:00,  1.09it/s]


Val Loss: 0.1266, Val Acc: 0.9596


Epoch 17/20 [Train]: 100%|██████████| 13/13 [00:08<00:00,  1.60it/s, loss=0.00221]



Epoch 17/20
Train Loss: 0.0031, Train Acc: 1.0000


Epoch 17/20 [Val]: 100%|██████████| 4/4 [00:03<00:00,  1.26it/s]


Val Loss: 0.1373, Val Acc: 0.9596


Epoch 18/20 [Train]: 100%|██████████| 13/13 [00:07<00:00,  1.64it/s, loss=0.000609]



Epoch 18/20
Train Loss: 0.0061, Train Acc: 0.9975


Epoch 18/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Val Loss: 0.1387, Val Acc: 0.9495


Epoch 19/20 [Train]: 100%|██████████| 13/13 [00:08<00:00,  1.45it/s, loss=0.00991]



Epoch 19/20
Train Loss: 0.0026, Train Acc: 1.0000


Epoch 19/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.75it/s]


Val Loss: 0.1267, Val Acc: 0.9596


Epoch 20/20 [Train]: 100%|██████████| 13/13 [00:09<00:00,  1.44it/s, loss=0.000407]



Epoch 20/20
Train Loss: 0.0066, Train Acc: 0.9950


Epoch 20/20 [Val]: 100%|██████████| 4/4 [00:02<00:00,  1.83it/s]
The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.


Val Loss: 0.1510, Val Acc: 0.9495


Now cleaning the dataset with the trained model...
DATASET CLEANUP WITH PRE-TRAINED MODEL
Model: best_cell_tower_model.pth
Dataset: /content/Bird-nest-dataset-1
Device: cuda

Loading model...
Model loaded successfully!

Processing TRAIN split
  Found 499 images
  Predicting...


Predicting images: 100%|██████████| 16/16 [00:10<00:00,  1.51it/s]


  - With cell towers: 139
  - Without cell towers: 360
  Cleaning up...

TRAIN Cleanup:
  Images kept (have cell towers): 139
  Images removed (no cell towers): 360
  Removed images saved to: /content/Bird-nest-dataset-1/train/removed_non_cell_towers_train

Processing VALID split
  Found 30 images
  Predicting...


Predicting images: 100%|██████████| 1/1 [00:00<00:00,  3.18it/s]


  - With cell towers: 30
  - Without cell towers: 0
  Cleaning up...

VALID Cleanup:
  Images kept (have cell towers): 30
  Images removed (no cell towers): 0
  Removed images saved to: /content/Bird-nest-dataset-1/valid/removed_non_cell_towers_valid

Processing TEST split
  Found 32 images
  Predicting...


Predicting images: 100%|██████████| 1/1 [00:00<00:00,  2.93it/s]

  - With cell towers: 30
  - Without cell towers: 2
  Cleaning up...

TEST Cleanup:
  Images kept (have cell towers): 30
  Images removed (no cell towers): 2
  Removed images saved to: /content/Bird-nest-dataset-1/test/removed_non_cell_towers_test

CLEANUP SUMMARY

TRAIN:
  Total: 499
  Kept: 139
  Removed: 360

VALID:
  Total: 30
  Kept: 30
  Removed: 0

TEST:
  Total: 32
  Kept: 30
  Removed: 2

OVERALL:
  Total processed: 561
  Total kept: 199
  Total removed: 362
  Retention rate: 35.5%

✓ CLEANUP COMPLETE!

Non-cell tower images have been moved to separate folders.
Your train/valid/test folders now contain only images with cell towers.





In [None]:
# Download the saved model checkpoint ('best_cell_tower_model.pth') from the
# Colab runtime. This cell only works inside Google Colab.

from google.colab import files
files.download('best_cell_tower_model.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>