In [None]:
import os
import re
import shutil
import zipfile
from google.colab import files
from google.colab import drive
drive.mount('/content/drive')

# Source image folders on the mounted Google Drive
pics_folder = '/content/drive/MyDrive/pics'      # Original pics folder
train_folder = '/content/drive/MyDrive/train'    # Train folder
test_folder = '/content/drive/MyDrive/test'      # Test folder
dataset_folder = '/content/drive/MyDrive/dataset'  # New dataset folder

# Output folders that receive the odd and even license plate images
odd_dir = '/content/odd_plates'
even_dir = '/content/even_plates'

# exist_ok=True lets the cell be re-run without failing on existing folders
for plates_dir in (odd_dir, even_dir):
    os.makedirs(plates_dir, exist_ok=True)

# Filename patterns used to locate the plate number.
# Pattern 1: letters + digits + letters at the start of the name,
# e.g. A1033HZ.png, AD3324IT or S4136SI. Group 2 is the digit run.
pattern1 = re.compile(r'([A-Z]+)(\d+)([A-Z]+)')

# Pattern 2: names like "351.E 6730 RC-07-19", where the digit run sits
# between whitespace and a LETTERS-digits-digits suffix. Group 1 is the digit run.
pattern2 = re.compile(r'.*\s+(\d+)\s+[A-Z]+-\d+-\d+')

# Running totals across every processed folder
odd_count = even_count = unmatched_count = 0
unmatched_files = []

# Per-source-folder tallies; the comprehension builds a separate dict per folder
file_sources = {
    source: {'odd': 0, 'even': 0, 'unmatched': 0}
    for source in ('pics', 'train', 'test', 'dataset')
}

def process_file(file_path, filename, source_folder):
    """Copy one image into the odd or even folder based on its plate number.

    The plate number is taken from ``filename`` with its extension removed:
    ``pattern1`` (letters + digits + letters) is tried first, then
    ``pattern2`` (the "351.E 6730 RC-07-19" layout). The last digit of the
    matched number decides whether the file goes to ``even_dir`` or
    ``odd_dir``.

    Args:
        file_path: Path of the file to copy.
        filename: Name of the file; also used as the destination name.
        source_folder: Key into ``file_sources`` naming where the file came from.

    Returns:
        None. Updates the module-level counters and ``file_sources``; a file
        matching neither pattern is recorded in ``unmatched_files`` and not
        copied.
    """
    global odd_count, even_count, unmatched_count, unmatched_files

    stem, _extension = os.path.splitext(filename)

    # pattern1 takes priority; pattern2 is only consulted when it fails
    plate_match = pattern1.match(stem)
    if plate_match is not None:
        digits = plate_match.group(2)
    else:
        plate_match = pattern2.match(stem)
        if plate_match is None:
            unmatched_count += 1
            file_sources[source_folder]['unmatched'] += 1
            unmatched_files.append(f"{source_folder}: {filename}")
            return
        digits = plate_match.group(1)

    # Both patterns require at least one digit, so digits is never empty
    is_even = int(digits[-1]) % 2 == 0
    target_dir = even_dir if is_even else odd_dir

    # Copy before counting so a failed copy is not tallied
    shutil.copy2(file_path, os.path.join(target_dir, filename))
    if is_even:
        even_count += 1
        file_sources[source_folder]['even'] += 1
    else:
        odd_count += 1
        file_sources[source_folder]['odd'] += 1

# Source folders paired with the key used for them in file_sources
folders_to_process = [
    (pics_folder, "pics"),
    (train_folder, "train"),
    (test_folder, "test"),
    (dataset_folder, "dataset"),
]

for folder_path, folder_name in folders_to_process:
    # Missing folders are reported and skipped rather than treated as errors
    if not os.path.exists(folder_path):
        print(f"Warning: The directory {folder_path} does not exist. Skipping.")
        continue

    print(f"\nProcessing files in {folder_name} folder: {folder_path}")

    # Only image files are considered; the extension check ignores case
    image_names = [
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]
    for image_name in image_names:
        process_file(os.path.join(folder_path, image_name), image_name, folder_name)
    files_processed = len(image_names)

    print(f"Processed {files_processed} files from {folder_name} folder")

# Bundle each output folder into its own ZIP archive for download
print("\nCreating ZIP files...")
for archive_path, plates_dir in (
    ('/content/odd_plates.zip', odd_dir),
    ('/content/even_plates.zip', even_dir),
):
    with zipfile.ZipFile(archive_path, 'w') as archive:
        for entry in os.listdir(plates_dir):
            # Store under the bare filename so the archive has no folder prefix
            archive.write(os.path.join(plates_dir, entry), entry)

# Overall totals
print("\nOverall Summary:")
print(f"Total odd plates: {odd_count}")
print(f"Total even plates: {even_count}")
print(f"Total unmatched files: {unmatched_count}")

# Per-folder breakdown
print("\nDetailed Summary by Folder:")
for source_name in file_sources:
    tally = file_sources[source_name]
    print(f"{source_name.capitalize()} folder:")
    print(f"  - Odd plates: {tally['odd']}")
    print(f"  - Even plates: {tally['even']}")
    print(f"  - Unmatched: {tally['unmatched']}")

# List at most 15 unmatched names, then report how many were left out
if unmatched_count > 0:
    print("\nUnmatched files (first 15):")
    for entry in unmatched_files[:15]:
        print(f"- {entry}")
    remaining = len(unmatched_files) - 15
    if remaining > 0:
        print(f"... and {remaining} more")

# Send both archives to the browser, odd plates first
print("\nDownloading zip files...")
for archive_path in ('/content/odd_plates.zip', '/content/even_plates.zip'):
    files.download(archive_path)

print("\nDone! Files have been separated and are ready for download.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Processing files in pics folder: /content/drive/MyDrive/pics
Processed 845 files from pics folder

Processing files in train folder: /content/drive/MyDrive/train
Processed 472 files from train folder

Processing files in test folder: /content/drive/MyDrive/test
Processed 30 files from test folder

Processing files in dataset folder: /content/drive/MyDrive/dataset
Processed 358 files from dataset folder

Creating ZIP files...

Overall Summary:
Total odd plates: 788
Total even plates: 772
Total unmatched files: 145

Detailed Summary by Folder:
Pics folder:
  - Odd plates: 422
  - Even plates: 418
  - Unmatched: 5
Train folder:
  - Odd plates: 177
  - Even plates: 172
  - Unmatched: 123
Test folder:
  - Odd plates: 7
  - Even plates: 7
  - Unmatched: 16
Dataset folder:
  - Odd plates: 182
  - Even plates: 175
  - Unmatched: 1

Unmatched files (first 15):
- pics

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Done! Files have been separated and are ready for download.
