In [1]:
# Install specific versions to avoid compatibility issues
!pip install roboflow ultralytics==8.3.240

Collecting roboflow
  Downloading roboflow-1.2.11-py3-none-any.whl.metadata (9.7 kB)
Collecting ultralytics==8.3.240
  Downloading ultralytics-8.3.240-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics==8.3.240)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pi-heif<2 (from roboflow)
  Downloading pi_heif-1.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.5 kB)
Collecting pillow-avif-plugin<2 (from roboflow)
  Downloading pillow_avif_plugin-1.5.2-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading 

In [2]:
from roboflow import Roboflow
from ultralytics import YOLO
import os
import shutil
import yaml
from glob import glob

# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
# The Roboflow API key is read from the environment so the secret is never
# stored in (or shared with) the notebook. Set it before running this cell,
# e.g. in a separate, uncommitted cell:  %env ROBOFLOW_API_KEY=<your key>
# NOTE(review): a key was previously committed here in plain text; it should
# be treated as leaked and rotated in the Roboflow account settings.
API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not API_KEY:
    print("WARNING: ROBOFLOW_API_KEY is not set; set it before creating the Roboflow client.")

# MAPPING CONFIGURATION
# Each entry describes one Roboflow dataset version and how it is folded into
# the merged dataset:
# (Workspace, Project, Version, Target_Class_ID, Target_Class_Name, Oversample_Factor)
# Every label in a source dataset is rewritten to Target_Class_ID, and each
# image/label pair is written Oversample_Factor times.
dataset_config = [
    # GLOBAL CLASS 0: Wagon (Factor=1)
    ("aispry-ob85t", "wagon-detection-zsnyn", 2, 0, "Wagon", 1),
    ("alisha-nyb7f", "wagon-detection-qxlxh", 1, 0, "Wagon", 1),
    ("wagons-thdfd", "cv-alt", 2, 0, "Wagon", 1),

    # GLOBAL CLASS 1: Wagon parts (Factor=2)
    ("db-rail", "train-wagon-cv-project", 3, 1, "Wagon parts", 2),

    # GLOBAL CLASS 2: Wagon numbers (Factor=25 - MASSIVE BOOST)
    ("sedykh-marat-dxrw3", "wagon-numbers-detection", 1, 2, "Wagon numbers", 25),
    ("student-ih3dc", "wagon-detection-qc7bh", 1, 2, "Wagon numbers", 25),
    ("wagoncounting", "wagon-numbers-jafet", 1, 2, "Wagon numbers", 25),
]

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
def remap_and_copy(source_path, dest_path_images, dest_path_labels, target_class_id, oversample_factor=1):
    """
    Copy one dataset split into the merged dataset, forcing a single class id.

    Every file under ``<source_path>/images`` that has a same-named ``.txt``
    file under ``<source_path>/labels`` is processed. Label rows with at least
    five whitespace-separated fields keep their coordinates but have the first
    field replaced by ``target_class_id``; shorter rows are dropped. Images
    with no label file, or whose label file yields no usable rows, are skipped.

    Args:
        source_path: Split directory containing ``images/`` and ``labels/``.
        dest_path_images: Output directory for images (created if missing).
        dest_path_labels: Output directory for labels (created if missing).
        target_class_id: Class id written as the first field of every row.
        oversample_factor: Number of copies written per image/label pair. The
            first copy keeps the original name; later copies get a
            ``_copy<i>`` suffix.
    """
    for out_dir in (dest_path_images, dest_path_labels):
        os.makedirs(out_dir, exist_ok=True)

    labels_dir = os.path.join(source_path, 'labels')

    for image_file in glob(os.path.join(source_path, 'images', '*')):
        stem, extension = os.path.splitext(os.path.basename(image_file))

        # An image without a label file contributes nothing to the merge.
        label_file = os.path.join(labels_dir, f"{stem}.txt")
        if not os.path.exists(label_file):
            continue

        with open(label_file, 'r') as handle:
            rows = [raw.strip().split() for raw in handle.readlines()]

        # Keep only rows with a class id plus at least four values, and
        # overwrite the class id.
        remapped = [
            f"{target_class_id} " + " ".join(fields[1:])
            for fields in rows
            if len(fields) >= 5
        ]
        if not remapped:
            continue

        label_text = '\n'.join(remapped)

        # --- OVERSAMPLING ---
        for copy_idx in range(oversample_factor):
            # Copy 0 keeps the source name; duplicates get a unique suffix.
            tag = "" if copy_idx == 0 else f"_copy{copy_idx}"
            out_stem = f"{stem}{tag}"

            shutil.copy(image_file, os.path.join(dest_path_images, f"{out_stem}{extension}"))

            with open(os.path.join(dest_path_labels, f"{out_stem}.txt"), 'w') as handle:
                handle.write(label_text)

In [4]:
rf = Roboflow(api_key=API_KEY)

# Create Merged Dataset Structure.
# Any previous merge is deleted first so re-running this cell never mixes
# files from an older run into the new dataset.
MERGED_DIR = "railway_hackathon_merged_oversampled"
if os.path.exists(MERGED_DIR):
    shutil.rmtree(MERGED_DIR)

for split in ['train', 'valid', 'test']:
    os.makedirs(os.path.join(MERGED_DIR, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(MERGED_DIR, split, 'labels'), exist_ok=True)

print("-" * 60)
print("STEP 1: Downloading & Merging Datasets (With Balancing)")
print("-" * 60)

# target_name is carried in the config for readability only; the class names
# written to data.yaml are listed explicitly further down.
for workspace, project_id, version, target_id, target_name, factor in dataset_config:
    try:
        print(f"Processing {workspace}/{project_id} v{version} -> Class {target_id} (x{factor})")
        project = rf.workspace(workspace).project(project_id)
        dataset = project.version(version).download("yolov8")

        location = dataset.location

        # Merge Train, Valid, Test splits (a dataset may lack some of them)
        for split in ['train', 'valid', 'test']:
            src_split_path = os.path.join(location, split)
            if not os.path.exists(src_split_path):
                continue

            dest_images = os.path.join(MERGED_DIR, split, 'images')
            dest_labels = os.path.join(MERGED_DIR, split, 'labels')

            # Oversampling is a training-time class-balancing device.
            # Duplicating validation/test images adds no information - it only
            # multiplies evaluation time and disk use - so those splits are
            # always copied exactly once.
            split_factor = factor if split == 'train' else 1

            remap_and_copy(src_split_path, dest_images, dest_labels, target_id, split_factor)

    except Exception as e:
        # Best effort: one unavailable dataset must not abort the whole merge,
        # but the failure is reported so it is visible in the cell output.
        print(f"Skipping {project_id}: {e}")

# Create Custom data.yaml
# Absolute paths are used so the file resolves regardless of the working
# directory at training time. The class order must match the Target_Class_ID
# values used in dataset_config.
yaml_content = {
    'train': os.path.abspath(os.path.join(MERGED_DIR, 'train', 'images')),
    'val': os.path.abspath(os.path.join(MERGED_DIR, 'valid', 'images')),
    'test': os.path.abspath(os.path.join(MERGED_DIR, 'test', 'images')),
    'nc': 3,
    'names': ['Wagon', 'Wagon parts', 'Wagon numbers']
}

yaml_path = os.path.join(MERGED_DIR, 'data.yaml')
with open(yaml_path, 'w') as f:
    yaml.dump(yaml_content, f)

print("Dataset Preparation Complete!")

------------------------------------------------------------
STEP 1: Downloading & Merging Datasets (With Balancing)
------------------------------------------------------------
Processing aispry-ob85t/wagon-detection-zsnyn v2 -> Class 0 (x1)
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Wagon-detection-2 to yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 156603/156603 [00:02<00:00, 69622.85it/s]





Extracting Dataset Version Zip to Wagon-detection-2 in yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4454/4454 [00:00<00:00, 6408.67it/s]


Processing alisha-nyb7f/wagon-detection-qxlxh v1 -> Class 0 (x1)
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in wagon-detection-1 to yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1715/1715 [00:00<00:00, 7595.99it/s]





Extracting Dataset Version Zip to wagon-detection-1 in yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 62/62 [00:00<00:00, 7071.68it/s]

Processing wagons-thdfd/cv-alt v2 -> Class 0 (x1)
loading Roboflow workspace...





loading Roboflow project...


Downloading Dataset Version Zip in CV-alt-2 to yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40144/40144 [00:00<00:00, 50735.19it/s]





Extracting Dataset Version Zip to CV-alt-2 in yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 766/766 [00:00<00:00, 6036.09it/s]


Processing db-rail/train-wagon-cv-project v3 -> Class 1 (x2)
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Train-Wagon-CV-Project-3 to yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 128815/128815 [00:01<00:00, 69846.34it/s]





Extracting Dataset Version Zip to Train-Wagon-CV-Project-3 in yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5442/5442 [00:00<00:00, 7639.09it/s]


Processing sedykh-marat-dxrw3/wagon-numbers-detection v1 -> Class 2 (x25)
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Wagon-numbers-detection-1 to yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12948/12948 [00:00<00:00, 25865.03it/s]





Extracting Dataset Version Zip to Wagon-numbers-detection-1 in yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 376/376 [00:00<00:00, 7788.06it/s]


Processing student-ih3dc/wagon-detection-qc7bh v1 -> Class 2 (x25)
loading Roboflow workspace...
loading Roboflow project...
Processing wagoncounting/wagon-numbers-jafet v1 -> Class 2 (x25)
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Wagon-numbers-1 to yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 38165/38165 [00:01<00:00, 36793.59it/s]





Extracting Dataset Version Zip to Wagon-numbers-1 in yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 500/500 [00:00<00:00, 3019.65it/s]


Dataset Preparation Complete!


In [9]:
# Train
# All training options are collected in one mapping and passed through to
# Ultralytics unchanged, so the run configuration can be read at a glance.
train_settings = {
    # dataset and schedule
    'data': yaml_path,
    'epochs': 50,
    'patience': 10,
    # input size / throughput
    'imgsz': 640,
    'batch': 64,
    'workers': 8,
    'cache': 'disk',
    # optimisation
    'freeze': 5,
    'lr0': 0.001,
    'cos_lr': True,
    # output location
    'project': 'railway_hackathon_take5',
    'name': 'merged_model_v4',
}

# Start from the pretrained YOLOv8-small checkpoint.
model = YOLO('yolov8s.pt')

results = model.train(**train_settings)

Ultralytics 8.3.240 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=64, bgr=0.0, box=7.5, cache=disk, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=railway_hackathon_merged_oversampled/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=5, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=merged_model_v44, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience

In [None]:
import os
import shutil
from glob import glob

from google.colab import files

# The training log reports the run as name=merged_model_v44 although
# name='merged_model_v4' was requested: the run name gets a numeric suffix when
# the directory already exists. A fixed path would therefore point at an older
# run (or at nothing), so the newest matching run that actually contains
# best.pt is located instead.
project_dir = '/content/railway_hackathon_take5'
run_prefix = 'merged_model_v4'

candidate_runs = [
    run_dir
    for run_dir in glob(os.path.join(project_dir, f'{run_prefix}*'))
    if os.path.isfile(os.path.join(run_dir, 'weights', 'best.pt'))
]

if candidate_runs:
    # Newest run = most recently written best.pt.
    results_folder = max(
        candidate_runs,
        key=lambda run_dir: os.path.getmtime(os.path.join(run_dir, 'weights', 'best.pt')),
    )
else:
    # Fall back to the un-suffixed path so the "not found" message below still
    # names a concrete location.
    results_folder = os.path.join(project_dir, run_prefix)

weights_path = os.path.join(results_folder, 'weights', 'best.pt')

if os.path.exists(weights_path):
    print(f"✅ Model found at: {weights_path}")

    # Option 1: Download just the weights (faster)
    print("Downloading best.pt...")
    files.download(weights_path)

    # Option 2: Zip the full results (logs, graphs, weights) and download
    print("Zipping full results folder...")
    shutil.make_archive('training_results', 'zip', results_folder)
    files.download('training_results.zip')

else:
    print("❌ File not found. If the Runtime completely disconnected/reset, the files might be lost.")

In [None]:
from google.colab import files
import os

source_folder = '/content/railway_hackathon_take5'  # Change if your model folder has a different name/path

if not os.path.exists(source_folder):
    print(f"‚ùå Folder not found: {source_folder}")
    print("   Check the exact path with !ls /content/")
else:
    print(f"üìÅ Found folder: {source_folder}")
    print("üì¶ Zipping the folder...")
    
    zip_path = '/content/railway_hackathon_results.zip'
    
    # Use system zip for speed and reliability
    !zip -r {zip_path} {source_folder}
    
    if os.path.exists(zip_path):
        size_mb = os.path.getsize(zip_path) / (1024 * 1024)
        print(f"‚úÖ Zip created: {zip_path} ({size_mb:.1f} MB)")
        print("‚¨áÔ∏è Starting automatic download to your computer...")
        files.download(zip_path)  # This triggers a direct browser download
    else:
        print("‚ùå Zip creation failed.")

In [None]:
!pip install -q google-auth-oauthlib  # One-time if needed

from google.colab import auth
auth.authenticate_user()

import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

service = build('drive', 'v3')

# Manual mount simulation
mount_point = '/content/drive'
if not os.path.exists(mount_point):
    os.makedirs(mount_point)

# Copy your files/zip to a temp spot first (if not already zipped)
source_zip = '/content/railway_hackathon_results.zip'  # Or recreate: !zip -r {source_zip} /content/railway_hackathon_take4

# Upload to Drive root (or change folder_id for a subfolder)
file_metadata = {'name': 'railway_hackathon_results.zip'}
media = MediaFileUpload(source_zip, resumable=True)
uploaded_file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()

print(f"‚úÖ Uploaded to Drive! File ID: {uploaded_file.get('id')}")
print("Go to drive.google.com, search for 'railway_hackathon_results.zip', right-click ‚Üí Download.")