In [ ]:
# ============================================
# CELL 1: COMPLETE PROJECT SETUP IN GOOGLE DRIVE
# ============================================
# Mounts Google Drive, creates the project directory tree, verifies that a
# GPU is available, installs/imports the required packages, and writes
# config.json, README.md and .gitignore into the project root.
# Defines PROJECT_ROOT and CONFIG (plus the verified imports) in the
# notebook's global namespace.
import json
import os
import subprocess
import sys
from pathlib import Path

print("=" * 60)
print("  ROAD DAMAGE DETECTION - COMPLETE SETUP")
print("  Location: Google Drive/ML folder")
print("=" * 60)

# ============================================
# Mount Google Drive
# ============================================
print("\n[1/7] MOUNTING GOOGLE DRIVE")
print("-" * 60)

from google.colab import drive

drive.mount('/content/drive')
print("   [OK] Google Drive mounted")

# ============================================
# Create project structure in Drive
# ============================================
print("\n[2/7] CREATING PROJECT STRUCTURE")
print("-" * 60)

# Project root in your ML folder
PROJECT_ROOT = Path("/content/drive/MyDrive/ML/road-damage-detection")

# Complete directory structure
folders = [
    'notebooks',
    'datasets/rdd2022/India/train/images',
    'datasets/rdd2022/India/train/annotations/xmls',
    'datasets/rdd2022/India/test/images',
    'datasets/rdd2022/India/test/annotations/xmls',
    'datasets/rdd2022_yolo_india/images/train',
    'datasets/rdd2022_yolo_india/images/val',
    'datasets/rdd2022_yolo_india/images/test',
    'datasets/rdd2022_yolo_india/labels/train',
    'datasets/rdd2022_yolo_india/labels/val',
    'datasets/rdd2022_yolo_india/labels/test',
    'results/exploration',
    'results/evaluation',
    'runs/detect',
    'weights',
    'scripts',
]

print(f"   Creating structure in: {PROJECT_ROOT}")
for folder in folders:
    # exist_ok=True keeps the cell safe to re-run in later sessions.
    (PROJECT_ROOT / folder).mkdir(parents=True, exist_ok=True)
print("   [OK] Directory structure created")

# Set working directory
os.chdir(PROJECT_ROOT / 'notebooks')
print(f"   [OK] Working directory: {os.getcwd()}")

# ============================================
# Check GPU
# ============================================
print("\n[3/7] CHECKING GPU")
print("-" * 60)

import torch

if not torch.cuda.is_available():
    print("   [ERROR] No GPU detected")
    print("   Action: Runtime -> Change runtime type -> T4 GPU")
    raise RuntimeError("GPU required")

gpu_name = torch.cuda.get_device_name(0)
gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
print(f"   [OK] GPU: {gpu_name}")
print(f"   [OK] Memory: {gpu_memory:.1f} GB")

# ============================================
# Install packages
# ============================================
print("\n[4/7] INSTALLING PACKAGES")
print("-" * 60)

packages = [
    'ultralytics',      # YOLOv8
    'pandas',           # Data manipulation
    'numpy',            # Numerical computing
    'matplotlib',       # Plotting
    'seaborn',          # Statistical visualization
    'scikit-learn',     # ML utilities
    'opencv-python',    # Image processing
    'pillow',           # Image handling
    'pyarrow',          # Parquet support
    'tqdm',             # Progress bars
]

print("   Installing packages (1-2 minutes)...")
failed_packages = []
for package in packages:
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", package],
        capture_output=True,
    )
    if result.returncode != 0:
        # Report the failure instead of silently claiming success; show the
        # last line of pip's stderr as a hint.
        failed_packages.append(package)
        stderr_lines = result.stderr.decode(errors="replace").strip().splitlines()
        detail = stderr_lines[-1] if stderr_lines else "no error output"
        print(f"   [WARN] pip install failed for {package}: {detail}")

if failed_packages:
    # Do not abort here: a package may already be present in the runtime.
    # The import verification below raises if something is really missing.
    print(f"   [WARN] Packages not installed: {', '.join(failed_packages)}")
else:
    print("   [OK] All packages installed")

# Verify imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ultralytics import YOLO
import cv2
from PIL import Image
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split

print("   [OK] All imports verified")

# ============================================
# Create configuration file
# ============================================
print("\n[5/7] CREATING CONFIGURATION")
print("-" * 60)

config = {
    'project_root': str(PROJECT_ROOT),
    'dataset_raw': str(PROJECT_ROOT / 'datasets/rdd2022/India'),
    'dataset_yolo': str(PROJECT_ROOT / 'datasets/rdd2022_yolo_india'),
    'results_dir': str(PROJECT_ROOT / 'results'),
    'runs_dir': str(PROJECT_ROOT / 'runs'),
    'weights_dir': str(PROJECT_ROOT / 'weights'),
    'device': 'cuda',
    'random_seed': 42,
    'class_mapping': {
        'D00': 0,  # Longitudinal cracks
        'D10': 1,  # Transverse cracks
        'D20': 2,  # Alligator cracks
        'D40': 3,  # Potholes
    },
    'class_names': ['D00', 'D10', 'D20', 'D40'],
}

config_file = PROJECT_ROOT / 'config.json'
with open(config_file, 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)
print(f"   [OK] Configuration saved: {config_file.name}")

# Make config available globally (PROJECT_ROOT is already a global here).
CONFIG = config

# ============================================
# Create README
# ============================================
print("\n[6/7] CREATING DOCUMENTATION")
print("-" * 60)

readme_content = """# Road Damage Detection - Semester Project

## Project Structure

```
ML/road-damage-detection/
├── notebooks/
│   ├── 00_complete_setup.ipynb      # Initial setup
│   ├── 01_download_dataset.ipynb    # Dataset download
│   ├── 02_explore_data.ipynb        # Week 2: EDA
│   ├── 03_prepare_splits.ipynb      # Week 3: Data preparation
│   └── 04_train_baseline.ipynb      # Week 4: Training
├── datasets/
│   ├── rdd2022/India/               # Raw dataset
│   │   ├── train/ (images + xmls)
│   │   └── test/ (images + xmls)
│   └── rdd2022_yolo_india/          # Processed YOLO format
│       ├── images/ (train/val/test)
│       ├── labels/ (train/val/test)
│       └── data.yaml
├── results/
│   ├── exploration/                  # EDA outputs
│   └── evaluation/                   # Model evaluation
├── runs/detect/                      # Training outputs
├── weights/                          # Model checkpoints
├── config.json                       # Project configuration
└── README.md                         # This file
```

## Workflow

### Week 1: Setup
- Environment configuration
- Directory structure
- Dataset acquisition

### Week 2: Exploratory Data Analysis
- Parse XML annotations
- Analyze class distribution
- Visualize damage patterns
- Generate statistics

### Week 3: Data Preparation
- Stratified train/val/test split (80/10/10)
- Convert VOC XML to YOLO format
- Handle class imbalance (D10)
- Create data.yaml

### Week 4: Baseline Training
- Train YOLOv8n model
- Monitor training metrics
- Evaluate on validation set
- Save checkpoints

### Week 5-8: Advanced Work
- Model optimization
- Comprehensive evaluation
- Deployment preparation
- Final documentation

## Dataset

**Source:** RDD2022 Competition
**Region:** India
**Format:** Pascal VOC XML annotations

**Classes:**
- D00: Longitudinal cracks
- D10: Transverse cracks
- D20: Alligator cracks
- D40: Potholes

## Environment

**Platform:** Google Colab
**GPU:** T4 GPU (required)
**Storage:** Google Drive (persistent)
**Framework:** YOLOv8 (Ultralytics)

## Notes

- All files stored in Google Drive for persistence
- Run notebooks in sequence
- GPU runtime required for training
- Session storage: Use /content/ for temp files only
"""

# The README contains non-ASCII box-drawing characters, so the encoding is
# pinned rather than left to the platform default.
readme_file = PROJECT_ROOT / 'README.md'
with open(readme_file, 'w', encoding='utf-8') as f:
    f.write(readme_content)
print("   [OK] README.md created")

# Create .gitignore
# NOTE(review): the "Results" section is written as two patterns
# (`runs/` and `*.log`); confirm this was not meant as a single `runs/*.log`.
gitignore_content = """# Large files
*.jpg
*.jpeg
*.png
*.xml
*.pt
*.pth

# Datasets
datasets/rdd2022/India/

# Results
runs/
*.log

# Python
__pycache__/
*.pyc
.ipynb_checkpoints/
"""

gitignore_file = PROJECT_ROOT / '.gitignore'
with open(gitignore_file, 'w', encoding='utf-8') as f:
    f.write(gitignore_content)
print("   [OK] .gitignore created")

# ============================================
# Display structure
# ============================================
print("\n[7/7] PROJECT STRUCTURE")
print("-" * 60)


def display_tree(directory, prefix="", max_depth=2, current_depth=0):
    """Print the contents of *directory* as an indented tree.

    Directories are listed before files, each group sorted by name, and
    directory names are shown with a trailing "/".

    Args:
        directory: ``pathlib.Path`` of the directory to list.
        prefix: Indentation carried over from parent levels.
        max_depth: Number of levels to print, counted from the first call.
        current_depth: Depth of *directory* relative to the first call.

    A directory that cannot be listed is reported inline and skipped rather
    than aborting the whole listing.
    """
    if current_depth >= max_depth:
        return

    try:
        # Resolve is_dir() once per entry: it is needed for sorting, the
        # suffix and the recursion decision.
        entries = [(entry, entry.is_dir()) for entry in directory.iterdir()]
    except OSError as err:
        print(f"{prefix}[unreadable: {err}]")
        return

    entries.sort(key=lambda item: (not item[1], item[0].name))
    last_index = len(entries) - 1

    for index, (path, is_dir) in enumerate(entries):
        is_last = index == last_index
        current_prefix = "└── " if is_last else "├── "
        suffix = "/" if is_dir else ""
        print(f"{prefix}{current_prefix}{path.name}{suffix}")

        if is_dir and current_depth < max_depth - 1:
            # Keep drawing the parent's vertical rule unless this entry was
            # the last one at its level.
            extension = "    " if is_last else "│   "
            display_tree(path, prefix + extension, max_depth, current_depth + 1)


display_tree(PROJECT_ROOT)

# ============================================
# Summary
# ============================================
print("\n" + "=" * 60)
print("  SETUP COMPLETE")
print("=" * 60)

print("\nLocation:")
print(f"   {PROJECT_ROOT}")

print("\nStatus:")
print("   [OK] Project structure created")
print("   [OK] Configuration saved")
print("   [OK] Documentation created")
print("   [OK] GPU ready")
print("   [OK] Packages installed")

print("\nNext Steps:")
print("   1. Run CELL 2 to check for existing dataset")
print("   2. If no dataset: Create 01_download_dataset.ipynb")
print("   3. Then create 02_explore_data.ipynb for EDA")

print("\nImportant:")
print("   - All files saved to Google Drive")
print("   - Persists across Colab sessions")
print("   - Access from: ML/road-damage-detection/")

print("\n" + "=" * 60)