## 📥 Step 1: Clone/Update Repository

In [None]:
import os

repo_url = "https://github.com/Biobay/DeepLearningHard_ISWM"
branch = "main"
repo_name = repo_url.split('/')[-1]

print(f"üì• Clonazione/aggiornamento repository: {repo_url}")

# Check if we're already in the repo directory
current_dir = os.path.basename(os.getcwd())
if current_dir == repo_name:
    print(f"‚úÖ Gi√† nella directory '{repo_name}'. Aggiornamento...")
    !git fetch
    !git pull origin {branch}
    !git checkout {branch}
elif os.path.exists(repo_name):
    print(f"üìÅ Directory '{repo_name}' esiste. Aggiornamento repository...")
    os.chdir(repo_name)
    !git fetch
    !git pull origin {branch}
    !git checkout {branch}
else:
    print("üì¶ Clonazione repository...")
    !git clone -b {branch} {repo_url}
    os.chdir(repo_name)

print(f"\n‚úÖ Working directory: {os.getcwd()}")

## 📦 Step 2: Installa Dipendenze

In [None]:
print("üì¶ Installazione dipendenze...")

# Installa PyTorch e dipendenze
!pip install -q torch torchvision --index-url https://download.pytorch.org/whl/cu121
!pip install -q opencv-python matplotlib Pillow tqdm tensorboard numpy kaggle

# Verifica installazione
import torch
import sys

print(f"\n‚úÖ Python: {sys.version.split()[0]}")
print(f"‚úÖ PyTorch: {torch.__version__}")
print(f"‚úÖ CUDA disponibile: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")
    print(f"‚úÖ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
else:
    print("‚ö†Ô∏è  Nessuna GPU disponibile - training sar√† lento!")

## 🔑 Step 3: Configura Kaggle e Scarica Dataset

In [None]:
# Kaggle credentials.
# NOTE: a real username/API key used to be hard-coded in this cell. Anything
# committed to a notebook must be treated as leaked: revoke that key from the
# Kaggle account settings and never paste credentials here again.
# The values are read from the KAGGLE_USERNAME / KAGGLE_KEY environment
# variables and requested interactively only when those are not set.
import getpass
import json
import os
from pathlib import Path

KAGGLE_USERNAME = os.environ.get("KAGGLE_USERNAME") or input("Kaggle username: ").strip()
# getpass keeps the key out of the cell output.
KAGGLE_KEY = os.environ.get("KAGGLE_KEY") or getpass.getpass("Kaggle API key: ").strip()

if not KAGGLE_USERNAME or not KAGGLE_KEY:
    raise ValueError("Credenziali Kaggle mancanti: imposta KAGGLE_USERNAME e KAGGLE_KEY.")

# Store the credentials in ~/.kaggle/kaggle.json
kaggle_dir = Path.home() / ".kaggle"
kaggle_dir.mkdir(exist_ok=True)

kaggle_config = {
    "username": KAGGLE_USERNAME,
    "key": KAGGLE_KEY
}

kaggle_file = kaggle_dir / "kaggle.json"
with open(kaggle_file, 'w') as f:
    json.dump(kaggle_config, f)

# The file holds a secret: restrict it to owner read/write.
os.chmod(kaggle_file, 0o600)

print("‚úÖ Credenziali Kaggle configurate!")

# Scarica dataset
import zipfile
import shutil

KAGGLE_DATASET = "lakshaymiddha/crack-segmentation-dataset"
dataset_path = Path("dataset")
train_images = dataset_path / "train" / "images"

# Check se gi√† esiste
if train_images.exists() and list(train_images.glob("*.jpg")):
    print(f"\n‚úÖ Dataset gi√† presente ({len(list(train_images.glob('*.jpg')))} immagini)")
else:
    print("\nüì• Download dataset da Kaggle (2.1 GB - pu√≤ richiedere 5-10 minuti)...")
    !kaggle datasets download -d {KAGGLE_DATASET}
    
    # Estrai zip
    zip_files = list(Path(".").glob("*.zip"))
    if zip_files:
        zip_file = zip_files[0]
        print(f"üì¶ Estrazione {zip_file.name}...")
        
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(".")
        
        zip_file.unlink()
        
        # Organizza struttura directory
        print("üìÅ Organizzazione dataset...")
        dataset_path.mkdir(exist_ok=True)
        
        mappings = {
            'train_images': dataset_path / 'train' / 'images',
            'train_masks': dataset_path / 'train' / 'masks',
            'test_images': dataset_path / 'test' / 'images',
            'test_masks': dataset_path / 'test' / 'masks',
        }
        
        for src_name, dest_path in mappings.items():
            src_path = Path(src_name)
            if src_path.exists():
                dest_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(src_path), str(dest_path))
        
        print("‚úÖ Dataset scaricato e organizzato!")

# Verifica dataset
train_count = len(list((dataset_path / "train" / "images").glob("*.jpg")))
test_count = len(list((dataset_path / "test" / "images").glob("*.jpg")))
print(f"\nüìä Training images: {train_count}")
print(f"üìä Test images: {test_count}")

# Make sure every output folder used by the pipeline exists
output_dirs = ('models', 'checkpoints', 'predictions', 'runs')
for out_dir in map(Path, output_dirs):
    out_dir.mkdir(exist_ok=True)

print("\n‚úÖ Setup completato!")

## 🚀 Step 4: TRAINING (50 Epoche)

**Questo richiederà ~2-3 ore con GPU (Tesla T4/P100)**

- Autoencoder convoluzionale
- MSE Loss per ricostruzione
- Checkpoints automatici ogni 5 epoche
- Resume automatico se interrotto

In [None]:
print("üöÄ Avvio training...")
print("=" * 60)

# Avvia training
!python train.py

print("\n‚úÖ Training completato!")
print("üìÅ Modello salvato in: models/best_autoencoder.pth")

## 🔮 Step 5: INFERENCE

Genera maschere di predizione per tutte le immagini test

In [None]:
print("üîÆ Avvio inference...")
print("=" * 60)

!python inference.py

print("\n‚úÖ Inference completata!")
print("üìÅ Predizioni salvate in: predictions/")

## 📊 Step 6: EVALUATION

Calcola metriche IoU, Dice, Precision, Recall e ottimizza threshold

In [None]:
print("üìä Avvio evaluation...")
print("=" * 60)

!python evaluate.py

print("\n‚úÖ Evaluation completata!")
print("üìÅ Risultati salvati in: results_visualization.png, threshold_optimization.png")

## ✅ COMPLETATO!

### 📁 Risultati disponibili in:

- **Modello**: `models/best_autoencoder.pth`
- **Predizioni**: `predictions/*.jpg`
- **Visualizzazioni**: `results_visualization.png`, `threshold_optimization.png`
- **Checkpoints**: `checkpoints/` (per resume)
- **TensorBoard logs**: `runs/`

### 💾 Download risultati:

```python
# Su Google Colab:
from google.colab import files
files.download('models/best_autoencoder.pth')

# Su Kaggle:
# I file saranno disponibili nell'output del notebook
```

In [None]:
# Final recap of the artefacts found on disk
separator = "=" * 60
print(separator)
print("üìä SUMMARY FINALE")
print(separator)

# Size of the saved model, if there is one
model_path = Path("models/best_autoencoder.pth")
if not model_path.exists():
    print("\n‚ö†Ô∏è  Modello non trovato")
else:
    size_mb = model_path.stat().st_size / 1024 / 1024
    print(f"\n‚úÖ Modello: {size_mb:.2f} MB")

# Number of prediction images
predictions = list(Path("predictions").glob("*.jpg"))
print(f"‚úÖ Predizioni generate: {len(predictions)}")

# PNG plots in the working directory whose name marks them as a result figure
viz_files = [
    png
    for png in Path(".").glob("*.png")
    if any(tag in png.name for tag in ("visualization", "threshold"))
]
print(f"‚úÖ Visualizzazioni: {len(viz_files)}")
for png in viz_files:
    print(f"   - {png.name}")

print("\n" + separator)
print("üéâ TUTTO COMPLETATO!")
print(separator)