[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/YOUR_USERNAME/GAN_Chili_Disease/blob/main/colab_setup.ipynb)

# 🎆 GAN + GitHub = Perfect Workflow!

**🚀 New Feature**: Sekarang dengan **GitHub Integration**! 
Tidak perlu upload-download file lagi. Clone sekali, training langsung, hasil auto-sync ke GitHub.

---

# 🌶️ GAN Data Augmentation untuk Klasifikasi Penyakit Tanaman Cabai

Notebook ini untuk menjalankan training GAN di Google Colab dengan optimasi GPU.

**Estimasi waktu training:**
- Demo (1 kelas): 15-20 menit
- Full training (5 kelas): 3-4 jam

**Requirements:**
- Google Colab Pro (untuk GPU T4 atau lebih baik)
- Dataset penyakit tanaman cabai

## 🛠️ Setup Environment

In [None]:
# Check GPU availability
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path

# Report the PyTorch build and the accelerator (if any) this runtime exposes.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    # No CUDA device: training still runs, only slower.
    print("⚠️ GPU not available, will use CPU (slower training)")
else:
    # Details of CUDA device 0.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

In [None]:
# Install required packages
# PyTorch stack; the CUDA 11.8 wheel index is consulted in addition to PyPI.
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
# Image I/O, plotting and progress-bar helpers.
# NOTE(review): seaborn and opencv-python are not imported by the cells visible here — confirm they are needed.
!pip install Pillow matplotlib tqdm seaborn opencv-python

## 📁 Setup Dataset & Code

**Opsi 1: Clone dari GitHub (Recommended)**
- Push proyek Anda ke GitHub repository
- Clone langsung di Colab dengan satu perintah
- Auto-sync dengan perubahan terbaru

**Opsi 2: Upload ZIP file**
- Compress folder dataset Anda menjadi ZIP
- Upload dan extract di cell berikutnya

**Opsi 3: Google Drive**
- Upload dataset ke Google Drive
- Mount drive dan copy data

In [None]:
# OPTION 1: Clone from GitHub (RECOMMENDED)
# First, push your project to GitHub, then clone here

# Generic template (kept commented out):
# !git clone https://github.com/YOUR_USERNAME/YOUR_REPO_NAME.git
# %cd YOUR_REPO_NAME

# For this specific project: replace YOUR_USERNAME with your GitHub account
# before running. These two lines are active, not commented out.
!git clone https://github.com/YOUR_USERNAME/GAN_Chili_Disease.git
%cd GAN_Chili_Disease

# NOTE: this message is printed unconditionally; it does not check whether
# the clone above succeeded. Inspect the listing below to confirm.
print("✅ Repository cloned successfully!")
print("📁 Current directory:")
!pwd
!ls -la

In [None]:
# NOTE: despite its original "Option 1" label, this first part mounts Google
# Drive (the same steps as the OPTION 3 cell) before the ZIP upload below.
from google.colab import drive
drive.mount('/content/drive')

# Copy dataset from Drive (adjust path as needed)
# !cp -r "/content/drive/MyDrive/Dataset Original" ./
# NOTE(review): the cp command above is commented out, so the message below is
# printed even though nothing was copied — uncomment and adjust the cp line first.
print("Dataset copied from Google Drive")

# OPTION 2: Upload ZIP file (Alternative)
from google.colab import files
import zipfile

print("Upload your dataset ZIP file:")
# Only the keys of the returned object are used below, as filenames on disk.
uploaded = files.upload()

# Extract ZIP
# Only the first uploaded name ending in '.zip' is extracted into the current
# directory; the `break` skips any further archives.
for filename in uploaded.keys():
    if filename.endswith('.zip'):
        print(f"Extracting {filename}...")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall('.')
        break

# List extracted contents
!ls -la

In [None]:
# OPTION 3: Mount Google Drive (Alternative)
from google.colab import drive
drive.mount('/content/drive')

# Copy dataset from Drive (adjust path as needed)
# !cp -r "/content/drive/MyDrive/Dataset Original" ./
# NOTE(review): the cp command above is commented out, so the message below is
# printed even though nothing was copied — uncomment and adjust the cp line first.
print("Dataset copied from Google Drive")

In [None]:
# Verify dataset structure
import os

def check_dataset_structure():
    """Locate the training split and print a per-class image count.

    Probes a fixed list of candidate locations in order and keeps the first
    one that exists. For that location, counts the ``.png``/``.jpg``/``.jpeg``
    files (case-insensitive) in each expected class folder and prints a
    summary table.

    Returns:
        The first candidate path that exists, or ``None`` when none of them
        exists (a hint on how to provide the dataset is printed instead).
    """
    candidates = (
        "Dataset Original/train",           # layout after a GitHub clone
        "data/train",                       # alternative layout
        "train",                            # ZIP extracted in place
        "/content/Dataset Original/train",  # absolute Colab path
    )
    dataset_path = next((p for p in candidates if os.path.exists(p)), None)

    if not dataset_path:
        print("❌ Dataset not found! Please:")
        print("   1. Clone from GitHub with dataset, OR")
        print("   2. Upload dataset ZIP file")
        return None

    print(f"✅ Dataset found at: {dataset_path}")
    print("\nDataset summary:")

    image_suffixes = ('.png', '.jpg', '.jpeg')
    total_images = 0
    for class_name in ('healthy', 'leaf curl', 'leaf spot', 'whitefly', 'yellowish'):
        class_path = os.path.join(dataset_path, class_name)
        if not os.path.exists(class_path):
            print(f"  {class_name:12}: Not found")
            continue

        count = sum(
            1 for entry in os.listdir(class_path)
            if entry.lower().endswith(image_suffixes)
        )
        total_images += count
        print(f"  {class_name:12}: {count:3d} images")

    print(f"\nTotal images: {total_images}")
    return dataset_path

# Check if we're in a git repository
# (only looks for a `.git` entry in the current working directory)
if os.path.exists('.git'):
    print("🔗 Git repository detected!")
    !git status
else:
    print("📁 Not a git repository")

# DATASET_PATH is read by later cells (demo training, comparison grids);
# it is None when no dataset location was found.
DATASET_PATH = check_dataset_structure()

## 🔄 Git Operations & Sync

Jika menggunakan GitHub, Anda bisa sync perubahan dan push hasil training.

In [None]:
# Git operations for syncing with GitHub
def setup_git_config():
    """Set the global git user name and e-mail used for commits.

    The values below are placeholders; edit them before calling. The success
    message is printed unconditionally, it does not verify the git commands.
    """
    # Configure git (replace with your info)
    !git config --global user.name "Your Name"
    !git config --global user.email "your.email@example.com"
    print("✅ Git configured successfully!")

def pull_latest_changes():
    """Pull the latest commits of ``main`` from the ``origin`` remote.

    The previous version ran ``!git pull`` inside ``try``/``except``. IPython's
    ``!`` shell escape does not raise when the command exits with a non-zero
    status, so the ``except`` branch could never run and the success message
    was printed even when the pull failed. Git is now run through
    ``subprocess`` and its exit status decides which message is shown.

    Returns:
        None. Git's own output and a success/failure line are printed.
    """
    import subprocess  # local import: the notebook's import cells do not provide it

    try:
        result = subprocess.run(
            ["git", "pull", "origin", "main"],
            stdin=subprocess.DEVNULL,  # never block waiting for interactive input
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as e:
        # The git executable is missing or cannot be started.
        print(f"⚠️ Pull failed: {e}")
        return

    # Echo git's output so the cell still shows what `!git pull` used to show.
    output = (result.stdout + result.stderr).strip()
    if output:
        print(output)

    if result.returncode == 0:
        print("✅ Latest changes pulled successfully!")
    else:
        print(f"⚠️ Pull failed: git exited with status {result.returncode}")

def push_results():
    """Stage, commit and push the training output folders to ``origin/main``.

    Runs ``git add`` / ``git commit`` / ``git push`` in that order and stops at
    the first step that fails. The previous version used ``!git ...`` inside
    ``try``/``except``; the ``!`` shell escape never raises on a non-zero exit
    status, so failures were reported as success. Exit codes are now checked.

    Only result directories that exist are staged, because ``git add`` aborts
    the whole command when one of the given paths does not exist.

    Returns:
        None. Git's own output and a success/failure line are printed.
    """
    import subprocess  # local imports: not provided by the notebook's import cells
    import time

    result_dirs = [
        d for d in ("colab_models/", "colab_samples/", "colab_augmented/")
        if os.path.isdir(d)
    ]
    if not result_dirs:
        print("⚠️ Push failed: no result directories found (run training first)")
        return

    # The shell's $(date) is not expanded without a shell, so build the
    # timestamp in Python instead.
    timestamp = time.strftime("%a %b %d %H:%M:%S %Z %Y")
    commands = [
        ["git", "add", *result_dirs],
        ["git", "commit", "-m", f"Add Colab training results - {timestamp}"],
        ["git", "push", "origin", "main"],
    ]

    for command in commands:
        try:
            result = subprocess.run(
                command,
                stdin=subprocess.DEVNULL,  # never block on a credential prompt
                capture_output=True,
                text=True,
                check=False,
            )
        except OSError as e:
            # The git executable is missing or cannot be started.
            print(f"⚠️ Push failed: {e}")
            print("You may need to setup GitHub authentication")
            return

        # Echo git's output so the cell still shows what `!git ...` used to show.
        output = (result.stdout + result.stderr).strip()
        if output:
            print(output)

        if result.returncode != 0:
            step = " ".join(command[:2])
            print(f"⚠️ Push failed: '{step}' exited with status {result.returncode}")
            print("You may need to setup GitHub authentication")
            return

    print("✅ Results pushed to GitHub successfully!")

def check_git_status():
    """Check current git status"""
    if os.path.exists('.git'):
        print("🔗 Git Repository Status:")
        !git status
        print("\n🌿 Recent commits:")
        !git log --oneline -5
    else:
        print("❌ Not a git repository")

# Check current status
check_git_status()

## 🤖 GAN Model Definition

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import time

# Set device
# Every network and noise tensor created by the functions below is placed on this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Hyperparameters for Colab
IMG_SIZE = 64  # Images are resized to IMG_SIZE x IMG_SIZE; the layer comments in Generator/Discriminator assume 64
BATCH_SIZE = 64 if torch.cuda.is_available() else 32  # Larger batch for GPU
# NOTE(review): NUM_EPOCHS is not referenced by the code visible here;
# train_gan_colab takes its own `epochs` argument — confirm it is still needed.
NUM_EPOCHS = 200  # Reduced for Colab time limits
LEARNING_RATE = 0.0002  # Adam learning rate for both optimizers
BETA1 = 0.5  # First Adam beta for both optimizers
NZ = 100  # Size of latent vector
NGF = 64  # Generator feature map size
NDF = 64  # Discriminator feature map size
NC = 3    # Number of channels

class ChiliDataset(Dataset):
    """Unlabelled image dataset backed by one flat folder of image files.

    Collects every ``.png``/``.jpg``/``.jpeg`` file (case-insensitive) found
    directly inside ``root_dir``; sub-folders are not searched.

    Args:
        root_dir: Folder containing the images of a single class.
        transform: Optional callable applied to each RGB PIL image before it
            is returned.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # os.listdir() returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and machines.
        self.images = [
            os.path.join(root_dir, name)
            for name in sorted(os.listdir(root_dir))
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]

    def __len__(self):
        """Return the number of image files found in ``root_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply ``transform`` if one was given."""
        # convert() returns a new in-memory image, so the file can be closed
        # as soon as the `with` block ends.
        with Image.open(self.images[idx]) as source:
            image = source.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image

def weights_init(m):
    """Re-initialise one module in place; intended for ``net.apply(weights_init)``.

    Modules whose class name contains ``Conv`` get weights drawn from
    N(0, 0.02). Modules whose class name contains ``BatchNorm`` get weights
    drawn from N(1, 0.02) and a zero bias. Any other module is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

class Generator(nn.Module):
    """Transposed-convolution generator: latent vector -> image in [-1, 1].

    Args:
        nz: Number of channels of the latent input (shape ``N x nz x 1 x 1``).
        ngf: Base feature-map width; hidden layers use 8x, 4x, 2x and 1x of it.
        nc: Number of output image channels.
    """

    def __init__(self, nz, ngf, nc):
        super().__init__()

        # Channel progression of the four normalised up-sampling stages.
        widths = [nz, ngf * 8, ngf * 4, ngf * 2, ngf]

        layers = []
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:])):
            # The first stage expands the 1x1 latent to 4x4 (stride 1, no
            # padding); every later stage doubles the spatial size.
            stride, padding = (1, 0) if stage == 0 else (2, 1)
            layers.append(nn.ConvTranspose2d(c_in, c_out, 4, stride, padding, bias=False))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.ReLU(True))

        # Final stage: (ngf) x 32 x 32 -> (nc) x 64 x 64, squashed by Tanh.
        layers.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        # Layer order (and therefore the `main.<index>` parameter names) is
        # unchanged: 0..11 are the four stages, 12 the output conv, 13 Tanh.
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run the latent batch through the layer stack."""
        return self.main(input)

class Discriminator(nn.Module):
    """Strided-convolution discriminator: image -> probability of being real.

    Args:
        nc: Number of input image channels (input shape ``N x nc x 64 x 64``).
        ndf: Base feature-map width; hidden layers use 1x, 2x, 4x and 8x of it.
    """

    def __init__(self, nc, ndf):
        super().__init__()

        # Stem: (nc) x 64 x 64 -> (ndf) x 32 x 32, without batch norm.
        layers = [
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Three down-sampling stages; each doubles the channels and halves
        # the spatial size: 32 -> 16 -> 8 -> 4.
        channels = ndf
        for _ in range(3):
            layers.extend([
                nn.Conv2d(channels, channels * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(channels * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ])
            channels *= 2

        # Head: (ndf*8) x 4 x 4 -> 1 x 1 x 1 score squashed into (0, 1).
        layers.extend([
            nn.Conv2d(channels, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ])

        # Layer order (and the `main.<index>` parameter names) is unchanged.
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run the image batch through the layer stack."""
        return self.main(input)

print("✅ GAN models defined successfully!")

## 🚀 Training Functions

In [None]:
def train_gan_colab(data_dir, class_name, target_images=200, epochs=200):
    """
    Train one Generator/Discriminator pair on the images of a single class.

    Args:
        data_dir: Folder with the images of one class (loaded via ChiliDataset).
        class_name: Label used in log messages and in the output paths
            ``colab_samples/<class_name>`` and ``colab_models/<class_name>``.
        target_images: Desired total image count for the class. Only used for
            the informational printout; no images are generated here.
        epochs: Number of passes over the dataset.

    Returns:
        Tuple ``(netG, netD, G_losses, D_losses)``: the trained networks and,
        for each epoch, the generator / discriminator loss of that epoch's
        last batch.
    """
    print(f"\n=== Training GAN untuk kelas: {class_name} ===")
    print(f"Device: {device}")
    print(f"Epochs: {epochs}")
    print(f"Batch size: {BATCH_SIZE}")
    
    # Data transforms
    # Resize to IMG_SIZE x IMG_SIZE and scale each channel to [-1, 1],
    # the range produced by the generator's final Tanh.
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    
    # Load dataset
    dataset = ChiliDataset(data_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    
    print(f"Jumlah gambar asli: {len(dataset)}")
    print(f"Target gambar: {target_images}")
    # This difference is negative when the class already has more than target_images files.
    print(f"Perlu generate: {target_images - len(dataset)} gambar")
    
    # Initialize networks
    netG = Generator(NZ, NGF, NC).to(device)
    netD = Discriminator(NC, NDF).to(device)
    
    # Apply weight initialization
    netG.apply(weights_init)
    netD.apply(weights_init)
    
    # Loss function
    # Binary cross-entropy on the discriminator's sigmoid output.
    criterion = nn.BCELoss()
    
    # Optimizers
    optimizerD = optim.Adam(netD.parameters(), lr=LEARNING_RATE, betas=(BETA1, 0.999))
    optimizerG = optim.Adam(netG.parameters(), lr=LEARNING_RATE, betas=(BETA1, 0.999))
    
    # Fixed noise for monitoring progress
    # The same 16 latent vectors are rendered at every snapshot.
    fixed_noise = torch.randn(16, NZ, 1, 1, device=device)
    
    # Training loop
    print("Starting Training...")
    start_time = time.time()
    
    G_losses = []
    D_losses = []
    
    # Progress bar for epochs
    epoch_pbar = tqdm(range(epochs), desc=f"Training {class_name}")
    
    for epoch in epoch_pbar:
        epoch_start = time.time()
        
        for i, data in enumerate(dataloader, 0):
            ############################
            # (1) Update D network
            ############################
            netD.zero_grad()
            
            # Train with real batch
            real_batch = data.to(device)
            b_size = real_batch.size(0)
            # One label tensor is reused in place: 1 = real, 0 = fake.
            label = torch.full((b_size,), 1., dtype=torch.float, device=device)
            
            output = netD(real_batch).view(-1)
            errD_real = criterion(output, label)
            errD_real.backward()
            D_x = output.mean().item()
            
            # Train with fake batch
            noise = torch.randn(b_size, NZ, 1, 1, device=device)
            fake = netG(noise)
            label.fill_(0.)
            
            # detach(): the discriminator update must not backpropagate into the generator.
            output = netD(fake.detach()).view(-1)
            errD_fake = criterion(output, label)
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            
            # Gradients of the real and fake passes were accumulated by the
            # two backward() calls above; errD is only kept for logging.
            errD = errD_real + errD_fake
            optimizerD.step()
            
            ############################
            # (2) Update G network
            ###########################
            netG.zero_grad()
            # The generator is scored against "real" labels.
            label.fill_(1.)
            
            # Re-score the same fake batch with the just-updated discriminator,
            # this time without detach so gradients reach the generator.
            output = netD(fake).view(-1)
            errG = criterion(output, label)
            errG.backward()
            D_G_z2 = output.mean().item()
            optimizerG.step()
        
        # Save losses
        # Only the last batch of the epoch is recorded (not an epoch average).
        # NOTE(review): errG/errD are unbound here if the dataloader yields no batches.
        G_losses.append(errG.item())
        D_losses.append(errD.item())
        
        # Update progress bar
        epoch_time = time.time() - epoch_start
        epoch_pbar.set_postfix({
            'D_loss': f"{errD.item():.3f}",
            'G_loss': f"{errG.item():.3f}",
            'Time': f"{epoch_time:.1f}s"
        })
        
        # Save sample images periodically
        # Every 25th epoch (starting at epoch 0) and the final epoch.
        if epoch % 25 == 0 or epoch == epochs - 1:
            with torch.no_grad():
                fake_sample = netG(fixed_noise).detach().cpu()
                sample_dir = f"colab_samples/{class_name}"
                os.makedirs(sample_dir, exist_ok=True)
                # 16 samples laid out as a grid with 4 images per row.
                vutils.save_image(fake_sample, f"{sample_dir}/epoch_{epoch}.png", 
                                normalize=True, nrow=4)
    
    total_time = time.time() - start_time
    print(f"\n✅ Training completed in {total_time/60:.1f} minutes")
    
    # Save model
    # Only the state dicts are written; reloading requires building the
    # Generator/Discriminator with the same constructor arguments first.
    model_dir = f"colab_models/{class_name}"
    os.makedirs(model_dir, exist_ok=True)
    torch.save(netG.state_dict(), f"{model_dir}/generator.pth")
    torch.save(netD.state_dict(), f"{model_dir}/discriminator.pth")
    
    print(f"Model saved to {model_dir}")
    
    return netG, netD, G_losses, D_losses

def generate_images_colab(generator, class_name, num_images, output_dir="colab_augmented"):
    """Sample images from a trained generator and save them as JPEG files.

    Args:
        generator: Trained generator module; it is switched to eval mode.
        class_name: Class label; selects the sub-folder and the file prefix
            (spaces in the prefix are replaced by underscores).
        num_images: Number of images to write.
        output_dir: Root folder; files go to ``<output_dir>/<class_name>/``.
    """
    print(f"\nGenerating {num_images} images for {class_name}...")

    generator.eval()
    class_dir = f"{output_dir}/{class_name}"
    os.makedirs(class_dir, exist_ok=True)

    file_stem = class_name.replace(' ', '_')
    to_pil = transforms.ToPILImage()

    with torch.no_grad():
        for index in tqdm(range(num_images), desc="Generating"):
            # One latent vector per image.
            latent = torch.randn(1, NZ, 1, 1, device=device)
            # Shift from [-1, 1] to [0, 1] before PIL conversion
            # (assumes the generator ends in Tanh, as Generator here does).
            pixels = (generator(latent)[0].cpu() + 1) / 2.0
            img_path = os.path.join(class_dir, f"generated_{file_stem}_{index:03d}.jpg")
            to_pil(pixels).save(img_path)

    print(f"✅ Generated {num_images} images saved to {output_dir}/{class_name}")

def plot_training_losses(G_losses, D_losses, class_name):
    """Show the loss curves next to the newest saved sample grid.

    Args:
        G_losses: Generator loss per epoch.
        D_losses: Discriminator loss per epoch.
        class_name: Class label; used in titles and to find
            ``colab_samples/<class_name>/epoch_<n>.png``.
    """
    plt.figure(figsize=(12, 5))

    # Left panel: both loss curves on shared axes.
    plt.subplot(1, 2, 1)
    for series, label, color in (
        (G_losses, 'Generator', 'blue'),
        (D_losses, 'Discriminator', 'red'),
    ):
        plt.plot(series, label=label, color=color)
    plt.title(f'Training Losses - {class_name}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Right panel: snapshot with the highest epoch number, if any exist.
    plt.subplot(1, 2, 2)
    sample_path = f"colab_samples/{class_name}"
    snapshots = []
    if os.path.exists(sample_path):
        snapshots = [name for name in os.listdir(sample_path) if name.endswith('.png')]

    if snapshots:
        def epoch_of(filename):
            # "epoch_25.png" -> 25
            return int(filename.split('_')[1].split('.')[0])

        newest = max(snapshots, key=epoch_of)
        plt.imshow(plt.imread(os.path.join(sample_path, newest)))
        plt.title(f'Latest Generated Samples - {class_name}')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

print("✅ Training functions defined successfully!")

## 🎯 Demo Training (Single Class)

Mulai dengan demo training untuk satu kelas terlebih dahulu.

In [None]:
# Demo training for a single class
if DATASET_PATH:
    # Pick the class used for the demo
    demo_class = "healthy"  # Change to a class that exists in your dataset
    demo_epochs = 50        # Small epoch count to keep the demo short
    
    demo_data_dir = os.path.join(DATASET_PATH, demo_class)
    
    if os.path.exists(demo_data_dir):
        print(f"🚀 Starting demo training for class: {demo_class}")
        print(f"📁 Data directory: {demo_data_dir}")
        print(f"🔄 Epochs: {demo_epochs}")
        
        # Start training
        generator, discriminator, g_losses, d_losses = train_gan_colab(
            demo_data_dir, demo_class, target_images=120, epochs=demo_epochs
        )
        
        # Plot losses
        plot_training_losses(g_losses, d_losses, demo_class)
        
        # Generate sample images
        # A fixed 20 images are written, independent of target_images above.
        generate_images_colab(generator, demo_class, 20)
        
        print(f"\n🎉 Demo completed for {demo_class}!")
        
    else:
        # List the sub-folders that do exist so the user can pick another class.
        print(f"❌ Class directory not found: {demo_data_dir}")
        print("Available classes:")
        for item in os.listdir(DATASET_PATH):
            if os.path.isdir(os.path.join(DATASET_PATH, item)):
                print(f"  - {item}")
else:
    print("❌ Please upload dataset first")

## 🏭 Full Training (All Classes)

Setelah demo berhasil, jalankan training untuk semua kelas.

In [None]:
# Full training untuk semua kelas
def train_all_classes_colab(dataset_path, epochs=150, target_images=200):
    """Train one GAN per disease class and top each class up to a target size.

    For every class folder found under ``dataset_path`` this trains a GAN,
    generates ``target_images - <existing image count>`` images when that
    number is positive, and plots the losses. A missing folder or a failure
    in one class is reported and skipped; the other classes still run.

    Args:
        dataset_path: Folder that contains one sub-folder per class.
        epochs: Training epochs per class.
        target_images: Desired number of images per class after augmentation.

    Returns:
        Dict keyed by class name with ``generator``, ``discriminator``,
        ``g_losses`` and ``d_losses`` for each class that completed.
    """
    classes = ['healthy', 'leaf curl', 'leaf spot', 'whitefly', 'yellowish']
    image_suffixes = ('.png', '.jpg', '.jpeg')

    print("🏭 FULL TRAINING - ALL CLASSES")
    print(f"📊 Target per class: {target_images} images")
    print(f"🔄 Training epochs: {epochs}")
    print("=" * 50)

    results = {}
    total_start_time = time.time()
    n_classes = len(classes)

    for position, class_name in enumerate(classes, start=1):
        print(f"\n[{position}/{n_classes}] Training class: {class_name}")
        print("-" * 30)

        class_dir = os.path.join(dataset_path, class_name)
        if not os.path.exists(class_dir):
            print(f"⚠️  Directory not found: {class_dir}")
            continue

        try:
            # Train the GAN for this class.
            generator, discriminator, g_losses, d_losses = train_gan_colab(
                class_dir, class_name, target_images, epochs
            )

            # Generate only as many images as are missing to reach the target.
            existing = sum(
                1 for entry in os.listdir(class_dir)
                if entry.lower().endswith(image_suffixes)
            )
            needed_images = target_images - existing
            if needed_images > 0:
                generate_images_colab(generator, class_name, needed_images)

            plot_training_losses(g_losses, d_losses, class_name)

            results[class_name] = {
                'generator': generator,
                'discriminator': discriminator,
                'g_losses': g_losses,
                'd_losses': d_losses,
            }
            print(f"✅ Completed class {class_name}")
        except Exception as e:
            # Best effort: one failing class must not stop the remaining ones.
            print(f"❌ Error training {class_name}: {e}")
            continue

    total_time = time.time() - total_start_time
    print(f"\n🎊 ALL TRAINING COMPLETED!")
    print(f"⏰ Total time: {total_time/3600:.1f} hours")
    print(f"📁 Results saved in:")
    print(f"   - colab_models/     : Trained models")
    print(f"   - colab_augmented/  : Generated images")
    print(f"   - colab_samples/    : Training samples")

    return results

# Run full training (uncomment to start)
# WARNING: This will take 3-4 hours
# results = train_all_classes_colab(DATASET_PATH, epochs=150, target_images=200)

## 📊 Evaluation & Visualization

In [None]:
def create_comparison_grid(original_dir, generated_dir, class_name, num_samples=8):
    """Show original images (top row) above generated images (bottom row).

    Args:
        original_dir: Folder with the real images of the class.
        generated_dir: Folder with the generated images of the class.
        class_name: Class label used in the figure title.
        num_samples: Number of columns, i.e. the maximum number of images
            shown per row. Panels without an image are left blank.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # squeeze=False keeps `axes` two-dimensional even when num_samples == 1;
    # otherwise a 1-D array is returned and axes[row, col] fails.
    fig, axes = plt.subplots(2, num_samples, figsize=(20, 6), squeeze=False)

    rows = (
        (0, original_dir, 'Original'),
        (1, generated_dir, 'Generated'),
    )
    for row, directory, title in rows:
        # Sorted so the same images are shown on every run
        # (os.listdir order is arbitrary).
        filenames = [
            name for name in sorted(os.listdir(directory))
            if name.lower().endswith(image_suffixes)
        ][:num_samples]

        for col, filename in enumerate(filenames):
            img = plt.imread(os.path.join(directory, filename))
            axes[row, col].imshow(img)
            axes[row, col].set_title(title, fontsize=10)

    # Hide ticks and frames on every panel, including panels left empty
    # because a folder holds fewer than num_samples images.
    for ax in axes.flat:
        ax.axis('off')

    plt.suptitle(f'Original vs Generated - {class_name}', fontsize=16)
    plt.tight_layout()
    plt.show()

def show_training_progress():
    """Print, for each class, whether a generator was saved and how many images exist.

    A class counts as completed when ``colab_models/<class>/generator.pth``
    exists. For completed classes with a ``colab_augmented/<class>`` folder,
    the number of ``.png``/``.jpg``/``.jpeg`` files in it is appended.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    print("📈 TRAINING PROGRESS SUMMARY")
    print("=" * 50)

    for class_name in ('healthy', 'leaf curl', 'leaf spot', 'whitefly', 'yellowish'):
        checkpoint = f"colab_models/{class_name}/generator.pth"
        gen_dir = f"colab_augmented/{class_name}"

        if not os.path.exists(checkpoint):
            status = "❌ Not trained"
        else:
            status = "✅ Completed"
            if os.path.exists(gen_dir):
                # Count the generated images for this class.
                gen_count = sum(
                    1 for entry in os.listdir(gen_dir)
                    if entry.lower().endswith(image_suffixes)
                )
                status += f" ({gen_count} images)"

        print(f"  {class_name:15}: {status}")

    print("\n📁 Output directories:")
    print(f"   - colab_models/     : Trained GAN models")
    print(f"   - colab_augmented/  : Generated images for augmentation")
    print(f"   - colab_samples/    : Training progress samples")

# Show current progress
show_training_progress()

In [None]:
# Create comparison grids for trained classes
def show_all_comparisons():
    """Draw an original-vs-generated grid for every class that has both folders.

    Reads the module-level ``DATASET_PATH``. A class is skipped with a warning
    when its original folder or its ``colab_augmented/<class>`` folder is missing.
    """
    for class_name in ('healthy', 'leaf curl', 'leaf spot', 'whitefly', 'yellowish'):
        generated_dir = f"colab_augmented/{class_name}"
        original_dir = os.path.join(DATASET_PATH, class_name) if DATASET_PATH else None

        has_both = (
            bool(original_dir)
            and os.path.exists(original_dir)
            and os.path.exists(generated_dir)
        )
        if not has_both:
            print(f"⚠️  Cannot create comparison for {class_name} - missing data")
            continue

        print(f"\n📊 Comparison for {class_name}:")
        create_comparison_grid(original_dir, generated_dir, class_name)

# Show comparisons (uncomment after training)
# show_all_comparisons()

## 💾 Download Results

In [None]:
# Create ZIP file for download
import zipfile
from google.colab import files

def create_results_zip():
    """Pack all generated images, saved models and sample grids into one ZIP.

    Walks ``colab_augmented``, ``colab_models`` and ``colab_samples`` (in that
    order, skipping any that do not exist) and stores every file under its
    path relative to the current working directory.

    Returns:
        The archive's filename, ``"gan_results.zip"``.
    """
    zip_filename = "gan_results.zip"
    result_dirs = ("colab_augmented", "colab_models", "colab_samples")

    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as archive:
        for top in result_dirs:
            if not os.path.exists(top):
                continue
            for folder, _subdirs, names in os.walk(top):
                for name in names:
                    member = os.path.join(folder, name)
                    archive.write(member, os.path.relpath(member))

    print(f"✅ Results packed into {zip_filename}")
    return zip_filename

def download_results():
    """Zip the results and start a browser download of the archive.

    Any exception raised while zipping or downloading is reported on stdout
    instead of being propagated.
    """
    try:
        archive_name = create_results_zip()
        files.download(archive_name)
        print("📥 Download started!")
    except Exception as err:
        print(f"❌ Download error: {err}")

# Download results (uncomment after training)
# download_results()

## 📝 Summary & GitHub Workflow

### Setup Workflow dengan GitHub:

#### 1. **Persiapan Awal** (Sekali saja):
```bash
# Di komputer lokal:
git init
git add .
git commit -m "Initial commit"
git branch -M main
git remote add origin https://github.com/USERNAME/REPO_NAME.git
git push -u origin main
```

#### 2. **Di Google Colab** (Setiap session):
- Clone repository: `!git clone https://github.com/USERNAME/REPO_NAME.git`
- Jalankan training
- Push hasil: `!git add . && git commit -m "Training results" && git push`



























#### 3. **Keuntungan GitHub Integration**:
- ✅ **No upload berulang**: Clone sekali, otomatis latest
- ✅ **Version control**: Track semua perubahan code
- ✅ **Backup otomatis**: Hasil training tersimpan aman
- ✅ **Collaboration**: Tim bisa akses yang sama
- ✅ **Reproducible**: Environment dan code terdokumentasi

### Setelah training selesai:
1. **Push hasil** ke GitHub menggunakan cell Git operations
2. **Download local** jika perlu: `git pull` di komputer
3. **Gabungkan** dengan dataset asli untuk membuat dataset augmented
4. **Gunakan** dataset augmented untuk training model klasifikasi

### File yang dihasilkan:
- `colab_models/`: Model GAN terlatih (generator & discriminator)
- `colab_augmented/`: Gambar hasil augmentasi
- `colab_samples/`: Sample progress training
- `.git/`: Version control history

### Tips untuk workflow optimal:
1. **Setup GitHub** dengan SSH keys untuk push yang mudah
2. **Commit frequently** untuk backup progress
3. **Use meaningful commit messages** untuk tracking
4. **Create branches** untuk eksperimen berbeda
5. **Use .gitignore** untuk file besar (models opsional)

## 🎆 Complete GitHub Workflow Summary

### 🚀 **Quick Start with GitHub**:

```python
# 1. Clone your repository
!git clone https://github.com/USERNAME/REPO_NAME.git
%cd REPO_NAME

# 2. Run training
# ... (training cells) ...

# 3. Backup results
manual_backup_now()

# 4. Optional: Auto-backup during long training
auto_backup_training(30)  # Every 30 minutes
```

### 🌿 **Benefits of GitHub Integration**:

| Feature | Without GitHub | With GitHub |
|---------|----------------|-------------|
| **File Access** | Upload setiap kali | Clone sekali aja |
| **Version Control** | Manual backup | Otomatis track changes |
| **Collaboration** | Share files manual | Real-time collaboration |
| **Backup** | Download manual | Auto-push ke cloud |
| **Reproducibility** | Setup manual | Environment terdokumentasi |
| **History** | Tidak ada | Full change history |

### 🔄 **Typical Workflow**:

1. **Setup** (Pertama kali):
   - Buat GitHub repo
   - Push local project ke GitHub
   - Setup authentication

2. **Daily Usage** (Setiap training):
   - Clone repo di Colab baru
   - Pull latest changes
   - Run training
   - Auto-backup progress
   - Push final results

3. **Long Training** (3-4 jam):
   - Setup auto-backup setiap 30 menit
   - Monitor progress via GitHub commits
   - Download results dari GitHub ke local

### 📝 **Repository Structure**:

```
GAN_Chili_Disease/
├── src/                 # Source code
├── colab_setup.ipynb   # Main Colab notebook
├── COLAB_GUIDE.md      # Documentation
├── requirements_colab.txt
├── Dataset Original/   # Sample data
├── colab_models/       # Trained models (auto-generated)
├── colab_samples/      # Training samples (auto-generated)
├── colab_augmented/    # Generated images (auto-generated)
└── .gitignore          # Git ignore rules
```

### ✨ **Pro Tips**:
- Use **meaningful commit messages** untuk track progress
- Setup **auto-backup** untuk training lama
- Use **branches** untuk eksperimen berbeda
- Keep **large files** di Git LFS atau external storage
- **Document** semua hyperparameters di README

### 🎉 **Result**:
Sekarang Anda tidak perlu upload-download file lagi! Semua tersync otomatis dengan GitHub. Perfect untuk thesis work! 📚🚀

## 🚀 Setup GitHub Repository

### Langkah-langkah Setup GitHub (First Time):

In [None]:
# 🚀 CREATE GITHUB REPOSITORY
# Run this in your LOCAL computer first, then use in Colab

def show_github_setup_guide():
    """Print the step-by-step GitHub setup guide.

    The guide is static text: repository creation, local setup commands,
    per-session Colab usage, authentication options, and which files to track.
    """
    # Each section is its heading followed by its body lines; section
    # headings carry a leading newline to separate them from the text above.
    sections = (
        ("\n1️⃣ CREATE GITHUB REPOSITORY:", (
            "   - Go to github.com",
            "   - Click 'New Repository'",
            "   - Name: 'GAN_Chili_Disease' (or any name)",
            "   - Make it PUBLIC or PRIVATE",
            "   - Don't initialize with README (we have files already)",
        )),
        ("\n2️⃣ LOCAL SETUP (Run in your computer):", (
            "   cd 'c:\\Riset Infromatika\\Python V3\\GAN_Project'",
            "   git init",
            "   git add .",
            "   git commit -m 'Initial GAN project commit'",
            "   git branch -M main",
            "   git remote add origin https://github.com/USERNAME/REPO_NAME.git",
            "   git push -u origin main",
        )),
        ("\n3️⃣ COLAB USAGE (Every session):", (
            "   !git clone https://github.com/USERNAME/REPO_NAME.git",
            "   %cd REPO_NAME",
            "   # Run training...",
            "   !git add results/",
            "   !git commit -m 'Add training results'",
            "   !git push",
        )),
        ("\n4️⃣ AUTHENTICATION:", (
            "   Option A: Use Personal Access Token",
            "   Option B: Use GitHub CLI: gh auth login",
            "   Option C: SSH keys (more secure)",
        )),
        ("\n📝 IMPORTANT FILES TO INCLUDE:", (
            "   ✅ Source code (src/)",
            "   ✅ Colab notebooks (.ipynb)",
            "   ✅ Requirements (requirements_colab.txt)",
            "   ✅ Documentation (README.md, COLAB_GUIDE.md)",
            "   ✅ Sample dataset (small subset for testing)",
            "   ❌ Large datasets (use Git LFS or external storage)",
            "   ❌ Trained models (can be optional, use releases)",
        )),
    )

    print("🚀 GITHUB SETUP GUIDE")
    print("=" * 40)

    for heading, body_lines in sections:
        print(heading)
        for line in body_lines:
            print(line)

# Show the guide
show_github_setup_guide()

In [None]:
# 📋 TEMPLATE COMMANDS FOR YOUR PROJECT
# Copy these commands and modify with your GitHub username

def generate_setup_commands():
    """Interactively print copy-paste git commands and create a .gitignore.

    Prompts for a GitHub username and a repository name (blank or
    whitespace-only answers fall back to ``YOUR_USERNAME`` and
    ``GAN_Chili_Disease``), prints the local setup, Colab clone and
    result-push commands for that repository, and writes a starter
    ``.gitignore`` into the current working directory.

    An already existing ``.gitignore`` is left untouched so that rules
    from a cloned repository are not lost. Returns nothing.
    """
    print("📋 COPY-PASTE COMMANDS")
    print("=" * 40)

    # strip() so that answers consisting only of spaces use the defaults
    github_username = input("👤 Enter your GitHub username: ").strip() or "YOUR_USERNAME"
    repo_name = input("📁 Enter repository name: ").strip() or "GAN_Chili_Disease"

    repo_url = f"https://github.com/{github_username}/{repo_name}"
    print(f"\n🔗 Your Repository URL: {repo_url}")

    print("\n💻 LOCAL SETUP (Windows PowerShell):")
    # Backslashes are escaped explicitly: "\R", "\P" and "\G" are invalid
    # escape sequences and trigger a SyntaxWarning on recent Python versions.
    local_setup = [
        'cd "c:\\Riset Infromatika\\Python V3\\GAN_Project"',
        "git init",
        "git add .",
        'git commit -m "Initial GAN project for chili disease classification"',
        "git branch -M main",
        f"git remote add origin {repo_url}.git",
        "git push -u origin main",
    ]
    print("\n".join(local_setup))

    print("\n🚀 COLAB CLONE COMMAND:")
    print(f"!git clone {repo_url}.git")
    print(f"%cd {repo_name}")

    print("\n💾 PUSH RESULTS COMMAND:")
    print("!git add colab_models/ colab_samples/ colab_augmented/")
    print('!git commit -m "Add Colab training results - $(date)"')
    print("!git push origin main")

    print("\n📄 CREATE .gitignore FILE:")
    gitignore_lines = [
        "# Large files",
        "*.pth",
        "*.h5",
        "*.pkl",
        "__pycache__/",
        "*.pyc",
        "",
        "# Large datasets (optional)",
        "# Dataset Original/",
        "# Dataset Augmented/",
        "",
        "# Jupyter checkpoints",
        ".ipynb_checkpoints/",
        "",
        "# OS files",
        ".DS_Store",
        "Thumbs.db",
        "",
        "# Results (optional - you may want to track these)",
        "# colab_models/",
        "# results/",
        "# outputs/",
    ]

    if os.path.exists('.gitignore'):
        # Never clobber rules that came with the repository
        print("⚠️ .gitignore already exists - leaving it unchanged")
    else:
        with open('.gitignore', 'w', encoding='utf-8') as f:
            # Trailing newline keeps the file well-formed for later appends
            f.write("\n".join(gitignore_lines) + "\n")
        print("✅ .gitignore file created!")

    print("\n📝 Note: Adjust .gitignore based on what you want to track")

# Generate commands
# generate_setup_commands()

In [None]:
# 🔐 GITHUB AUTHENTICATION IN COLAB
# Choose one method for pushing to GitHub

def setup_github_auth():
    """Walk through the GitHub authentication options for pushing from Colab.

    Prints instructions for the three supported methods (personal access
    token, GitHub CLI, SSH keys) and then asks which one to set up:

    * ``A`` - reads the token without echoing it, exports it as the
      ``GITHUB_TOKEN`` environment variable of this process and prints a
      ``git remote set-url`` command that references that variable, so the
      secret never appears in the saved cell output.
    * ``B`` - runs the GitHub CLI installation commands and reports
      success only when every command exits with status 0.
    * ``C`` - prints the ``ssh-keygen`` commands for the given email.

    Any other answer prints a warning. Returns nothing.
    """
    # Local imports keep this notebook cell self-contained
    import getpass
    import subprocess

    print("🔐 GITHUB AUTHENTICATION OPTIONS")
    print("=" * 40)

    print("\n🅰️ OPTION A: Personal Access Token (Recommended)")
    print("   1. Go to GitHub Settings > Developer settings > Personal access tokens")
    print("   2. Generate new token (classic)")
    print("   3. Select scopes: 'repo' and 'workflow'")
    print("   4. Copy the token")
    print("   5. Use in Colab:")
    print("      !git remote set-url origin https://TOKEN@github.com/USERNAME/REPO.git")

    print("\n🅱️ OPTION B: GitHub CLI")
    print("   !curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg")
    print("   !apt update && apt install gh")
    print("   !gh auth login")

    print("\n🄭 OPTION C: SSH Keys (Most Secure)")
    print("   # Generate SSH key in Colab")
    print("   !ssh-keygen -t ed25519 -C 'your.email@example.com'")
    print("   !cat ~/.ssh/id_ed25519.pub")
    print("   # Copy public key to GitHub Settings > SSH keys")
    print("   !git remote set-url origin git@github.com:USERNAME/REPO.git")

    print("\n🌟 QUICK SETUP (Replace with your info):")

    # strip() tolerates accidental spaces around the answer
    choice = input("Choose authentication method (A/B/C): ").strip().upper()

    if choice == 'A':
        # getpass keeps the secret out of the notebook's stored output
        token = getpass.getpass("🔑 Enter your GitHub token: ").strip()
        username = input("👤 Enter your GitHub username: ").strip()
        repo = input("📁 Enter repository name: ").strip()

        if not (token and username and repo):
            print("⚠️ Token, username and repository name are all required. Please run again.")
            return

        # The printed command refers to the variable instead of embedding the
        # token; the shell started by `!` is expected to inherit it from this
        # process's environment.
        os.environ["GITHUB_TOKEN"] = token
        print("\n🔒 Token stored in the GITHUB_TOKEN environment variable (this session only)")
        print("📝 Run this command:")
        print(f"!git remote set-url origin https://$GITHUB_TOKEN@github.com/{username}/{repo}.git")

    elif choice == 'B':
        print("\n📝 Installing GitHub CLI...")
        install_steps = (
            "curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg",
            'echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null',
            "apt update && apt install gh -y",
        )
        for step in install_steps:
            # shell=True is required for the pipes/&&; the commands are fixed
            # strings, no user input is interpolated. For a pipeline the exit
            # status is that of its last command.
            proc = subprocess.run(step, shell=True, capture_output=True, text=True)
            for stream in (proc.stdout, proc.stderr):
                if stream.strip():
                    print(stream.rstrip())
            if proc.returncode != 0:
                print(f"❌ GitHub CLI installation failed (exit code {proc.returncode})")
                break
        else:
            # Reached only when no step failed
            print("✅ GitHub CLI installed! Run: !gh auth login")

    elif choice == 'C':
        email = input("📧 Enter your email: ").strip()
        print("\n📝 Run these commands:")
        print(f"!ssh-keygen -t ed25519 -C '{email}' -f ~/.ssh/id_ed25519 -N ''")
        print("!cat ~/.ssh/id_ed25519.pub")
        print("# Copy the output to GitHub Settings > SSH keys")

    else:
        print("⚠️ Invalid choice. Please run again.")

# Setup authentication
# setup_github_auth()

In [None]:
# ⚙️ AUTOMATED GITHUB WORKFLOW
# Complete workflow for GitHub integration

def complete_github_workflow():
    """Check that the working directory is a usable git checkout.

    Pulls ``origin main``, prints ``git status`` and makes sure a git
    identity is configured. When the user name or the email is missing, a
    placeholder identity ("Colab User" / "colab@example.com") is written
    to the global git configuration.

    Git is invoked through ``subprocess`` so that failures are detected
    from the exit status; a failed pull or configuration step is reported
    but does not abort the check.

    Returns:
        bool: ``False`` when the current directory contains no ``.git``
        entry, ``True`` otherwise.
    """
    import subprocess  # local import keeps this notebook cell self-contained

    def run_git(*args, echo=True):
        """Run ``git <args>``; return ``(succeeded, stripped stdout)``."""
        try:
            proc = subprocess.run(["git", *args], capture_output=True, text=True)
        except OSError as exc:  # git executable missing or not runnable
            if echo:
                print(f"   git could not be started: {exc}")
            return False, ""
        if echo:
            # Relay git's own output so the cell shows what happened
            for stream in (proc.stdout, proc.stderr):
                if stream.strip():
                    print(stream.rstrip())
        return proc.returncode == 0, proc.stdout.strip()

    print("⚙️ AUTOMATED GITHUB WORKFLOW")
    print("=" * 40)

    # Step 1: a repository must exist before anything else makes sense
    if not os.path.exists('.git'):
        print("❌ No git repository found")
        print("📝 Please clone your repository first:")
        print("   !git clone https://github.com/USERNAME/REPO_NAME.git")
        return False

    print("✅ Git repository detected")

    print("🔄 Pulling latest changes...")
    pulled, _ = run_git("pull", "origin", "main")
    if pulled:
        print("✅ Pull successful")
    else:
        print("⚠️ Pull failed - continuing anyway")

    # Step 2: show the working-tree state
    print("\n🔍 Checking git status...")
    run_git("status")

    # Step 3: commits need an identity; fill in a placeholder if absent.
    # `git config <key>` prints nothing when the key is unset.
    print("\n⚙️ Checking git configuration...")
    _, username = run_git("config", "user.name", echo=False)
    _, email = run_git("config", "user.email", echo=False)

    if username and email:
        print(f"✅ Git configured as: {username} <{email}>")
    else:
        print("⚠️ Git not configured. Setting up...")
        name_set, _ = run_git("config", "--global", "user.name", "Colab User")
        email_set, _ = run_git("config", "--global", "user.email", "colab@example.com")
        if name_set and email_set:
            print("✅ Git configured")
        else:
            print("⚠️ Could not configure git")

    return True

def save_and_push_results(commit_message="Update from Colab training"):
    """Stage training artifacts, commit them and push to ``origin main``.

    The result directories (``colab_models/``, ``colab_samples/``,
    ``colab_augmented/``) and any ``*.png`` / ``*.jpg`` / ``*.json`` files
    in the current directory are staged when they exist. A failed commit
    (for example because nothing changed) is reported and the push is still
    attempted, so previously unpushed commits are delivered.

    Args:
        commit_message: Message for the commit. It is handed to git as a
            single argument, so quotes and shell metacharacters are safe.

    Returns:
        bool: ``True`` only when ``git push`` exits with status 0,
        ``False`` otherwise.
    """
    # Local imports keep this notebook cell self-contained
    import glob
    import subprocess

    def run_git(*args):
        """Run ``git <args>``, relay its output, return True on exit status 0."""
        try:
            proc = subprocess.run(["git", *args], capture_output=True, text=True)
        except OSError as exc:  # git executable missing or not runnable
            print(f"   git could not be started: {exc}")
            return False
        for stream in (proc.stdout, proc.stderr):
            if stream.strip():
                print(stream.rstrip())
        return proc.returncode == 0

    print(f"\n💾 SAVING RESULTS: {commit_message}")
    print("=" * 40)

    print("📝 Adding files to git...")

    files_to_add = [
        "colab_models/",
        "colab_samples/",
        "colab_augmented/",
        "*.png",
        "*.jpg",
        "*.json"
    ]

    for file_pattern in files_to_add:
        # Expand the pattern here (no shell is involved); a directory name
        # without wildcards matches itself only when it exists.
        matches = glob.glob(file_pattern)
        if not matches:
            continue
        # "--" stops git from reading a file name as an option
        if run_git("add", "--", *matches):
            print(f"  ✅ Added {file_pattern}")
        else:
            print(f"  ⚠️ Could not add {file_pattern}")

    print(f"\n📋 Committing changes: {commit_message}")
    if run_git("commit", "-m", commit_message):
        print("✅ Commit successful")
    else:
        print("⚠️ Commit failed - maybe no changes to commit")

    print("\n🚀 Pushing to GitHub...")
    if run_git("push", "origin", "main"):
        print("✅ Push successful!")
        print("🎉 Results are now available on GitHub!")
        return True

    print("❌ Push failed")
    print("📝 You may need to setup authentication first")
    return False

# Verify the repository once when this cell runs and tell the user what to do next
workflow_ready = complete_github_workflow()

if not workflow_ready:
    print("\n⚠️ Please setup GitHub repository first")
else:
    print("\n🎆 GitHub workflow is ready!")
    print("📝 After training, use: save_and_push_results('Training completed')")

In [None]:
# 📊 MONITORING & AUTOMATED BACKUP
# Automatically backup training progress

import time
import threading

def auto_backup_training(interval_minutes=30):
    """Start a daemon thread that backs up training results periodically.

    Every ``interval_minutes`` the worker checks whether any of the result
    directories (``colab_models``, ``colab_samples``, ``colab_augmented``)
    exists and is non-empty and, if so, calls ``save_and_push_results``
    with a timestamped commit message. An error during one backup round is
    reported and the loop keeps running.

    Args:
        interval_minutes: Minutes to wait between backup rounds; must be
            greater than 0.

    Returns:
        threading.Thread: The started daemon thread. Its ``stop_event``
        attribute is a ``threading.Event``; calling
        ``thread.stop_event.set()`` ends the loop.

    Raises:
        ValueError: If ``interval_minutes`` is not greater than 0.
    """
    if interval_minutes <= 0:
        # A zero interval would spin without pause; a negative one would
        # crash the worker inside the wait call.
        raise ValueError("interval_minutes must be greater than 0")

    stop_event = threading.Event()
    result_dirs = ('colab_models', 'colab_samples', 'colab_augmented')

    def backup_worker():
        # Event.wait returns False on timeout (time for a backup round)
        # and True as soon as a stop has been requested.
        while not stop_event.wait(interval_minutes * 60):
            print(f"\n🔄 AUTO-BACKUP at {time.strftime('%H:%M:%S')}")

            try:
                # isdir guards os.listdir against a plain file with that name
                has_results = any(
                    os.path.isdir(d) and os.listdir(d) for d in result_dirs
                )

                if not has_results:
                    print("📋 No new results to backup")
                    continue

                timestamp = time.strftime('%Y-%m-%d_%H-%M')
                commit_msg = f"Auto-backup training progress - {timestamp}"

                if save_and_push_results(commit_msg):
                    print(f"✅ Auto-backup successful at {timestamp}")
                else:
                    print(f"❌ Auto-backup failed at {timestamp}")
            except Exception as exc:
                # Thread boundary: report and keep the schedule alive
                # instead of letting one bad round end all future backups.
                print(f"❌ Auto-backup error: {exc}")

    # Start backup thread
    backup_thread = threading.Thread(target=backup_worker, daemon=True)
    backup_thread.stop_event = stop_event
    backup_thread.start()

    print(f"🔄 Auto-backup started! Will backup every {interval_minutes} minutes")
    print("📝 This will run in background during training")

    return backup_thread

def manual_backup_now():
    """Push the current results immediately under a timestamped commit.

    Returns:
        The value returned by ``save_and_push_results`` for this backup.
    """
    stamp = time.strftime('%Y-%m-%d_%H-%M')
    print(f"\n💾 MANUAL BACKUP - {stamp}")

    pushed = save_and_push_results(f"Manual backup - {stamp}")

    outcome = (
        "🎉 Manual backup completed successfully!"
        if pushed
        else "❌ Manual backup failed"
    )
    print(outcome)

    return pushed

def show_backup_status():
    """Show current backup status"""
    print("\n📊 BACKUP STATUS")
    print("=" * 40)
    
    # Check git status
    if os.path.exists('.git'):
        print("🔗 Git repository: Active")
        
        # Show recent commits
        print("\n📅 Recent commits:")
        !git log --oneline -5
        
        # Show current changes
        print("\n📝 Current changes:")
        !git status --porcelain
        
        # Show remote URL
        print("\n🌐 Remote repository:")
        !git remote -v
        
    else:
        print("❌ No git repository found")
    
    # Check result directories
    print("\n📁 Result directories:")
    result_dirs = {
        'colab_models': 'Trained models',
        'colab_samples': 'Training samples', 
        'colab_augmented': 'Generated images'
    }
    
    for dir_name, description in result_dirs.items():
        if os.path.exists(dir_name):
            file_count = len([f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))])
            print(f"  ✅ {dir_name:18}: {file_count} files ({description})")
        else:
            print(f"  ❌ {dir_name:18}: Not found")

# Report the current state once, then list the helpers available to the user
show_backup_status()

print("\n🚀 BACKUP COMMANDS:")
for usage_line in (
    "   manual_backup_now()           - Backup right now",
    "   auto_backup_training(30)      - Auto-backup every 30 minutes",
    "   show_backup_status()          - Check current status",
):
    print(usage_line)