In [None]:
# ============================================
# CELL 1: Setup Kaggle API for Google Colab
# ============================================
import os
from pathlib import Path

print("="*70)
print("üîß SETTING UP KAGGLE API FOR GOOGLE COLAB")
print("="*70)

# Upload kaggle.json
from google.colab import files

kaggle_path = Path.home() / '.kaggle'
kaggle_path.mkdir(exist_ok=True)
kaggle_json = kaggle_path / 'kaggle.json'

if not kaggle_json.exists():
    print("\nüì§ Please upload your kaggle.json file:")
    print("   (Get it from: https://www.kaggle.com/settings ‚Üí Create New Token)\n")
    uploaded = files.upload()
    
    # Move to correct location
    for fn in uploaded.keys():
        with open(kaggle_json, 'wb') as f:
            f.write(uploaded[fn])
    
    # Set permissions
    os.chmod(kaggle_json, 0o600)
    print("‚úÖ Kaggle API configured!")
else:
    print("‚úÖ Kaggle API already configured!")

# Install kaggle
!pip install -q kaggle

print("\n" + "="*70)

In [None]:
# ============================================
# CELL 2: Download Datasets from Kaggle
# ============================================
import os
import shutil
import subprocess

# Create data directory on Colab's fast local storage
DATA_DIR = '/content/datasets'
os.makedirs(DATA_DIR, exist_ok=True)

# Kaggle dataset slugs ("owner/dataset"), keyed by the crop name that is also
# used as the local sub-directory under DATA_DIR.
datasets = {
    'rice': 'minhhuy2810/rice-diseases-image-dataset',
    'cotton': 'janmejaybhoi/cotton-disease-dataset',  # Fixed: correct dataset
    'wheat': 'olyadgetch/wheat-leaf-dataset',
    'mango': 'aryashah2k/mango-leaf-disease-dataset',
    'plantvillage': 'abdallahalidev/plantvillage-dataset'
}


def download_kaggle_dataset(dataset_name, target_dir):
    """Download and unzip one Kaggle dataset into ``target_dir``.

    The command is passed as an argument list (no shell), so the dataset slug
    and the target path are never interpreted by a shell.

    Args:
        dataset_name: Kaggle slug in ``owner/dataset`` form.
        target_dir: Directory the archive is downloaded and extracted into.

    Returns:
        ``(True, "")`` on success, otherwise ``(False, message)`` where
        ``message`` is the CLI's error output (stderr, falling back to stdout)
        or the OS error raised when the ``kaggle`` executable cannot be run.
    """
    command = [
        "kaggle", "datasets", "download",
        "-d", dataset_name,
        "-p", target_dir,
        "--unzip",
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:  # e.g. the kaggle CLI is not installed / not on PATH
        return False, str(exc)

    if result.returncode == 0:
        return True, ""
    return False, (result.stderr or result.stdout or "").strip()


print("="*70)
print("üì• DOWNLOADING DATASETS FROM KAGGLE")
print("="*70)

failed_downloads = []

for crop, dataset_name in datasets.items():
    crop_dir = os.path.join(DATA_DIR, crop)

    # A non-empty directory is treated as a finished download from an earlier run.
    if os.path.isdir(crop_dir) and os.listdir(crop_dir):
        print(f"‚úÖ {crop.upper():12s} - Already downloaded")
        continue

    print(f"\n‚¨áÔ∏è  Downloading {crop.upper()} ({dataset_name})...")
    os.makedirs(crop_dir, exist_ok=True)

    succeeded, error_text = download_kaggle_dataset(dataset_name, crop_dir)
    if succeeded:
        print(f"‚úÖ {crop.upper():12s} - Downloaded!")
    else:
        print(f"‚ùå {crop.upper():12s} - FAILED: {error_text[:200]}")
        failed_downloads.append(crop)
        # Remove partial output so re-running this cell retries the download
        # instead of reporting the leftovers as "Already downloaded".
        shutil.rmtree(crop_dir, ignore_errors=True)

print("\n" + "="*70)
print("üìä Checking downloaded data...")

total_files = 0
for crop in datasets:
    crop_dir = os.path.join(DATA_DIR, crop)
    # os.walk yields nothing for a missing directory, so a failed (removed)
    # download is reported as 0 files.
    file_count = sum(len(filenames) for _, _, filenames in os.walk(crop_dir))
    total_files += file_count
    status = "‚úÖ" if file_count > 0 else "‚ùå"
    print(f"  {status} {crop.upper():12s}: {file_count:,} files")

print(f"\nüìà Total files downloaded: {total_files:,}")

if failed_downloads:
    print(f"\n‚ö†Ô∏è  Failed downloads: {', '.join(failed_downloads)}")
    print("   Try running this cell again or check dataset names on Kaggle")
print("="*70)

In [None]:
# ============================================
# CELL 3: Install Packages & Setup
# ============================================
!pip install timm -q

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import json
from tqdm import tqdm
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import timm

# Speed optimizations: mixed-precision helpers plus cuDNN autotuning.
from torch.cuda.amp import GradScaler, autocast

torch.backends.cudnn.benchmark = True
scaler = GradScaler()

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

print("‚úÖ Setup complete!")
print(f"üñ•Ô∏è  Device: {device}")
if device.type == 'cuda':
    # Report the name and total memory (in GB, base 1e9) of GPU 0.
    gpu_name = torch.cuda.get_device_name(0)
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"üéÆ GPU: {gpu_name}")
    print(f"üíæ GPU Memory: {gpu_total_bytes / 1e9:.1f} GB")

In [None]:
# ============================================
# CELL 4: Load and Analyze Datasets
# ============================================
# Local directory of each crop's dataset under DATA_DIR, keyed by crop name.
data_paths = {
    crop_name: os.path.join(DATA_DIR, crop_name)
    for crop_name in ('rice', 'cotton', 'wheat', 'mango', 'plantvillage')
}

def find_image_classes(base_path, max_depth=5, min_images=51):
    """Collect image files under ``base_path``, grouped by parent directory name.

    Every directory at most ``max_depth`` levels below ``base_path`` that
    directly contains at least ``min_images`` image files is treated as one
    class. The class name is the directory's own name, so identically named
    directories in different places (e.g. ``train/x`` and ``test/x``) are
    merged into a single class.

    Args:
        base_path: Root directory to scan (``str`` or ``os.PathLike``).
        max_depth: Maximum directory depth below ``base_path`` to consider;
            ``base_path`` itself is depth 0. Deeper directories are not visited.
        min_images: Minimum number of images a directory must contain directly
            to count as a class. The default of 51 keeps the original
            "more than 50 images" rule.

    Returns:
        dict mapping class name -> list of image file paths. Paths within each
        directory are sorted and sub-directories are visited in sorted order,
        so the result is reproducible across runs and file systems.
    """
    # Suffixes are compared case-insensitively, so '.JPG' and '.Jpg' also match.
    image_extensions = {'.jpg', '.jpeg', '.png'}
    base_path = os.fspath(base_path)
    class_data = {}

    for root, dirs, filenames in os.walk(base_path):
        dirs.sort()  # deterministic traversal order

        # Depth relative to base_path; unaffected by a trailing separator.
        relative = os.path.relpath(root, base_path)
        depth = 0 if relative == os.curdir else relative.count(os.sep) + 1

        if depth >= max_depth:
            dirs[:] = []  # prune: nothing below this level can qualify
        if depth > max_depth:
            continue

        image_files = sorted(
            name for name in filenames
            if Path(name).suffix.lower() in image_extensions
        )
        if len(image_files) < min_images:
            continue

        class_name = Path(root).name
        class_data.setdefault(class_name, []).extend(
            os.path.join(root, name) for name in image_files
        )

    return class_data

# Scan each crop directory that exists and record the classes found in it.
all_classes = {}
print("="*70)
print("üìä ANALYZING DATASETS")
print("="*70)

for crop, path in data_paths.items():
    # Guard clause: report crops whose directory is missing and move on.
    if not os.path.exists(path):
        print(f"‚ö†Ô∏è  {crop.upper()} - Path not found")
        continue

    print(f"\nüîç Analyzing {crop.upper()}...")
    classes = find_image_classes(path)
    all_classes[crop] = classes
    crop_image_count = sum(map(len, classes.values()))
    print(f"   Found: {len(classes)} classes, {crop_image_count:,} images")

# Grand totals across every crop that was analysed.
total_classes = sum(map(len, all_classes.values()))
total_images = sum(
    len(image_list)
    for per_crop in all_classes.values()
    for image_list in per_crop.values()
)

print("\n" + "="*70)
print(f"üìà TOTAL: {total_classes} classes, {total_images:,} images")
print("="*70)

In [None]:
# ============================================
# CELL 5: Create Dataset Class
# ============================================
class PlantDiseaseDataset(Dataset):
    """Map-style dataset pairing image file paths with their labels.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, indexed in parallel with ``image_paths``.
        transform: Optional callable applied to each loaded RGB PIL image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.transform = transform
        self.labels = labels
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is opened from disk, converted to RGB and, when a transform
        was supplied, passed through it.
        """
        rgb_image = Image.open(self.image_paths[idx]).convert('RGB')
        sample = self.transform(rgb_image) if self.transform else rgb_image
        return sample, self.labels[idx]

# Flatten every crop's classes into parallel lists of paths and integer labels.
all_image_paths, all_labels, class_names = [], [], []

for crop, classes in all_classes.items():
    for class_name, paths in classes.items():
        # Only classes with at least 100 images are kept.
        if len(paths) >= 100:
            # The next label is the number of classes accepted so far.
            label = len(class_names)
            class_names.append(f"{crop}___{class_name}")
            all_image_paths += paths
            all_labels += [label] * len(paths)

# Labels are assigned consecutively from 0, so both counters equal the class count.
current_idx = len(class_names)
num_classes = current_idx
print(f"‚úÖ Created dataset: {num_classes} classes, {len(all_image_paths):,} images")

In [None]:
# ============================================
# CELL 6: Data Loaders (⚡ Optimized)
# ============================================
# Training pipeline: resize, light augmentation, then ImageNet normalisation.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Evaluation pipeline: deterministic (no augmentation), same normalisation.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

if len(all_image_paths) == 0:
    raise RuntimeError(
        "No images available to split: the dataset is empty. "
        "Check that the downloads succeeded and that classes were found."
    )

# Two views over the same files: one augmented (training) and one
# deterministic (validation/test). Splitting a single dataset object would
# apply the random training augmentations to validation and test images too.
full_dataset = PlantDiseaseDataset(all_image_paths, all_labels, train_transform)
eval_dataset = PlantDiseaseDataset(all_image_paths, all_labels, test_transform)

# 70% / 15% / remainder split; the remainder absorbs rounding.
train_size = int(0.7 * len(full_dataset))
val_size = int(0.15 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size

# Seeded split so the same indices are drawn for a given image list.
train_ds, val_split, test_split = random_split(
    full_dataset, [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42))

# Reuse the drawn indices on the evaluation view.
val_ds = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_ds = torch.utils.data.Subset(eval_dataset, test_split.indices)

# Loader settings: background workers and pinned host memory.
batch_size = 64
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                          num_workers=4, pin_memory=True, prefetch_factor=2,
                          persistent_workers=True)
val_loader = DataLoader(val_ds, batch_size=128, shuffle=False,
                        num_workers=4, pin_memory=True, persistent_workers=True)
test_loader = DataLoader(test_ds, batch_size=128, shuffle=False,
                         num_workers=4, pin_memory=True)

print(f"\nüìä Dataset Split:")
print(f"  Training:   {len(train_ds):,} images")
print(f"  Validation: {len(val_ds):,} images")
print(f"  Test:       {len(test_ds):,} images")
print(f"\n‚ö° Optimizations: batch={batch_size}, workers=4, pin_memory=True")

In [None]:
# ============================================
# CELL 7: Create Model (EfficientNet-B4)
# ============================================
# EfficientNet-B4 with pretrained weights and a head sized to our classes,
# placed on the selected device.
model = timm.create_model(
    'efficientnet_b4',
    pretrained=True,
    num_classes=num_classes,
).to(device)

# Loss, optimizer and learning-rate schedule.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-4, weight_decay=1e-2)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=1e-6)

# Summary of the configuration chosen above.
print("ü§ñ Model: EfficientNet-B4")
print(f"üìä Classes: {num_classes}")
print("‚öôÔ∏è  Optimizer: AdamW (lr=0.0001)")
print("üìÖ Scheduler: CosineAnnealingLR")

In [None]:
# ============================================
# CELL 8: Training Loop (⚡ Mixed Precision)
# ============================================
import time

# Number of passes over the training set. The scheduler in the model cell is
# created with T_max=30, so keep the two values in sync if this changes.
epochs = 30
# Highest validation accuracy (in percent) seen so far; gates checkpointing.
best_val_acc = 0.0
# Per-epoch metrics, one entry appended per epoch; read by the plotting cell.
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

print("="*70)
print("üöÄ STARTING TRAINING (Mixed Precision + cuDNN)")
print("="*70)

for epoch in range(epochs):
    start = time.time()  # wall-clock start, used for the per-epoch time and ETA

    # ---- Training phase ----
    model.train()
    train_loss = 0      # running sum of per-batch losses
    train_correct = 0   # running count of correctly classified samples
    train_total = 0     # running count of samples seen

    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    for images, labels in pbar:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        # Mixed precision: forward pass and loss run inside the autocast context.
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backpropagate the scaled loss, then let the GradScaler drive the
        # optimizer step and update its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        _, predicted = outputs.max(1)  # index of the largest logit per sample
        train_total += labels.size(0)
        train_correct += predicted.eq(labels).sum().item()

        # Show the current batch loss and the running accuracy on the bar.
        pbar.set_postfix({'loss': f'{loss.item():.4f}', 
                         'acc': f'{100.*train_correct/train_total:.1f}%'})

    train_acc = 100. * train_correct / train_total
    # Mean of the per-batch losses (sum divided by number of batches).
    train_loss = train_loss / len(train_loader)

    # ---- Validation phase ----
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += labels.size(0)
            val_correct += predicted.eq(labels).sum().item()

    val_acc = 100. * val_correct / val_total
    # Mean of the per-batch validation losses.
    val_loss = val_loss / len(val_loader)

    # Record this epoch's metrics.
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    # Checkpoint only when validation accuracy strictly improves. The file
    # holds model and optimizer state plus the class list.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_acc': val_acc,
            'class_names': class_names,
            'num_classes': num_classes
        }, 'pakistan_model_best.pth')

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    elapsed = time.time() - start
    # ETA in minutes: remaining epochs times the duration of this epoch.
    eta = (epochs - epoch - 1) * elapsed / 60

    print(f"\nüìä Epoch {epoch+1}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.2f}%")
    print(f"   Val Loss={val_loss:.4f}, Val Acc={val_acc:.2f}%, Best={best_val_acc:.2f}%")
    print(f"   ‚è±Ô∏è Time: {elapsed:.0f}s | ETA: {eta:.1f} min")
    print("-"*70)

print(f"\n‚úÖ Training complete! Best accuracy: {best_val_acc:.2f}%")

In [None]:
# ============================================
# CELL 9: Test Evaluation
# ============================================
# Restore the best checkpoint written during training. map_location places
# the tensors on the active device, so a checkpoint saved on a GPU also loads
# when the current runtime is CPU-only.
checkpoint = torch.load('pakistan_model_best.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

test_correct = 0  # correctly classified test samples
test_total = 0    # test samples seen

print("\nüß™ Evaluating on test set...")

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Testing"):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = outputs.max(1)  # index of the largest logit per sample

        test_total += labels.size(0)
        test_correct += predicted.eq(labels).sum().item()

# An empty loader would otherwise surface as a bare ZeroDivisionError.
if test_total == 0:
    raise RuntimeError(
        "The test loader produced no samples; cannot compute test accuracy."
    )

test_acc = 100. * test_correct / test_total

print("\n" + "="*70)
print("üìä FINAL TEST RESULTS")
print("="*70)
print(f"\nüéØ Test Accuracy: {test_acc:.2f}%")
print(f"‚úÖ Correct: {test_correct:,} / {test_total:,}")
print("\n" + "="*70)

In [None]:
# ============================================
# CELL 10: Save Model and Download
# ============================================
# Metadata describing the trained model and the data it was trained on.
model_info = dict(
    class_names=class_names,
    num_classes=num_classes,
    test_accuracy=test_acc,
    best_val_accuracy=best_val_acc,
    model_architecture='efficientnet_b4',
    crops=list(all_classes.keys()),
    total_images=len(all_image_paths),
)

# Write the class list first, then the full metadata, as indented JSON.
for json_name, payload in (('class_names.json', class_names),
                           ('model_info.json', model_info)):
    with open(json_name, 'w') as f:
        json.dump(payload, f, indent=2)

print("\nüíæ Files saved:")
print("  ‚úì pakistan_model_best.pth (model weights)")
print("  ‚úì class_names.json (class labels)")
print("  ‚úì model_info.json (metadata)")

# Send the artefacts to the user's machine via Colab's download helper.
print("\nüì• Downloading files to your computer...")
from google.colab import files
for artefact in ('pakistan_model_best.pth', 'class_names.json', 'model_info.json'):
    files.download(artefact)

In [None]:
# ============================================
# CELL 11: Plot Training History
# ============================================
# One figure with two panels: loss on the left, accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
loss_ax, acc_ax = axes

# Left panel: per-epoch training and validation loss.
for history_key, curve_label in (('train_loss', 'Train Loss'),
                                 ('val_loss', 'Val Loss')):
    loss_ax.plot(history[history_key], label=curve_label, linewidth=2)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Right panel: per-epoch accuracies, with the final test accuracy drawn as a
# dashed horizontal reference line.
for history_key, curve_label in (('train_acc', 'Train Accuracy'),
                                 ('val_acc', 'Val Accuracy')):
    acc_ax.plot(history[history_key], label=curve_label, linewidth=2)
acc_ax.axhline(y=test_acc, color='r', linestyle='--', linewidth=2,
               label=f'Test Acc: {test_acc:.1f}%')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

# Save the figure to disk before showing it.
plt.tight_layout()
plt.savefig('training_history.png', dpi=150)
plt.show()

# Download the saved plot.
files.download('training_history.png')

print("\nüéâ DONE! All files downloaded.")
print("\nüìÅ Copy these files to your Flask app folder:")
for deliverable in ('pakistan_model_best.pth', 'class_names.json'):
    print(f"   ‚Ä¢ {deliverable}")

## ✅ Complete!

### 📥 Downloaded Files:
1. `pakistan_model_best.pth` (~75 MB) - Model weights
2. `class_names.json` - Class labels
3. `model_info.json` - Metadata
4. `training_history.png` - Training plot

### 🚀 Next Steps:
1. Copy downloaded files to `Flask Deployed App/` folder
2. Update `app.py` to load the new model
3. Run the Flask app!

### کسان مددگار - Helping Pakistani Farmers with AI 🇵🇰