# Plant Disease Classification - Model Training
## Using Hugging Face Transformers with ResNet-50

**Dataset:** Warrior025/plant-disease-classification  
**Model:** microsoft/resnet-50 (pre-trained)  
**Task:** Image Classification (8 classes)

---

### Setup Instructions:
1. Open this notebook in Google Colab
2. Go to Runtime → Change runtime type → Select GPU
3. Run all cells
4. Training will take ~15-20 minutes on Colab GPU

In [None]:
# Install required packages
!pip install -q datasets transformers[torch] accelerate scikit-learn pillow

In [None]:
# Imports
from datasets import load_dataset
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    TrainingArguments,
    Trainer
)
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Report the runtime: library version, CUDA availability and, when a GPU is
# usable, the name of device 0.
print(f"PyTorch version: {torch.__version__}")
gpu_ready = torch.cuda.is_available()
print(f"CUDA available: {gpu_ready}")
if gpu_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Configuration
DATASET_NAME = "Warrior025/plant-disease-classification"  # dataset id passed to load_dataset
MODEL_NAME = "microsoft/resnet-50"  # checkpoint used for both the image processor and the model
OUTPUT_DIR = "./plant-disease-model"  # checkpoints, logs and the saved model are written here
NUM_EPOCHS = 10
BATCH_SIZE = 32  # per-device batch size for training and evaluation; raise it if GPU memory allows
LEARNING_RATE = 2e-5

# Echo every setting so the notebook output records the complete run setup
print("Configuration:")
print(f"  Dataset: {DATASET_NAME}")
print(f"  Model: {MODEL_NAME}")
print(f"  Epochs: {NUM_EPOCHS}")
print(f"  Batch Size: {BATCH_SIZE}")
print(f"  Learning Rate: {LEARNING_RATE}")
print(f"  Output Dir: {OUTPUT_DIR}")

In [None]:
# Load dataset
print("Loading dataset...")
dataset = load_dataset(DATASET_NAME)

# Report the size of each split the rest of the notebook relies on
print("\nDataset loaded!")
for split_title, split_key in (
    ("Train", "train"),
    ("Validation", "validation"),
    ("Test", "test"),
):
    print(f"  {split_title}: {len(dataset[split_key])} images")

# Get labels: class names come from the train split's "label" feature
labels = dataset["train"].features["label"].names
num_labels = len(labels)
print(f"\nClasses ({num_labels}):")
for class_id, class_name in enumerate(labels):
    print(f"  {class_id}: {class_name}")

In [None]:
# Load image processor and preprocess
# The processor is loaded from MODEL_NAME, the same identifier that is later
# used to load the model itself.
print("Loading image processor...")
image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)

def preprocess_images(examples):
    """Turn a batch of dataset rows into inputs for the model.

    Args:
        examples: Batch mapping with an "image" list (objects exposing
            ``convert``) and a "label" list.

    Returns:
        The image processor's output for the RGB-converted images, with the
        batch's labels added under the "labels" key.
    """
    # Force three channels so every image reaches the processor in RGB mode
    rgb_batch = [picture.convert("RGB") for picture in examples["image"]]
    encoded = image_processor(rgb_batch, return_tensors="pt")
    encoded["labels"] = examples["label"]
    return encoded

print("Preprocessing images...")
# Drop the raw "image" column once the processed inputs exist. The Trainer in
# this notebook is created with remove_unused_columns=False, so every column
# left in the dataset is handed to the data collator, and PIL images cannot be
# turned into a tensor batch there. Dropping the column also keeps the mapped
# dataset smaller.
dataset = dataset.map(
    preprocess_images,
    batched=True,
    batch_size=32,
    remove_columns=["image"],
)
print("Preprocessing complete!")

In [None]:
# Load model
print("Loading model...")
model = AutoModelForImageClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    # Index -> class name and class name -> index, both in dataset label order
    id2label=dict(enumerate(labels)),
    label2id={class_name: class_id for class_id, class_name in enumerate(labels)},
    # The checkpoint's classification head is sized for its original label
    # set; allow it to be replaced by one with num_labels outputs.
    ignore_mismatched_sizes=True,
)
print("Model loaded!")

In [None]:
# Define metrics
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision, recall and F1.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds
            per-class scores with the class dimension on axis 1; ``labels``
            holds the true class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    # Local names avoid shadowing the notebook-level ``labels`` list
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=1)

    accuracy = accuracy_score(references, predicted)
    # zero_division=0 yields the same 0 score that sklearn's default gives for
    # a class the model never predicts, but without emitting an
    # UndefinedMetricWarning on every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        references, predicted, average='weighted', zero_division=0
    )

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [None]:
# Training arguments (grouped by concern)
training_args = TrainingArguments(
    # Output locations
    output_dir=OUTPUT_DIR,
    logging_dir=f"{OUTPUT_DIR}/logs",
    # Optimisation schedule
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    weight_decay=0.01,
    warmup_steps=100,
    # Batch sizes
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    # Evaluate and checkpoint once per epoch, then reload the checkpoint with
    # the best "accuracy" when training finishes
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Logging and reporting
    logging_steps=10,
    report_to="none",
    push_to_hub=False,
    # Dataset columns are passed through as they are, without pruning
    remove_unused_columns=False,
)

# Create trainer: fine-tune on the train split, score each evaluation pass on
# the validation split with compute_metrics
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
)

print("Trainer ready!")

In [None]:
# Train the model
print("Starting training...")
print("This will take ~15-20 minutes on Colab GPU\n")

# The return value of train() is kept in train_results; no later cell in this
# notebook reads it.
train_results = trainer.train()

print("\nTraining complete!")

In [None]:
# Evaluate on validation set (evaluate() without arguments uses the Trainer's
# eval_dataset)
print("Evaluating on validation set...")
val_metrics = trainer.evaluate()

print("\nValidation Results:")
# One row per metric: (display name, key in the returned metrics dict)
for shown_as, metric_key in (
    ("Accuracy:", "eval_accuracy"),
    ("Precision:", "eval_precision"),
    ("Recall:", "eval_recall"),
    ("F1 Score:", "eval_f1"),
):
    # Pad the name to 10 columns so the values line up
    print(f"  {shown_as:<10} {val_metrics[metric_key]:.4f}")

In [None]:
# Evaluate on test set (the metric keys are read with the "eval_" prefix here
# as well)
print("Evaluating on test set...")
test_metrics = trainer.evaluate(dataset["test"])

print("\nTest Results:")
# One row per metric: (display name, key in the returned metrics dict)
for shown_as, metric_key in (
    ("Accuracy:", "eval_accuracy"),
    ("Precision:", "eval_precision"),
    ("Recall:", "eval_recall"),
    ("F1 Score:", "eval_f1"),
):
    # Pad the name to 10 columns so the values line up
    print(f"  {shown_as:<10} {test_metrics[metric_key]:.4f}")

In [None]:
# Save model: weights/config and the image processor go to the same folder
print("Saving model...")
trainer.save_model(OUTPUT_DIR)
image_processor.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")

# Download model files: these lines are only printed, not executed
for instruction in (
    "\nTo download the model, run:",
    "from google.colab import files",
    "!zip -r plant-disease-model.zip plant-disease-model",
    "files.download('plant-disease-model.zip')",
):
    print(instruction)

## Results Summary

Your model has been trained! Summary of what has been done:
- Training completed successfully
- Validation and test metrics calculated
- Model saved and ready for deployment

### Next Steps:
1. Download the trained model
2. Create a Streamlit demo app
3. Document results in your report
4. Screenshot metrics for documentation