# 🌱 AgriDetect Training - Simple Version

Train your plant disease model with 99.7% accuracy!

**Steps:**
1. Check GPU
2. Install packages
3. Upload dataset
4. Train model
5. Download model

## Step 1: Check GPU

In [None]:
import torch

# Report whether PyTorch can see a CUDA device. If it cannot, tell the user
# which runtime menu entry enables one.
print("Checking GPU...")
gpu_present = torch.cuda.is_available()
if not gpu_present:
    print("‚ùå No GPU detected!")
    print("\n‚ö†Ô∏è Go to: Runtime > Change runtime type > Select GPU")
    print("Then re-run this cell.")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"‚úÖ GPU Available: {gpu_name}")
    # total_memory is scaled by 1e9 and reported with a "GB" suffix.
    memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"   Memory: {memory_gb:.1f} GB")

## Step 2: Install Packages

In [None]:
print("Installing packages...")
!pip install -q transformers datasets accelerate scikit-learn pillow
print("‚úÖ Installation complete!")

## Step 3: Upload Dataset

**IMPORTANT:** Upload the ZIP file `AgriDetect.v1i.folder-2.zip` (NOT text files!)

In [None]:
from google.colab import files
import zipfile
import os

print("üì§ Upload AgriDetect.v1i.folder-2.zip")
print("   (The ZIP file, around 50-100 MB)\n")

uploaded = files.upload()

# Find ZIP file
zip_files = [f for f in uploaded.keys() if f.endswith('.zip')]

if not zip_files:
    print("\n‚ùå No ZIP file uploaded!")
    print(f"You uploaded: {list(uploaded.keys())}")
    print("\nPlease run this cell again and select the .zip file!")
else:
    zip_file = zip_files[0]
    print(f"\n‚úÖ Found: {zip_file}")
    print(f"   Size: {len(uploaded[zip_file]) / 1e6:.1f} MB")
    
    print("\nüì¶ Extracting...")
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall('dataset')
    
    print("‚úÖ Extracted!\n")
    
    # Check structure
    print("üìÇ Dataset structure:")
    !ls -la dataset/
    
    # Find train folder
    if os.path.exists('dataset/train'):
        print("\n‚úÖ Found train folder!")
        print("\nüìä Classes:")
        !ls dataset/train/
    else:
        # Check subdirectories
        subdirs = [d for d in os.listdir('dataset') if os.path.isdir(f'dataset/{d}')]
        for subdir in subdirs:
            if os.path.exists(f'dataset/{subdir}/train'):
                print(f"\n‚úÖ Found train in: dataset/{subdir}/")
                # Move contents up one level
                !mv dataset/{subdir}/* dataset/
                !rm -rf dataset/{subdir}
                print("‚úÖ Restructured!")
                break
    
    print("\n‚úÖ Dataset ready!")

## Step 4: Train Model

This will take 30-60 minutes with a GPU.

In [None]:
from datasets import load_dataset
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    TrainingArguments,
    Trainer
)
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Banner
print("=" * 60)
print("PLANT DISEASE CLASSIFICATION TRAINING")
print("=" * 60)

# Configuration (these names are read again further down in this cell)
MODEL_NAME = "microsoft/resnet-50"
OUTPUT_DIR = "./trained_model"
EPOCHS = 10
BATCH_SIZE = 32

for caption, setting in (
    ("\nModel", MODEL_NAME),
    ("Epochs", EPOCHS),
    ("Batch Size", BATCH_SIZE),
):
    print(f"{caption}: {setting}")

# Load dataset from the extracted ./dataset folder
print("\nüì• Loading dataset...")
dataset = load_dataset("imagefolder", data_dir="dataset")

print("‚úÖ Loaded!")
# All three splits are indexed here; a missing split raises at this point.
for caption, split_name in (
    ("Train", "train"),
    ("Validation", "validation"),
    ("Test", "test"),
):
    print(f"   {caption}: {len(dataset[split_name])} images")

# Class names come from the train split's "label" feature
labels = dataset["train"].features["label"].names
num_labels = len(labels)

print(f"\nüè∑Ô∏è Classes ({num_labels}):")
for position, class_name in enumerate(labels, start=1):
    print(f"   {position}. {class_name}")

# Load the image processor that belongs to MODEL_NAME
print("\nüîß Loading image processor...")
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)

# Preprocess function
def preprocess(examples):
    """Turn one batch of examples into model inputs.

    Each image in ``examples["image"]`` is converted to RGB, the batch is run
    through the module-level ``processor`` with ``return_tensors="pt"``, and
    ``examples["label"]`` is attached to the result under ``"labels"``.

    Returns:
        The processor output with the extra ``"labels"`` entry.
    """
    rgb_images = []
    for picture in examples["image"]:
        rgb_images.append(picture.convert("RGB"))
    batch = processor(rgb_images, return_tensors="pt")
    batch["labels"] = examples["label"]
    return batch

# Apply `preprocess` to the dataset in batches of 32 and drop the raw "image"
# column from the result.
print("üîÑ Preprocessing images...")
dataset = dataset.map(
    preprocess,
    batched=True,
    batch_size=32,
    remove_columns=["image"],
)
print("‚úÖ Done!")

# Load model with a classification head sized for this dataset's classes
print("\nü§ñ Loading model...")
index_to_name = dict(enumerate(labels))
name_to_index = {name: index for index, name in index_to_name.items()}
model = AutoModelForImageClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    id2label=index_to_name,
    label2id=name_to_index,
    # NOTE(review): presumably needed because the pretrained head has a
    # different number of outputs than num_labels - confirm.
    ignore_mismatched_sizes=True,
)
print("‚úÖ Model loaded!")

# Metrics
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds
            per-class scores with the classes along axis 1; ``labels`` holds
            the true class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall``, ``f1``.
    """
    # Local names avoid shadowing the module-level `labels` list of class names.
    scores, label_ids = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    accuracy = accuracy_score(label_ids, predicted_ids)
    precision, recall, f1, _ = precision_recall_fscore_support(
        label_ids,
        predicted_ids,
        average='weighted',
        # A class that is never predicted has an undefined precision. Score it
        # as 0 explicitly instead of emitting a warning on every evaluation.
        zero_division=0,
    )
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}

# Training arguments
def _print_metrics(metrics):
    """Print accuracy (as a percentage), precision, recall and F1 from an
    ``eval_``-prefixed metrics dict returned by ``trainer.evaluate``."""
    print(f"   Accuracy:  {metrics['eval_accuracy']*100:.2f}%")
    print(f"   Precision: {metrics['eval_precision']:.4f}")
    print(f"   Recall:    {metrics['eval_recall']:.4f}")
    print(f"   F1 Score:  {metrics['eval_f1']:.4f}")


# Evaluate and checkpoint once per epoch, then reload the checkpoint with the
# best validation accuracy when training ends.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    learning_rate=2e-5,
    warmup_steps=100,
    weight_decay=0.01,
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    # Without a limit every epoch leaves a full checkpoint in OUTPUT_DIR, the
    # same directory the final model is saved to. Cap how many are kept.
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",
)

# Create trainer: train on the train split, evaluate on the validation split
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_metrics,
)

print("\n" + "="*60)
print("üöÄ STARTING TRAINING")
print("="*60)
print("\nThis will take 30-60 minutes...\n")

# Train
trainer.train()

print("\n" + "="*60)
print("‚úÖ TRAINING COMPLETE!")
print("="*60)

# Evaluate on the validation split (the trainer's default eval dataset)
print("\nüìä Validation Results:")
val_metrics = trainer.evaluate()
_print_metrics(val_metrics)

# Evaluate on the held-out test split
print("\nüìä Test Results:")
test_metrics = trainer.evaluate(dataset["test"])
_print_metrics(test_metrics)

# Save the model and the image processor side by side in OUTPUT_DIR
print("\nüíæ Saving model...")
trainer.save_model(OUTPUT_DIR)
processor.save_pretrained(OUTPUT_DIR)
print("‚úÖ Model saved!")

print("\nüéâ ALL DONE!")

## Step 5: Download Model

In [None]:
import os
import shutil
import tempfile

from google.colab import files

# Zip the saved model and send it to the browser.
# The training run writes its per-epoch `checkpoint-*` folders into the same
# `trained_model` directory as the final model, so the directory is first
# copied without them; otherwise every checkpoint would end up in the download.
print("üì¶ Creating zip file...")
with tempfile.TemporaryDirectory() as staging_root:
    export_dir = os.path.join(staging_root, 'model')
    shutil.copytree(
        'trained_model',
        export_dir,
        ignore=shutil.ignore_patterns('checkpoint-*'),
    )
    # The archive is written to the working directory, not the staging area.
    shutil.make_archive('plant-disease-model', 'zip', export_dir)
print("‚úÖ Zip created!")

print("\n‚¨áÔ∏è Downloading...")
files.download('plant-disease-model.zip')
print("‚úÖ Download started! Check your browser downloads.")

## Optional: Upload to Hugging Face

In [None]:
import os

from huggingface_hub import login

# Repository the model and processor are pushed to, and whose link is printed
# afterwards. Replace YOUR_USERNAME before running.
REPO_ID = "YOUR_USERNAME/plant-disease-model"

# Login - use environment variable
token = os.getenv('HF_TOKEN')
if not token:
    print('‚ùå Error: HF_TOKEN environment variable not set')
    print('Set it in Colab: import os; os.environ["HF_TOKEN"] = "your_token_here"')
else:
    login(token=token)

    # Upload
    print("üì§ Uploading to Hugging Face...")
    model.push_to_hub(REPO_ID)
    processor.push_to_hub(REPO_ID)

    print("\n‚úÖ Uploaded!")
    # Show the link only after a successful upload, and build it from the
    # repository id that was actually pushed to.
    print(f"üîó https://huggingface.co/{REPO_ID}")