# Soil Classification - Inference Notebook

This notebook demonstrates how to use the trained soil classification model for inference on new images.

In [None]:
"""

Author: Annam.ai IIT Ropar
Team Name: SoilClassifiers
Team Members: Krishnopreya , Deba , Shweta, Namya, Nikhil
Leaderboard Rank:101

"""
import torch
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms
import sys
import os

# Add src directory to path
sys.path.append('../src')
from training import SoilClassifier
from preprocessing import SoilDataset

## Load Trained Model

In [None]:
# Select the compute device: CUDA when torch reports it as available, else CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Build the 4-class network, restore the saved weights (remapped onto `device`
# while loading), then move the model to the device and switch to eval mode
model = SoilClassifier(num_classes=4)
model.load_state_dict(
    torch.load('../models/best_model.pth', map_location=device)
)
model.to(device)
model.eval()

print("Model loaded successfully!")

## Define Preprocessing and Class Names

In [None]:
# Deterministic inference preprocessing (no augmentation): resize to 224x224,
# convert to a tensor, then normalise each channel with the mean/std below.
# NOTE(review): presumably the same values used during training — confirm.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Human-readable labels; the list position is used as the class index when a
# prediction is mapped back to a name
class_names = [
    "Alluvial soil",
    "Black Soil",
    "Clay soil",
    "Red soil",
]
print(f"Classes: {class_names}")

## Single Image Prediction Function

In [None]:
def predict_single_image(image_path, model, transform, class_names, device):
    """
    Predict soil type for a single image.

    Args:
        image_path: Location of the image, passed to ``Image.open``.
        model: Callable invoked on a batch containing the one preprocessed
            image; its output is soft-maxed over dimension 1.
        transform: Callable turning the RGB image into a tensor.
        class_names: Sequence of labels indexed by the predicted class index.
        device: Device the input tensor is moved to before the forward pass.

    Returns:
        Tuple ``(predicted_class, confidence_score, probabilities)`` where
        ``probabilities`` is the NumPy row of per-class probabilities for the
        image and ``confidence_score`` is the largest of them as a Python float.
    """
    # Open inside a context manager so the underlying file handle is released
    # as soon as the RGB copy exists (convert() returns a new, loaded image).
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # Add the batch dimension expected by the model and move to the device
    image_tensor = transform(image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(image_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        confidence, predicted = torch.max(probabilities, 1)

    predicted_class = class_names[predicted.item()]
    confidence_score = confidence.item()

    return predicted_class, confidence_score, probabilities.cpu().numpy()[0]

## Example: Predict on Test Images

In [None]:
# Example usage - replace with actual image paths
test_images = [
    '../data/test/sample1.jpg',  # Replace with actual test image paths
    '../data/test/sample2.jpg',
    '../data/test/sample3.jpg',
]

# One panel per listed image. squeeze=False makes subplots() always return a
# 2-D array of Axes, so indexing below also works when only one path is listed
# (the default would hand back a bare Axes object in that case).
fig, axes = plt.subplots(1, len(test_images), figsize=(15, 5), squeeze=False)
axes = axes.ravel()

for i, img_path in enumerate(test_images):
    if os.path.exists(img_path):
        # Make prediction
        predicted_class, confidence, all_probs = predict_single_image(
            img_path, model, transform, class_names, device
        )

        # Display image and prediction. Take an in-memory copy inside the
        # context manager so the file handle is closed before plotting.
        with Image.open(img_path) as opened_image:
            image = opened_image.copy()
        axes[i].imshow(image)
        axes[i].set_title(f'Predicted: {predicted_class}\nConfidence: {confidence:.2f}')
        axes[i].axis('off')

        # Print detailed results
        print(f"\nImage: {img_path}")
        print(f"Predicted Class: {predicted_class}")
        print(f"Confidence: {confidence:.4f}")
        print("All probabilities:")
        for j, class_name in enumerate(class_names):
            print(f"  {class_name}: {all_probs[j]:.4f}")
    else:
        # Missing file: keep the panel but show a placeholder message in it
        axes[i].text(0.5, 0.5, f'Image not found:\n{img_path}',
                    ha='center', va='center', transform=axes[i].transAxes)
        axes[i].axis('off')

plt.tight_layout()
plt.show()

## Batch Prediction on Test Set

In [None]:
# Load test data
test_csv_path = '../data/test_ids.csv'
test_dir = '../data/test'
output_path = '../output/inference_results.csv'

if os.path.exists(test_csv_path):
    test_df = pd.read_csv(test_csv_path)
    test_dataset = SoilDataset(test_df, test_dir, transform=transform, is_test=True)
    # shuffle=False keeps predictions in the same order as the rows of test_df
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Generate predictions
    all_predictions = []
    all_confidences = []

    model.eval()
    with torch.no_grad():
        # The second item yielded by the loader is not used here
        for images, _ in test_loader:
            images = images.to(device)
            outputs = model(images)
            probabilities = torch.nn.functional.softmax(outputs, dim=1)
            confidences, predictions = torch.max(probabilities, 1)

            all_predictions.extend(predictions.cpu().numpy())
            all_confidences.extend(confidences.cpu().numpy())

    # Create submission dataframe
    submission_df = pd.DataFrame({
        'image_id': test_df['image_id'],
        'soil_type': [class_names[pred] for pred in all_predictions],
        'confidence': all_confidences
    })

    print("Prediction Summary:")
    print(submission_df['soil_type'].value_counts())
    print(f"\nAverage confidence: {np.mean(all_confidences):.4f}")

    # Save predictions. to_csv() does not create missing directories, so make
    # sure the output folder exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    submission_df.to_csv(output_path, index=False)
    print(f"\nPredictions saved to '{output_path}'")

    # Display first few predictions
    print("\nFirst 10 predictions:")
    print(submission_df.head(10))
else:
    print(f"Test CSV file not found: {test_csv_path}")

## Visualize Prediction Distribution

In [None]:
# Summarise the batch predictions, but only when `submission_df` exists
# (it is created by the batch cell only if the test CSV was found)
if 'submission_df' in locals():
    fig, (ax_types, ax_conf) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: how many images were assigned to each soil type
    type_counts = submission_df['soil_type'].value_counts()
    type_counts.plot(kind='bar', ax=ax_types)
    ax_types.set_title('Distribution of Predicted Soil Types')
    ax_types.set_xlabel('Soil Type')
    ax_types.set_ylabel('Count')
    for tick_label in ax_types.get_xticklabels():
        tick_label.set_rotation(45)

    # Right panel: histogram of the top-class probability per image
    ax_conf.hist(submission_df['confidence'], bins=20, alpha=0.7)
    ax_conf.set_title('Distribution of Prediction Confidence')
    ax_conf.set_xlabel('Confidence Score')
    ax_conf.set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()

    # Per-class mean / spread / range of the confidence values
    print("\nConfidence Statistics by Soil Type:")
    confidence_stats = (
        submission_df
        .groupby('soil_type')['confidence']
        .agg(['mean', 'std', 'min', 'max'])
    )
    print(confidence_stats)

## Model Performance Analysis

In [None]:
# Print the stored evaluation metrics when the JSON report is present
import json

metrics_file = '../docs/cards/ml-metrics.json'
if not os.path.exists(metrics_file):
    print(f"Metrics file not found: {metrics_file}")
else:
    with open(metrics_file, 'r') as f:
        metrics = json.load(f)

    print("Model Performance Metrics:")
    print("="*50)

    # Headline numbers read from the top-level keys of the report
    print(f"Overall Accuracy: {metrics['accuracy']:.4f}")
    print(f"Macro Average F1-Score: {metrics['macro avg']['f1-score']:.4f}")
    print(f"Weighted Average F1-Score: {metrics['weighted avg']['f1-score']:.4f}")

    print("\nPer-Class Performance:")
    print("-"*50)
    for class_name in class_names:
        # Classes without an entry in the report are skipped silently
        if class_name not in metrics:
            continue
        class_metrics = metrics[class_name]
        print(f"{class_name}:")
        print(f"  Precision: {class_metrics['precision']:.4f}")
        print(f"  Recall: {class_metrics['recall']:.4f}")
        print(f"  F1-Score: {class_metrics['f1-score']:.4f}")
        print(f"  Support: {class_metrics['support']}")
        print()

## Custom Image Prediction Function

In [None]:
def predict_from_path(image_path):
    """
    Simple function to predict soil type from image path.

    Runs ``predict_single_image`` with the notebook-level ``model``,
    ``transform``, ``class_names`` and ``device``, prints the prediction and
    the per-class probabilities, and shows the image with the result as title.

    Args:
        image_path: Path of the image to classify.

    Returns:
        ``(predicted_class, confidence)`` on success. If anything raises, the
        error is printed and ``(None, None)`` is returned instead.
    """
    try:
        predicted_class, confidence, all_probs = predict_single_image(
            image_path, model, transform, class_names, device
        )

        print(f"Image: {os.path.basename(image_path)}")
        print(f"Predicted Soil Type: {predicted_class}")
        print(f"Confidence: {confidence:.4f}")
        print("\nAll Class Probabilities:")
        for i, class_name in enumerate(class_names):
            print(f"  {class_name}: {all_probs[i]:.4f}")

        # Display image. The context manager closes the file handle once the
        # figure has been shown instead of leaving it to garbage collection.
        with Image.open(image_path) as img:
            plt.figure(figsize=(8, 6))
            plt.imshow(img)
            plt.title(f'Predicted: {predicted_class} (Confidence: {confidence:.3f})')
            plt.axis('off')
            plt.show()

        return predicted_class, confidence

    except Exception as e:
        # Deliberate best-effort: report the problem and keep the notebook running
        print(f"Error processing image {image_path}: {str(e)}")
        return None, None

# Example usage:
# predicted_class, confidence = predict_from_path('path/to/your/image.jpg')

# Example usage:
# predicted_class, confidence = predict_from_path('path/to/your/image.jpg')

## Export Functions for External Use

In [None]:
# Save prediction functions as a module.
# The text below is written verbatim to '../src/inference_utils.py'.
inference_code = '''
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
import sys
import os

# Make the modules that sit next to this file importable no matter which
# working directory the caller runs from (a relative '../src' entry only
# resolves correctly from one specific directory).
_SRC_DIR = os.path.dirname(os.path.abspath(__file__))
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)
from training import SoilClassifier

class SoilClassificationInference:
    def __init__(self, model_path, device=None):
        self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_names = ["Alluvial soil", "Black Soil", "Clay soil", "Red soil"]

        # Load model
        self.model = SoilClassifier(num_classes=4)
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.to(self.device)
        self.model.eval()

        # Define transform
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def predict(self, image_path):
        """Predict soil type for a single image"""
        # Close the file handle as soon as the RGB copy has been created
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        image_tensor = self.transform(image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            outputs = self.model(image_tensor)
            probabilities = torch.nn.functional.softmax(outputs, dim=1)
            confidence, predicted = torch.max(probabilities, 1)

        predicted_class = self.class_names[predicted.item()]
        confidence_score = confidence.item()
        all_probs = probabilities.cpu().numpy()[0]

        return {
            'predicted_class': predicted_class,
            'confidence': confidence_score,
            'all_probabilities': {name: prob for name, prob in zip(self.class_names, all_probs)}
        }

# Usage example:
# classifier = SoilClassificationInference('../models/best_model.pth')
# result = classifier.predict('path/to/image.jpg')
# print(result)
'''

# Save to file (overwrites any existing copy of the module)
with open('../src/inference_utils.py', 'w') as f:
    f.write(inference_code)

print("Inference utilities saved to '../src/inference_utils.py'")
print("\nYou can now use it in other scripts:")
print("from src.inference_utils import SoilClassificationInference")
print("classifier = SoilClassificationInference('models/best_model.pth')")
print("result = classifier.predict('path/to/image.jpg')")

## Summary

This notebook demonstrates:
1. Loading a trained soil classification model
2. Making predictions on single images
3. Batch processing of test images
4. Analyzing prediction confidence and distribution
5. Visualizing results
6. Creating reusable inference utilities

The model can classify soil into 4 types:
- Alluvial soil
- Black Soil  
- Clay soil
- Red soil

For production use, consider:
- Input validation and error handling
- Batch processing optimization
- Model versioning and updates
- API integration
- Performance monitoring