In [None]:
# Week 2 - Model Development & Training
# AICTE Cycle 3 (2025) - Sustainable Agriculture Project
# 📌 Load processed data from Week 1
import pickle

# Restore the train/test arrays, their labels and the category list that the
# Week 1 notebook pickled as a single 5-tuple.
# NOTE(review): pickle can execute arbitrary code while loading — only open
# files that were produced by your own Week 1 run.
with open("processed_data.pkl", "rb") as data_file:
    X_train, X_test, y_train, y_test, categories = pickle.load(data_file)

print("✅ Data loaded successfully!")
# Quick summary of what was loaded.
for caption, value in (
    ("Training data shape:", X_train.shape),
    ("Testing data shape:", X_test.shape),
    ("Number of classes:", len(categories)),
):
    print(caption, value)

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Sanity check: X_train / X_test (and their labels) must already be in scope
# from the pickle loaded above; echo the shapes before encoding the labels.
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)

# One-hot encode the class labels; the encoding width is the number of known
# categories so both splits share the same column layout.
num_classes = len(categories)
y_train_cat, y_test_cat = (
    to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_test)
)

# Build CNN Model
# Architecture: three Conv2D -> MaxPooling2D -> BatchNormalization stages
# (32, 64 and 128 filters), then a dense classifier head with dropout and a
# softmax over `num_classes` outputs.
#
# The per-sample input shape is read from the loaded data instead of being
# hard-coded, so the network always matches the images produced by the Week 1
# preprocessing. An explicit Input object is used as the first element, which
# is the recommended way to declare the input of a Sequential model.
input_shape = X_train.shape[1:]  # everything after the sample axis

model = Sequential([
    tf.keras.Input(shape=input_shape),

    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    BatchNormalization(),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    BatchNormalization(),

    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    BatchNormalization(),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
# categorical_crossentropy pairs with the one-hot targets produced by
# to_categorical and the softmax output layer.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Fit the network: 10 epochs, mini-batches of 32, progress bar enabled.
# The returned History object keeps per-epoch metrics for the plots below.
# NOTE(review): the test split is also used as validation data here, so the
# validation curves and the final test metrics come from the same samples.
history = model.fit(
    x=X_train,
    y=y_train_cat,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test_cat),
    verbose=1,
)

# Training curves: one panel per metric, train vs. validation in each.
_, panels = plt.subplots(1, 2, figsize=(12, 5))
for panel, metric, heading in zip(panels, ("accuracy", "loss"), ("Accuracy", "Loss")):
    panel.plot(history.history[metric], label=f"Train {heading}")
    panel.plot(history.history[f"val_{metric}"], label=f"Validation {heading}")
    panel.legend()
    panel.set_title(heading)
plt.show()

# Score the test split: the model returns one probability row per sample,
# and the predicted class is the column with the highest probability.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Restrict the report to the class ids that really occur in y_test, and look
# up a display name for each of them (same order as `labels`).
labels = np.unique(y_test)
target_names = [categories[class_id] for class_id in labels]

report_text = classification_report(
    y_test,
    y_pred_classes,
    labels=labels,
    target_names=target_names,
)
print("\nClassification Report:")
print(report_text)


# Confusion matrix
# Pin the row/column order to the full range of class ids so that cell (i, j)
# always means "true class i predicted as class j", even when a class never
# appears in y_test or in the predictions. Without an explicit `labels`
# argument the matrix only covers classes that happen to occur, and its
# indices stop lining up with `categories`.
class_ids = np.arange(len(categories))
cm = confusion_matrix(y_test, y_pred_classes, labels=class_ids)

plt.figure(figsize=(12,10))
# Tick labels use the category names so the heatmap can be read directly.
sns.heatmap(cm, annot=False, cmap="Blues",
            xticklabels=list(categories), yticklabels=list(categories))
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion Matrix")
plt.show()

# Misclassified examples (to showcase in report):
# Show up to the first nine test images whose predicted class differs from
# the true class, titled with both names.
misclassified_idx = np.where(y_test != y_pred_classes)[0]
plt.figure(figsize=(12, 6))
for i, idx in enumerate(misclassified_idx[:9]):  # first 9 misclassified
    plt.subplot(3, 3, i+1)
    plt.imshow(X_test[idx])
    # Look names up in `categories`, which is indexed by class id.
    # `target_names` only lists classes present in y_test (in np.unique
    # order), so indexing it with a raw class id can show the wrong name or
    # raise IndexError when a class is missing from the test split.
    true_name = categories[y_test[idx]]
    pred_name = categories[y_pred_classes[idx]]
    plt.title(f"True: {true_name}\nPred: {pred_name}")
    plt.axis('off')
plt.tight_layout()
plt.show()




✅ Data loaded successfully!
Training data shape: (5620, 128, 128, 3)
Testing data shape: (1405, 128, 128, 3)
Number of classes: 16
Training data shape: (5620, 128, 128, 3)
Testing data shape: (1405, 128, 128, 3)
Epoch 1/10


In [11]:
import pickle

# Persist the trained network to an HDF5 file in the working directory.
model_path = "crop_disease_model.h5"
model.save(model_path)
print("✅ Model saved as crop_disease_model.h5")

# Persist the per-epoch metrics dict (not the History object itself) so the
# training curves can be re-plotted later without retraining.
with open("training_history.pkl", "wb") as history_file:
    pickle.dump(history.history, history_file)
print("✅ Training history saved")




✅ Model saved as crop_disease_model.h5
✅ Training history saved
