# In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Paths:
# Root of the competition dataset (a Kaggle input mount, judging by the path).
DATA_DIR: str = "/kaggle/input/soil-classification/soil_classification-2025"
# Directory that each `image_id` is joined onto to locate a training image.
IMG_DIR: str = os.path.join(DATA_DIR, "train")
# CSV read into `df`; its `image_id` and `soil_type` columns are used below.
LABELS_FILE: str = os.path.join(DATA_DIR, "train_labels.csv")

# Config:
# Side length in pixels: images are resized to IMG_SIZE x IMG_SIZE and the
# model's input layer is declared with the same spatial size.
IMG_SIZE: int = 128
# Passed to `model.fit` as `batch_size`.
BATCH_SIZE: int = 32
# Passed to `model.fit` as `epochs`.
EPOCHS: int = 15

# Load Data:
# Read the label table, force the class column to plain strings, and attach
# the on-disk location of every training image.
df = pd.read_csv(LABELS_FILE)
df["soil_type"] = df["soil_type"].astype(str)
df["filepath"] = df["image_id"].map(
    lambda image_id: os.path.join(IMG_DIR, image_id)
)

# Encode Labels:
# Fit the encoder on the soil-type strings, then store the integer codes
# alongside them; `class_names[k]` is the string that code `k` stands for.
label_encoder = LabelEncoder().fit(df["soil_type"])
df["label"] = label_encoder.transform(df["soil_type"])
class_names = list(label_encoder.classes_)
NUM_CLASSES = len(class_names)

# Load Images:
def load_image(path):
    """Load one image file as a normalised float32 RGB array.

    The image is converted to RGB, resized to ``IMG_SIZE x IMG_SIZE`` and
    scaled by 1/255.

    Args:
        path: Location of the image file, in any form ``PIL.Image.open``
            accepts.

    Returns:
        A ``float32`` NumPy array of shape ``(IMG_SIZE, IMG_SIZE, 3)``.
    """
    # Open inside a context manager so the underlying file handle is released
    # deterministically; this function is called once per image in the dataset.
    with Image.open(path) as source:
        # `convert` yields a new in-memory image, so the result stays valid
        # after the source file has been closed.
        resized = source.convert("RGB").resize((IMG_SIZE, IMG_SIZE))
    return np.array(resized).astype(np.float32) / 255.0

# Decode every training image into one array and one-hot encode the labels.
X = np.array(list(map(load_image, df["filepath"])))
y = tf.keras.utils.to_categorical(
    df["label"],
    num_classes=NUM_CLASSES,
)

# Build Model:
# Small CNN assembled layer by layer: two convolution/max-pool stages, then a
# dense head ending in one softmax unit per class.
model = tf.keras.Sequential()
model.add(tf.keras.layers.InputLayer(shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu"))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=64, activation="relu"))
model.add(tf.keras.layers.Dense(units=NUM_CLASSES, activation="softmax"))

# Compile:
# Categorical cross-entropy matches the one-hot targets stored in `y`.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train:
model.fit(
    x=X,
    y=y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
)

# Evaluate:
# NOTE: predictions are made on `X`, the same array the model was fitted on,
# so these scores describe the fit to the training data, not held-out
# performance.
y_pred = model.predict(X)
y_true = y.argmax(axis=1)
y_pred_labels = y_pred.argmax(axis=1)

print("\nClassification Report:\n")
print(classification_report(y_true, y_pred_labels, target_names=class_names))

# `average=None` returns one F1 value per class, ordered by class index.
f1 = f1_score(y_true, y_pred_labels, average=None)
for class_name, score in zip(class_names, f1):
    print(f"F1 score for class {class_name}: {score:.4f}")
print(f"\n Minimum F1 score across classes: {f1.min():.4f}")

# Save Model:
# The file name lives in one place so the path written to disk and the path
# reported in the log line cannot drift apart.
MODEL_PATH = "soil_classifier_f1_1.h5"
model.save(MODEL_PATH)
print(f" Model saved as '{MODEL_PATH}'")

# Generate Submission for Test Set:

# Test Paths:
TEST_DIR = os.path.join(DATA_DIR, "test")
TEST_IDS_FILE = os.path.join(DATA_DIR, "test_ids.csv")

# Load test data:
# Read the list of test image ids and resolve each one to a file on disk.
test_df = pd.read_csv(TEST_IDS_FILE)
test_df["filepath"] = test_df["image_id"].map(
    lambda image_id: os.path.join(TEST_DIR, image_id)
)

# Load and preprocess test images:
# Uses the same `load_image` preprocessing as the training images.
X_test = np.array(list(map(load_image, test_df["filepath"])))

# Predict:
# Take the highest-probability class per image and map the integer codes back
# to the original soil-type strings.
y_test_pred = model.predict(X_test)
y_test_labels = label_encoder.inverse_transform(y_test_pred.argmax(axis=1))

# Save submission.csv:
# Two columns, `image_id` then `soil_type`, written without the index.
submission = test_df[["image_id"]].assign(soil_type=y_test_labels)
submission.to_csv("submission.csv", index=False)
print("Submission file 'submission.csv' created")
