# training.ipynb

"""

Author: Annam.ai IIT Ropar
Team Name: Ice 'N' Dagger
Team Members: Barun Saha, Bibaswan Das
Leaderboard Rank: 70 

"""

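# This notebook trains the model: it extracts ResNet50 + VGG16 features from the
# training images, standardizes them, fits a One-Class SVM on the result, and
# saves the fitted scaler and model to /kaggle/working/.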

In [None]:
# Imports and Setup
import numpy as np
import pandas as pd
import cv2
import os
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.applications import ResNet50, VGG16
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy's and TensorFlow's global random generators with one shared value.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)


In [None]:
# Paths and Data Loading
# Directory holding the training image files (joined with each `image_id` later).
TRAIN_PATH = '/kaggle/input/soil-classification-part-2/soil_competition-2025/train'
# CSV listing the training images; its `image_id` column is read during
# feature extraction to locate each file under TRAIN_PATH.
TRAIN_CSV = '/kaggle/input/soil-classification-part-2/soil_competition-2025/train_labels.csv'
# Loaded once at notebook scope and reused by the extraction cell below.
train_df = pd.read_csv(TRAIN_CSV)

In [None]:
#  Load Feature Extractors
def load_feature_extractors():
    """Build the two ImageNet-pretrained backbones used as feature extractors.

    Both networks are created with identical options: ImageNet weights, no
    classification head (``include_top=False``) and ``pooling='avg'``.

    Returns:
        tuple: ``(resnet_model, vgg_model)`` -- the ResNet50 instance first,
        the VGG16 instance second.
    """
    backbone_options = {
        'weights': 'imagenet',
        'include_top': False,
        'pooling': 'avg',
    }
    return ResNet50(**backbone_options), VGG16(**backbone_options)

resnet_model, vgg_model = load_feature_extractors()

In [None]:
#  Feature Extraction Function
def extract_features_from_image(img_path, resnet_model, vgg_model, img_size=(224, 224)):
    """Turn one image file into a concatenated ResNet50 + VGG16 feature vector.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.
        resnet_model: Model that receives the ResNet50-preprocessed image.
        vgg_model: Model that receives the VGG16-preprocessed image.
        img_size: Target size handed to ``cv2.resize``; defaults to (224, 224).

    Returns:
        A 1-D array containing the flattened ResNet output followed by the
        flattened VGG output, or ``None`` if OpenCV could not read the file.
    """
    bgr_image = cv2.imread(img_path)
    if bgr_image is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        return None

    # OpenCV loads channels as BGR; convert to RGB before resizing.
    rgb_image = cv2.resize(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB), img_size)

    # Each backbone is paired with its own preprocessing function; the ResNet
    # pair comes first so its features lead the concatenated vector.
    pipelines = (
        (resnet_preprocess, resnet_model),
        (vgg_preprocess, vgg_model),
    )
    feature_parts = []
    for preprocess, model in pipelines:
        # Add a leading batch axis of size 1 before preprocessing.
        batch = preprocess(rgb_image[np.newaxis, ...])
        feature_parts.append(model.predict(batch, verbose=0).flatten())
    return np.concatenate(feature_parts)

In [None]:
#  Extract Features from Training Set
def extract_training_features(train_df, train_path, resnet_model, vgg_model):
    """Extract a feature vector for every readable training image.

    Args:
        train_df: DataFrame with an ``image_id`` column of image file names.
        train_path: Directory that contains those image files.
        resnet_model: ResNet model forwarded to ``extract_features_from_image``.
        vgg_model: VGG model forwarded to ``extract_features_from_image``.

    Returns:
        np.ndarray built from the per-image feature vectors, in the order the
        images appear in ``train_df``. Images that could not be read are left
        out, so the result may have fewer rows than ``train_df``; any such
        images are reported on stdout.
    """
    features_list = []
    skipped_ids = []
    # Iterate the single column that is needed: iterrows() would build a full
    # Series for every row just to read one field.
    for image_id in train_df['image_id']:
        img_path = os.path.join(train_path, image_id)
        features = extract_features_from_image(img_path, resnet_model, vgg_model)
        if features is None:
            skipped_ids.append(image_id)
            continue
        features_list.append(features)

    if skipped_ids:
        # Reported with print rather than warnings.warn because this notebook
        # silences all warnings via warnings.filterwarnings('ignore').
        print(
            f"Skipped {len(skipped_ids)} unreadable image(s) out of "
            f"{len(train_df)}; first few: {skipped_ids[:10]}"
        )
    return np.array(features_list)

train_features = extract_training_features(train_df, TRAIN_PATH, resnet_model, vgg_model)

In [None]:
#  Feature Scaling
# Learn the scaling statistics from the training features first, then apply
# that fitted scaler to the same features.
scaler = StandardScaler().fit(train_features)
train_features_scaled = scaler.transform(train_features)

In [None]:
#  Save Scaler
import joblib
# Write the fitted scaler to disk with joblib.
# NOTE(review): presumably reloaded at inference time so new features are
# standardized with the training statistics -- confirm against the inference notebook.
joblib.dump(scaler, '/kaggle/working/scaler.pkl')

In [None]:
#  Train One-Class SVM
from sklearn.svm import OneClassSVM
# The model is fit on the scaled features only; no labels are passed to fit().
# gamma='scale' lets scikit-learn derive the RBF kernel coefficient from the data.
# NOTE(review): per scikit-learn's docs, nu upper-bounds the fraction of
# training errors; nu=0.01 presumably assumes nearly all training images are
# inliers -- confirm this matches the dataset.
ocsvm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.01)
ocsvm.fit(train_features_scaled)

In [None]:
# Save the fitted One-Class SVM with joblib (the `joblib` import lives in the
# scaler-saving cell, so that cell must have run first).
joblib.dump(ocsvm, '/kaggle/working/ocsvm_model.pkl')

In [None]:
# Summary: printed unconditionally once this cell runs; it does not itself
# verify that the files exist.
print("Training complete. Model and scaler saved to /kaggle/working/")