Author: Soumyadip Mukherjee
Team Name: Soumo275
Team Members: Soumyadip Mukherjee
Leaderboard Rank: 38

In [5]:
# Install third-party packages into the kernel's environment using the
# IPython `!` shell escape (this line is not plain Python).
# NOTE(review): the cells visible here never import cv2 or matplotlib, while
# tensorflow, pandas and joblib are imported but not installed on this line —
# presumably the runtime ships them; confirm.
!pip install opencv-python scikit-learn matplotlib




In [6]:
import zipfile
import os

# Unpack the competition archive to the location the later cells read from.
zip_path = "/content/soil-classification-part-2.zip"
# Absolute target: the training and inference cells use paths under
# /content/soil-classification/..., so extracting to a relative directory
# only worked when the working directory happened to be /content.
extract_dir = "/content/soil-classification"
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

In [7]:
import os
from pathlib import Path
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import joblib

# Configuration
# Location of the training images and the resolution they are resized to.
train_path = '/content/soil-classification/soil_competition-2025/train'
img_size = (224, 224)

# ResNet50 with ImageNet weights, without the classification head; average
# pooling is requested so the network emits one feature vector per image.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
)

# Scaler that is fitted on the extracted features before the SVM is trained.
scaler = StandardScaler()

# Feature extraction
def get_features(img_path):
    """Return the flattened ResNet50 output for the image at ``img_path``.

    The image is loaded at ``img_size``, converted to an array, given a
    leading batch axis and passed through the module-level ``model``.

    NOTE(review): no ``preprocess_input`` step is applied before prediction.
    The inference cell's ``extract_features`` omits it as well, so the two
    are consistent with each other — confirm this is intended.
    """
    pixels = img_to_array(load_img(img_path, target_size=img_size))
    batch = pixels[np.newaxis, ...]  # (H, W, 3) -> (1, H, W, 3)
    embedding = model.predict(batch)
    return embedding.flatten()

# Process images
# Files are visited in sorted order so the feature order (and therefore any
# later seeded split of `features`) does not depend on filesystem listing order.
image_suffixes = ('.jpg', '.jpeg', '.png')
features = []
for img_file in sorted(Path(train_path).glob('*')):
    if img_file.suffix.lower() not in image_suffixes:
        continue
    try:
        features.append(get_features(str(img_file)))
    except Exception as e:
        # Best effort: report the image that could not be processed and move on.
        print(f"Error: {img_file.name} - {str(e)}")

# Fail early with an actionable message instead of an opaque scikit-learn
# shape error when the directory is missing or holds no readable images.
if not features:
    raise ValueError(f"No training images could be processed in {train_path}")

# Train model
# The scaler is fitted on the same features the one-class SVM is trained on.
X = scaler.fit_transform(features)
ocsvm = OneClassSVM(kernel='rbf', nu=0.1).fit(X)
# Persist the SVM together with its scaler so inference can apply the same scaling.
joblib.dump((ocsvm, scaler), 'soil_model.pkl')
print(f"Trained on {len(X)} samples | Model saved")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms

In [8]:
from pathlib import Path
import pandas as pd
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import load_img
import joblib

# Config
# Feature extractor configured exactly like the one used for training.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
# Table of test images; the prediction loop reads its 'image_id' column.
df = pd.read_csv('/content/soil-classification/soil_competition-2025/test_ids.csv')
# Load the (classifier, scaler) pair from the file the training cell writes.
# The previous name 'soil_ocsvm_model.pkl' is never created by this notebook.
# NOTE: joblib.load unpickles arbitrary objects — only load files you produced.
clf, scaler = joblib.load('soil_model.pkl')

# Feature extraction
def extract_features(path):
    """Return the flattened ResNet50 output for the image at ``path``.

    The image is loaded at 224x224, turned into an array with a leading
    batch axis, and run through the module-level ``model``.
    """
    image = load_img(path, target_size=(224, 224))
    batch = np.asarray(image)[np.newaxis, ...]  # Add batch dimension
    prediction = model.predict(batch)
    return prediction.flatten()

# Prediction loop
# `test_dir` was referenced here but never defined anywhere in the notebook.
# NOTE(review): assumed to be the 'test' folder next to test_ids.csv,
# mirroring the 'train' folder used for training — confirm against the archive.
test_dir = '/content/soil-classification/soil_competition-2025/test'

# One label per row: 1 when the one-class SVM accepts the image as an inlier,
# 0 when it rejects it, and -1 when the image file is missing on disk.
predicted_labels = []
for image_id in df['image_id']:
    image_path = Path(test_dir) / image_id
    if not image_path.exists():
        predicted_labels.append(-1)
        continue
    # The scaler expects a 2-D array, hence the reshape to a single-row batch.
    scaled = scaler.transform(extract_features(image_path).reshape(1, -1))
    predicted_labels.append(1 if clf.predict(scaled)[0] == 1 else 0)
df['predicted_label'] = predicted_labels

df.to_csv('test_predictions.csv', index=False)
print(f"✅ Saved {len(df)} predictions")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms

In [10]:
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import numpy as np # Import numpy

# Hold out 20% of the extracted training features to sanity-check the detector.
X_train, X_test = train_test_split(features, test_size=0.2, random_state=42)
# NOTE: this re-fits the module-level `scaler` and `ocsvm` in place on the
# 80% split; the pickle written by the training cell is not affected.
X_train_scaled = scaler.fit_transform(X_train)
ocsvm.fit(X_train_scaled)

X_test_scaled = scaler.transform(X_test)

y_pred = ocsvm.predict(X_test_scaled)

# `y_true` was never defined. The held-out samples are drawn from the same
# training features the detector is fitted on, so they are labelled as
# inliers (+1, the value OneClassSVM.predict returns for inliers).
# NOTE(review): this assumes every training image is a genuine positive
# example — confirm against the dataset description.
y_true = np.ones(len(X_test), dtype=int)

f1 = f1_score(y_true, y_pred, pos_label=1)
print("F1 Score:", f1)

F1 Score: 0.9151785714285714
