In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from torchvision import models, transforms
import torch

# === CONFIG ===
# Root folder of the competition data. Defaults to the original hard-coded
# location; set the SOIL_DATA_ROOT environment variable to point the script
# at another copy of the dataset without editing the code.
DATA_ROOT = os.environ.get(
    "SOIL_DATA_ROOT",
    r"C:\Users\LENOVO FLEX\Downloads\soil-classification-part-2\soil_competition-2025",
)
# CSV listing the training images (read for its 'image_id' column).
TRAIN_CSV = os.path.join(DATA_ROOT, "train_labels.csv")
# CSV listing the test images to predict (read for its 'image_id' column).
TEST_IDS_CSV = os.path.join(DATA_ROOT, "test_ids.csv")
# Folders holding the image files named by the 'image_id' values.
TRAIN_IMG_DIR = os.path.join(DATA_ROOT, "data", "train")
TEST_IMG_DIR = os.path.join(DATA_ROOT, "data", "test")
# Run the network on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# === Load CSV ===
# Read the training table first, then the table of test image ids.
df_train, df_test_ids = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_IDS_CSV)
)

# === Image Preprocessing ===
# Per-channel mean / std handed to Normalize (these are the values torchvision
# documents for its ImageNet-pretrained models).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Applied in order: resize to 224x224, convert to a tensor, normalise channels.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
transform = transforms.Compose(preprocessing_steps)

# === Load Pretrained ResNet50 (feature extractor) ===
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` argument; IMAGENET1K_V1 is the checkpoint that `pretrained=True`
# used to load. Older torchvision releases do not have the weights enum, so
# fall back to the legacy keyword there.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)
# Swap the final fully-connected classifier for a pass-through so the forward
# pass returns the features that used to feed the classifier.
model.fc = torch.nn.Identity()
# Put the network in evaluation mode and move it to the selected device.
model.eval().to(DEVICE)

def extract_features(image_path):
    """Return the network's feature vector for one image, or None on failure.

    The image is opened, converted to RGB, run through the module-level
    ``transform`` and ``model`` on ``DEVICE`` without gradient tracking, and
    the output is flattened into a 1-D NumPy array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 1-D ``numpy.ndarray`` of features, or ``None`` if anything went
        wrong (the error is printed rather than raised so that one bad file
        does not abort a whole batch run).
    """
    try:
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made, instead of leaving it open until garbage
        # collection. `convert` returns a new, fully loaded image.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        # unsqueeze(0) adds the batch dimension the model expects.
        image_tensor = transform(image).unsqueeze(0).to(DEVICE)
        with torch.no_grad():
            features = model(image_tensor).cpu().numpy().flatten()
        return features
    except Exception as e:
        # Deliberately broad: callers treat None as "skip this image".
        print(f" Error with {image_path}: {e}")
        return None

# === Extract soil features ===
print("🔍 Extracting features from soil images...")
# Lazily build the path of each training image (tqdm shows the progress bar).
train_image_paths = (
    os.path.join(TRAIN_IMG_DIR, f"{image_id}")
    for image_id in tqdm(df_train['image_id'])
)
extracted_train_feats = (extract_features(path) for path in train_image_paths)
# extract_features returns None for unreadable images; those are left out.
soil_features = np.array(
    [feat for feat in extracted_train_feats if feat is not None]
)

# === Scale features ===
# Fit the scaler on the training features, then standardise those same
# features; the fitted `scaler` is reused later for the test features.
scaler = StandardScaler().fit(soil_features)
soil_features_scaled = scaler.transform(soil_features)

# === Train One-Class SVM ===
print(" Training One-Class SVM...")
svm_settings = dict(kernel='rbf', gamma='auto', nu=0.1)
# The model is fitted on the training (soil) features only.
svm = OneClassSVM(**svm_settings).fit(soil_features_scaled)

# === Predict on test images ===
print(" Predicting on test set...")

# Pass 1: extract features, remembering which ids produced a usable vector.
# Images that fail to load are skipped (extract_features already printed the
# error), so they get no row in `results`.
# NOTE(review): skipped ids will be missing from the submission file - confirm
# that is acceptable for the competition format.
kept_ids = []
kept_feats = []
for img_id in tqdm(df_test_ids['image_id']):
    feat = extract_features(os.path.join(TEST_IMG_DIR, f"{img_id}"))
    if feat is not None:
        kept_ids.append(img_id)
        kept_feats.append(feat)

# Pass 2: scale and classify all images in one call rather than once per
# image. The guard keeps an empty test set from reaching scikit-learn, which
# rejects empty input.
results = []
if kept_feats:
    preds = svm.predict(scaler.transform(np.array(kept_feats)))
    # OneClassSVM.predict yields +1 for inliers and -1 for outliers; the
    # submission uses 1 / 0 instead.
    results = [
        {'image_id': img_id, 'label': 1 if pred == 1 else 0}
        for img_id, pred in zip(kept_ids, preds)
    ]

# === Save predictions ===
SUBMISSION_CSV = r"C:\Users\LENOVO FLEX\Downloads\soil-classification-part-2\soil_competition-2025\final_submission_revised.csv"
# Naming the columns explicitly keeps the header row in the file even when
# `results` is empty; for non-empty results the column order is unchanged.
output_df = pd.DataFrame(results, columns=['image_id', 'label'])
output_df.to_csv(SUBMISSION_CSV, index=False)
# Report the file that was actually written (the old message named
# "submission.csv", which is not the file this script creates).
print(f" {SUBMISSION_CSV} created successfully.")




🔍 Extracting features from soil images...


100%|██████████| 1222/1222 [01:43<00:00, 11.81it/s]


🧠 Training One-Class SVM...
🔍 Predicting on test set...


100%|██████████| 967/967 [01:18<00:00, 12.40it/s]

✅ submission.csv created successfully.



