In [7]:
import pandas as pd
import numpy as np
import cv2
import os
from tqdm import tqdm

In [8]:
cleaned_data_path = "../data/processed/cleaned_fabrics_data.csv"
features_output_path = "../data/processed/features.csv"

In [9]:
print("📥 Loading cleaned dataset...")
df = pd.read_csv(cleaned_data_path)
print(f"✅ Loaded {len(df)} records\n")

📥 Loading cleaned dataset...
✅ Loaded 2833 records



In [10]:
def extract_features(image_path):
    """Extract color, texture, and edge-based features from a fabric image."""
    try:
        img = cv2.imread(image_path)
        if img is None:
            return None

        img = cv2.resize(img, (128, 128))  # Standardize image size
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # --- 1️⃣ Color Features ---
        mean_color = img.mean(axis=(0, 1))
        std_color = img.std(axis=(0, 1))

        # --- 2️⃣ Texture Features (using GLCM-like stats) ---
        contrast = img_gray.std()  # rough texture measure
        smoothness = 1 / (1 + contrast)  # smoothness index
        entropy = -np.sum((img_gray / 255.0) * np.log2(img_gray / 255.0 + 1e-9))

        # --- 3️⃣ Edge Features ---
        edges = cv2.Canny(img_gray, 100, 200)
        edge_density = np.sum(edges > 0) / edges.size

        # --- Combine all features ---
        features = np.hstack([
            mean_color, std_color,
            [contrast, smoothness, entropy, edge_density]
        ])
        return features

    except Exception as e:
        print(f"⚠️ Error processing {image_path}: {e}")
        return None

In [11]:
feature_list = []
image_paths = df['file_path'].tolist()
labels = df['defect_code'].tolist()

print("🔄 Extracting features from fabric images...")

for img_path, label in tqdm(zip(image_paths, labels), total=len(image_paths)):
    full_path = os.path.join("..", img_path)  # Adjust to relative directory
    feats = extract_features(full_path)
    if feats is not None:
        feature_list.append([label] + feats.tolist())


🔄 Extracting features from fabric images...


100%|██████████| 2833/2833 [00:57<00:00, 49.63it/s]


In [12]:
columns = [
    'defect_code',
    'mean_R', 'mean_G', 'mean_B',
    'std_R', 'std_G', 'std_B',
    'contrast', 'smoothness', 'entropy', 'edge_density'
]

features_df = pd.DataFrame(feature_list, columns=columns)

print(f"\n✅ Feature extraction complete! Extracted {len(features_df)} samples.")
print(features_df.head())


✅ Feature extraction complete! Extracted 2833 samples.
   defect_code     mean_R     mean_G     mean_B      std_R      std_G  \
0            0  75.373413  39.617188  23.330566  34.021075  24.308349   
1            0  79.043579  42.547974  24.335571  32.848480  23.765535   
2            1  78.707214  42.473450  24.625061  33.242653  23.830912   
3            0  78.964783  42.764648  24.944946  33.487846  24.235159   
4            0  79.308594  43.271423  25.482056  33.818597  24.076371   

       std_B   contrast  smoothness      entropy  edge_density  
0  17.515105  22.761960    0.042084  6115.776330      0.354004  
1  17.445949  22.255746    0.043000  6374.644691      0.355957  
2  17.578732  22.406508    0.042723  6370.795024      0.344666  
3  17.820972  22.716422    0.042165  6380.411239      0.347900  
4  17.908623  22.688086    0.042215  6424.369858      0.347595  


In [13]:
os.makedirs(os.path.dirname(features_output_path), exist_ok=True)
features_df.to_csv(features_output_path, index=False)

print(f"\n💾 Features saved successfully at: {features_output_path}")


💾 Features saved successfully at: ../data/processed/features.csv


In [18]:
df = pd.read_csv(features_output_path)
df.head(10)

Unnamed: 0,defect_code,mean_R,mean_G,mean_B,std_R,std_G,std_B,contrast,smoothness,entropy,edge_density
0,0,75.373413,39.617188,23.330566,34.021075,24.308349,17.515105,22.76196,0.042084,6115.77633,0.354004
1,0,79.043579,42.547974,24.335571,32.84848,23.765535,17.445949,22.255746,0.043,6374.644691,0.355957
2,1,78.707214,42.47345,24.625061,33.242653,23.830912,17.578732,22.406508,0.042723,6370.795024,0.344666
3,0,78.964783,42.764648,24.944946,33.487846,24.235159,17.820972,22.716422,0.042165,6380.411239,0.3479
4,0,79.308594,43.271423,25.482056,33.818597,24.076371,17.908623,22.688086,0.042215,6424.369858,0.347595
5,0,78.884583,42.796692,25.059448,33.83398,24.385359,17.772904,22.832381,0.04196,6377.170377,0.345764
6,0,78.485474,42.39978,24.674805,33.367483,24.018061,17.712789,22.518832,0.042519,6358.208743,0.341919
7,0,80.733826,43.675537,25.568481,32.637772,23.652691,17.427096,22.183294,0.043135,6490.71707,0.341675
8,0,80.580505,43.55365,25.343018,33.067157,23.899104,17.644531,22.440427,0.042661,6457.363125,0.346008
9,0,79.619812,42.849426,24.672791,33.158068,23.72167,17.450682,22.321973,0.042878,6397.188269,0.34845
