### Importing Libraries

In [1]:
import os
import cv2
import pandas as pd
import cupy as cp  # CuPy for GPU-based NumPy operations
import numpy as np
import tensorflow as tf
import scipy
from skimage.feature import local_binary_pattern
from skimage.filters import gabor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
#!pip install cupy --no-cache-dir

Collecting cupy
  Downloading cupy-13.3.0.tar.gz (3.4 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m167.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: cupy
Y
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for cupy (setup.py) ... [?25lerror
[31m  ERROR: Failed building wheel for cupy[0m[31m
[0m[?25h  Running setup.py clean for cupy
Failed to build cupy
[31mERROR: ERROR: Failed to build installable wheels for some pyproject.toml based projects (cupy)[0m[31m
[0m

### Feature Extraction

**Define the paths for the images**

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Define paths
dataset_dir = "/content/drive/My Drive/Fabric Detection Project/textures 3"
categories = ['cotton', 'corduroy', 'denim', 'linin', 'wool']

**Canny Edge Detection**

In [4]:
def extract_canny_edge_detection(image):
    """ image must be passes to the function in grayscale"""
    # Step 1: Enhance contrast (optional)
    equalized_image = cp.asarray(cv2.equalizeHist(cp.asnumpy(image)))

    # Step 2: Apply Gaussian Blur to reduce noise
    blurred_image = cp.asarray(cv2.GaussianBlur(cp.asnumpy(equalized_image), (3, 3), 1))

    # Step 3: Apply Canny Edge Detection with adjusted thresholds (convert back and forth)
    edges = cp.asarray(cv2.Canny(cp.asnumpy(blurred_image), 30, 30))

    return edges

**Gabor Filtering**

In [5]:
def extract_gabor_filters(image):
    """ image must be in grayscale"""

    def build_kernels():
        # Parameters
        gabor_kernels = []
        angles = [0, cp.pi/4, cp.pi/2, 3*cp.pi/4]  # Use CuPy for angles
        ksize = 31  # Size of the filter
        sigma = 4.0  # Standard deviation of the Gaussian envelope
        lambd = 10.0  # Wavelength of the sinusoidal factor
        gamma = 0.5  # Spatial aspect ratio
        psi = 0  # Phase offset

        # Create Gabor kernels
        for theta in np.deg2rad([45, 135]):  # Convert degrees to radians
            kernel = cp.asarray(cv2.getGaborKernel((ksize, ksize), sigma, theta, lambd, gamma, psi, ktype=cv2.CV_32F)) # Using Cupy array
            gabor_kernels.append(kernel)

        return gabor_kernels


    gabor_kernels = build_kernels()

    gabor_features = []

    for kernel in gabor_kernels:
        fimg = cp.asarray(cv2.filter2D(cp.asnumpy(image), cv2.CV_8UC3, cp.asnumpy(kernel)))
        gabor_features.append(fimg)

    gabor_features = cp.array(gabor_features).flatten()

    return gabor_features

**Local Binary Pattern**

In [6]:
def extract_local_binary_pattern(image):

    # Parameters
    radius = 1
    n_points = 8 * radius


    lbp = local_binary_pattern(cp.asnumpy(image), n_points, radius, method="uniform")
    (hist, _) = cp.histogram(cp.asarray(lbp).ravel(), bins=cp.arange(0, n_points + 3),
                             range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)

    return hist

**Feature Extraction**

In [7]:
# Function to extract features from an image
def extract_features(image):
    # Convert to grayscale using CuPy arrays
    gray = cp.asarray(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

    # Canny edge detection
    edges = extract_canny_edge_detection(gray)

    # Gabor Filter responses
    gabor_features=  extract_gabor_filters(gray)

    # Local Binary Patterns (LBP)
    hist = extract_local_binary_pattern(gray)

    # Combine features: edges, Gabor, and LBP
    features = cp.hstack([edges.flatten(), gabor_features, hist])
    features = cp.asnumpy(features)

    return features # Return features back as NumPy array for further processing

**Image Augmentation**

### Modified code for grouping of images and also including the original image with its augmentations. Also creating only 4 augmentations of each original image

In [8]:
# Prepare dataset, labels, and groups
X = []
y = []
groups = []  # This will store the group IDs

# Image Augmentation using TensorFlow
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    shear_range=0.4
)

group_id = 0  # Initialize group ID

for category in categories:
    path = os.path.join(dataset_dir, category)
    label = category

    for count, img_name in enumerate(os.listdir(path), start=1):

        img_path = os.path.join(path, img_name)
        image = cv2.imread(img_path)

        try:
            # Apply data augmentation and extract features
            image = cv2.resize(image, (128, 128))  # Resize to a fixed size

            # Extract features from the original image
            original_features = extract_features(image)
            X.append(original_features)
            y.append(label)
            groups.append(group_id)  # Assign the group ID to the original image

            # Prepare for augmentation
            image = np.expand_dims(image, axis=0)
            aug_iter = datagen.flow(image, batch_size=1)

            # Perform 4 augmentations per image
            for _ in range(4):
                aug_img = next(aug_iter)[0].astype(np.uint8)
                features = extract_features(aug_img)
                X.append(features)
                y.append(label)
                groups.append(group_id)  # Assign the same group ID to the augmentations

            # Increment group ID for the next image and its augmentations
            group_id += 1

        except Exception as e:
            print(f"{e} Error processing image: {img_path}, {img_name}")


print()

OpenCV(4.10.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'
 Error processing image: /content/drive/My Drive/Fabric Detection Project/textures 3/corduroy/.ipynb_checkpoints, .ipynb_checkpoints



## Feature Engineering

**Converting X, y, groups from lists to ndarrays**

In [9]:
# Checking the data type of X,y and groups
print(f"Type of X {type(X)}")
print(f"Type of y {type(y)}")
print(f"Type of groups {type(groups)}")

Type of X <class 'list'>
Type of y <class 'list'>
Type of groups <class 'list'>


In [10]:
# Convert lists to NumPy arrays
X = np.array(X)
y = np.array(y)
groups = np.array(groups)

In [11]:
# Checking the data type of X,y and groups
print(f"Type of X {type(X)}")
print(f"Type of y {type(y)}")
print(f"Type of groups {type(groups)}")

Type of X <class 'numpy.ndarray'>
Type of y <class 'numpy.ndarray'>
Type of groups <class 'numpy.ndarray'>


**Shape of X, y, groups**

In [12]:
# Checking the shape of the dataset and the labels
print(f"Dataset shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Groups shape: {groups.shape}")

Dataset shape: (17875, 49162)
Labels shape: (17875,)
Groups shape: (17875,)


### Inspecting the dtypes to save memory

In [13]:
print(f"Dtype of X {X.dtype}")
print(f"Dtype of y {y.dtype}")
print(f"Dtype of groups {groups.dtype}")

Dtype of X float64
Dtype of y <U8
Dtype of groups int64


**Inspecting the dtype of X**

In [14]:
print(f"Dtype of X {X.dtype}")

Dtype of X float64


In [15]:
# Checking the size of X in GB
print(f"Size(GB) of X {X.nbytes/1e9}")

Size(GB) of X 7.030166


In [16]:
import numpy as np

# Check if any values have non-zero decimals
has_decimals = np.any(X != np.floor(X))

if not has_decimals:
    # Convert the array to int32 if no decimals
    X = X.astype(np.int32)
    print("Array converted to int32 without decimals.")
else:
    print("Array contains non-zero decimals.")

print(f"Size(GB) of X {X.nbytes/1e9}")


Array contains non-zero decimals.
Size(GB) of X 7.030166


In [17]:
len(np.unique(X))

5840

In [18]:
# Check if values exceed float16 limits
float16_min = np.finfo(np.float16).min
float16_max = np.finfo(np.float16).max

# Check if any value is outside the float32 range
if np.any(X < float16_min) or np.any(X > float16_max):
    print("Array contains values outside the float16 range.")
else:
    print("All values are within the float16 range.")

All values are within the float16 range.


In [19]:
# Converting X from float64 to float16
# Checking the size of X in GB
print(f"Size(GB) of X when float64 {X.nbytes/1e9}")

X = X.astype(np.float16)

# Checking the size of X in GB
print(f"Size(GB) of X after converting to float16 {X.nbytes/1e9}")

Size(GB) of X when float64 7.030166
Size(GB) of X after converting to float16 1.7575415


**Inspecting the dtype of y**

1. Encoding y

In [20]:
# Example array with categories
categories = ['linen', 'cotton', 'wool', 'denim', 'corduroy']

# Create a dictionary for manual mapping
category_mapping = { 'corduroy': 1, 'cotton': 2, 'denim': 3, 'linin': 4, 'wool': 5}

# Convert to pandas Series (optional if already in pandas)
y_series = pd.Series(y)

# Map categories to numbers
mapped_categories = y_series.map(category_mapping)
y = np.array(mapped_categories)


print(y)
print(y.shape)

[2 2 2 ... 5 5 5]
(17875,)


In [21]:
# Freeing up memory
del y_series
del mapped_categories

2. Checking the dtye of y

In [22]:
print(f"Dtype of y {y.dtype}")

Dtype of y int64


In [23]:
print(f"Size(GB) of y {y.nbytes/1e9}")

Size(GB) of y 0.000143


In [24]:
print(np.unique(y))

[1 2 3 4 5]


In [25]:
# Converting the dtype of y to uint8
y = y.astype(np.uint8)

print(f"Dtype of y {y.dtype}")
print(f"Size(GB) of y {y.nbytes/1e9}")

Dtype of y uint8
Size(GB) of y 1.7875e-05


**Inspecting the dtype of groups**

In [26]:
print(f"Dtype of groups {groups.dtype}")

Dtype of groups int64


In [27]:
# Get the min and max values for int8
int8_min = np.iinfo(np.int8).min
int8_max = np.iinfo(np.int8).max

# Check if all values fall within the int8 range
if np.all((groups >= int8_min) & (groups <= int8_max)):
    print("All values fall within the int8 range.")
else:
    print("Some values are outside the int8 range.")

Some values are outside the int8 range.


In [28]:
# Get the min and max values for int16
int16_min = np.iinfo(np.int16).min
int16_max = np.iinfo(np.int16).max

# Check if all values fall within the int16 range
if np.all((groups >= int16_min) & (groups <= int16_max)):
    print("All values fall within the int16 range.")
else:
    print("Some values are outside the int16 range.")

All values fall within the int16 range.


In [29]:
# Changing the dtype of groups into int32
print(f"Size of groups when int64 is {groups.nbytes/1e9}")
groups = groups.astype(np.int16)
print(f"Size of groups after changing to int16 is {groups.nbytes/1e9}")

Size of groups when int64 is 0.000143
Size of groups after changing to int16 is 3.575e-05


In [31]:

# Path to the 'Extracted Features' directory
save_dir = "/content/drive/My Drive/Fabric Detection Project/Extracted Features"

# Create 'Extracted Features' directory if it doesn't exist
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Save each array as a separate .npz file
np.savez(os.path.join(save_dir, 'X.npz'), data=X)
np.savez(os.path.join(save_dir, 'y.npz'), labels=y)
np.savez(os.path.join(save_dir, 'groups.npz'), groups=groups)

print(f"Data, labels, and groups have been saved to {save_dir}")


Data, labels, and groups have been saved to /content/drive/My Drive/Fabric Detection Project/Extracted Features


In [30]:
# Estimate the size of each array in gigabytes (in memory)
import sys
data_size = sys.getsizeof(X) / (1024 * 1024 * 1024)
labels_size = sys.getsizeof(y) / (1024 * 1024 * 1024)
groups_size = sys.getsizeof(groups) / (1024 * 1024 * 1024)

print(f"Size of the data.npz :{data_size}")
print(f"Size of the labels.npz :{labels_size}")
print(f"Size of the groups.npz :{groups_size}")


Size of the data.npz :1.6368381939828396
Size of the labels.npz :1.6751699149608612e-05
Size of the groups.npz :3.339909017086029e-05


### New CODE FOR DATA AUGMENTATION

In [None]:
import numpy as np
import os
import cv2
import gc
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Initialize batch size for saving
batch_size = 1000  # You can adjust this based on memory capacity
save_count = 0  # To track saved files

# Initialize arrays
data = []
labels = []
groups = []  # This will store the group IDs

group_id = 0  # Initialize group ID

# Image Augmentation using TensorFlow
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    shear_range=0.4
)

for category in categories:
    path = os.path.join(dataset_dir, category)
    label = category

    for count, img_name in enumerate(os.listdir(path), start=1):

        img_path = os.path.join(path, img_name)
        image = cv2.imread(img_path)

        try:
            # Apply data augmentation and extract features
            image = cv2.resize(image, (128, 128))  # Resize to a fixed size

            # Extract features from the original image
            original_features = extract_features(image)
            data.append(original_features)
            labels.append(label)
            groups.append(group_id)  # Assign the group ID to the original image

            # Prepare for augmentation
            image = np.expand_dims(image, axis=0)
            aug_iter = datagen.flow(image, batch_size=1)

            # Perform 4 augmentations per image
            for _ in range(4):
                aug_img = next(aug_iter)[0].astype(np.uint8)
                features = extract_features(aug_img)
                data.append(features)
                labels.append(label)
                groups.append(group_id)  # Assign the same group ID to the augmentations

            # Increment group ID for the next image and its augmentations
            group_id += 1

            # Save data in batches
            if len(data) >= batch_size:
                # Save current batch to an .npz file
                np.savez_compressed(f'Extracted_features\\dataset_batch_{save_count}.npz', data=np.array(data), labels=np.array(labels), groups=np.array(groups))
                save_count += 1

                # Clear memory by resetting the arrays and forcing garbage collection
                data.clear()
                labels.clear()
                groups.clear()
                gc.collect()  # Force garbage collection to free memory

        except Exception as e:
            print(f"Error processing image: {img_path}, {img_name}")

# Save any remaining data if exists after loop ends
if data:
    np.savez_compressed(f'Extracted_features\\dataset_batch_{save_count}.npz', data=np.array(data), labels=np.array(labels), groups=np.array(groups))
    data.clear()
    labels.clear()
    groups.clear()
    gc.collect()  # Clean up the remaining memory


Error processing image: textures 3/corduroy/.ipynb_checkpoints, .ipynb_checkpoints


### New Code (using data,labels and groups directly as ndarrays rather than lists)

In [None]:

# Counting the number of rows(images) and features
import os

# Initialize counters
total_original_images = 0
total_augmented_images = 0
augmentation_per_image = 4  # As you're doing 4 augmentations per image
feature_size = None  # To store the size of features (columns)

for category in categories:
    path = os.path.join(dataset_dir, category)

    for count, img_name in enumerate(os.listdir(path), start=1):
        img_path = os.path.join(path, img_name)
        image = cv2.imread(img_path)

        try:
            image = cv2.resize(image, (128, 128))  # Resize to a fixed size
            original_features = extract_features(image)

            # Determine feature size (columns) from the first image processed
            if feature_size is None:
                feature_size = original_features.shape[0]  # Assuming 1D feature vector

            # Increment counts for original and augmented images
            total_original_images += 1
            total_augmented_images += augmentation_per_image

        except Exception as e:
            print(f"Error processing image: {img_path}, {img_name}")

# Total number of images = original + augmented
total_images = total_original_images + total_augmented_images

# Print the results
print(f"Total original images: {total_original_images}")
print(f"Total augmented images: {total_augmented_images}")
print(f"Total images including augmentations: {total_images}")
print(f"Number of features per image: {feature_size}")


Error processing image: /content/drive/My Drive/Fabric Detection Project/textures 3/corduroy/.ipynb_checkpoints, .ipynb_checkpoints
Total original images: 3575
Total augmented images: 14300
Total images including augmentations: 17875
Number of features per image: 49162


In [None]:
# Image Augmentation using TensorFlow
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    shear_range=0.4
)

# Step 1: Create "Extracted_features" folder if it doesn't exist
output_dir = '/content/drive/My Drive/Fabric Detection Project/Extracted_features'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    print(f"Created directory: {output_dir}")

# Step 2: Define file paths for the memory-mapped arrays inside the new folder
data_file = os.path.join(output_dir, 'data_file.dat')
labels_file = os.path.join(output_dir, 'labels_file.dat')
groups_file = os.path.join(output_dir, 'groups_file.dat')
# Known values
total_images = total_images  # Total original images + augmentations
feature_size = feature_size   # Number of features per image

# Step 1: Preallocate memory-mapped arrays
data = np.memmap(data_file, dtype='float32', mode='w+', shape=(total_images, feature_size))
labels = np.memmap(labels_file, dtype='object', mode='w+', shape=(total_images,))
groups = np.memmap(groups_file, dtype='int32', mode='w+', shape=(total_images,))

# Step 2: Feature Extraction with memory mapping
group_id = 0  # Initialize group ID
image_counter = 0  # Keep track of which row we are filling in the arrays

for category in categories:
    path = os.path.join(dataset_dir, category)
    label = category

    for count, img_name in enumerate(os.listdir(path), start=1):
        img_path = os.path.join(path, img_name)
        image = cv2.imread(img_path)

        try:
            image = cv2.resize(image, (128, 128))  # Resize to a fixed size

            # Extract features from the original image
            original_features = extract_features(image)

            # Store the original image's features, label, and group ID in the memory-mapped arrays
            data[image_counter, :] = original_features  # Fill in the row corresponding to the current image
            labels[image_counter] = label
            groups[image_counter] = group_id
            image_counter += 1  # Increment counter to fill the next row

            # Prepare for augmentation
            image = np.expand_dims(image, axis=0)
            aug_iter = datagen.flow(image, batch_size=1)

            # Perform 4 augmentations per image
            for _ in range(4):
                aug_img = next(aug_iter)[0].astype(np.uint8)
                features = extract_features(aug_img)

                # Store augmented image features, label, and group ID
                data[image_counter, :] = features
                labels[image_counter] = label
                groups[image_counter] = group_id
                image_counter += 1  # Increment counter for the next augmented image

            # Increment group ID for the next set of images
            group_id += 1

        except Exception as e:
            print(e,f"{img_path},  {img_name}")

# Flush the changes to disk to ensure everything is saved
data.flush()
labels.flush()
groups.flush()


OpenCV(4.10.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'
 /content/drive/My Drive/Fabric Detection Project/textures 3/corduroy/.ipynb_checkpoints,  .ipynb_checkpoints


**Freeing up the memory**

In [None]:
import gc
gc.collect()
del data
del labels
del groups

**Load the data,labels and groups**

In [None]:
import numpy as np
import os

# Memory-mapped file paths
output_dir = '/content/drive/My Drive/Fabric Detection Project/Extracted_features'
data_file = os.path.join(output_dir, 'data_file.dat')
labels_file = os.path.join(output_dir, 'labels_file.dat')
groups_file = os.path.join(output_dir, 'groups_file.dat')

# Known values
#total_images = 17780  # Total original images + augmentations
#feature_size = 49162   # Number of features per image

# Load the memory-mapped arrays for reading
data = np.memmap(data_file, dtype='float32', mode='r', shape=(total_images, feature_size))
labels = np.memmap(labels_file, dtype='object', mode='r', shape=(total_images,))
groups = np.memmap(groups_file, dtype='int32', mode='r', shape=(total_images,))

In [None]:
print(data.shape)

(17780, 49162)


**Data** contains the input data (data) <br>
**Labels** contains the output data (Y)

**Saving the data,labels and groups**

In [None]:
# Convert lists to NumPy arrays
data_np = np.array(data)
labels_np = np.array(labels)
groups_np = np.array(groups)

if not os.path.exists("Extracted_features"):
    os.makedirs("Extracted_features")
# Save to a .npz file
np.savez('Extracted_features\\data.npz', data=data_np)
np.savez('Extracted_features\\labels.npz', labels=labels_np)
np.savez('Extracted_features\\groups.npz',groups=groups_np)


: 

In [None]:
import os
import numpy as np

# Directory where your .npz files are stored
npz_directory = './'  # Replace with the correct directory path if necessary

# Automatically list all .npz files in the directory
npz_files = [f for f in os.listdir(npz_directory) if f.endswith('.npz')]

# Initialize empty lists to store the extracted data
all_data = []
all_labels = []
all_groups = []

# Iterate through each saved .npz file and load the arrays
for file in npz_files:
    file_path = os.path.join(npz_directory, file)
    # Load the saved .npz file
    with np.load(file_path) as data:
        all_data.append(data['data'])    # Append 'data' array
        all_labels.append(data['labels']) # Append 'labels' array
        all_groups.append(data['groups']) # Append 'groups' array

# Now concatenate all arrays to create final datasets
final_data = np.concatenate(all_data, axis=0)
final_labels = np.concatenate(all_labels, axis=0)
final_groups = np.concatenate(all_groups, axis=0)

# Your final arrays are now ready to use
print("Final data shape:", final_data.shape)
print("Final labels shape:", final_labels.shape)
print("Final groups shape:", final_groups.shape)


: 

--------------------------------End----------------------------------

MOVE TO model_training.ipynb

**Loading the saved data and labels**

In [None]:
# Load from the .npz file
loaded_data = np.load('Extracted_features\\data.npz')
loaded_labels = np.load('Extracted_features\\labels.npz')
loaded_groups = np.load('Extracted_features\\groups.npz')

data_loaded = loaded_data["data"]
labels_loaded = loaded_labels["labels"]
groups_loaded = loaded_labels["groups"]

In [None]:
# Checking the shape of the dataset and the labels
print(f"Dataset shape: {data_loaded.shape}")
print(f"Labels shape: {labels_loaded.shape}")
print(f"Groups shape: {groups_loaded.shape}")

Dataset shape: (17780, 49162)
Labels shape: (17780,)


From the above output we can see that we have **49162 features** and **17780 rows**

In [None]:
data = data_loaded
labels = labels_loaded

### Label Encoding of target variable

In [None]:
# Encode labels
le = LabelEncoder()
labels = le.fit_transform(labels)

### Train-test split

In [None]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(np.array(data), np.array(labels), test_size=0.3, random_state=42)
print("X_train shape: ",X_train.shape)
print("X_test shape: ",X_test.shape)
print("y_train shape: ",y_train.shape)
print("y_test shape: ",y_test.shape)

X_train shape:  (12446, 49162)
X_test shape:  (5334, 49162)
y_train shape:  (12446,)
y_test shape:  (5334,)


### Standardization

In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Dimensionality Reduction using PCA(Principal Component Analysis)

In [None]:
# Dimensionality Reduction using PCA
pca = PCA(n_components=1000)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

### Model Training

In [None]:
# Train a Random Forest Classifier
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest_clf.fit(X_train, y_train)

In [None]:
# Apply SVM model (using RBF kernel )
svm_clf = SVC(kernel='rbf', random_state=42)
svm_clf.fit(X_train, y_train)

### Model Evaluation

In [None]:
# Predictions and Evaluation
y_pred = random_forest_clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names=le.classes_))
print(confusion_matrix(y_test, y_pred))


              precision    recall  f1-score   support

    corduroy       0.93      0.68      0.79      1055
      cotton       0.65      0.89      0.75      1062
       denim       0.80      0.94      0.86      1092
       linin       0.83      0.61      0.70      1091
        wool       0.65      0.63      0.64      1034

    accuracy                           0.75      5334
   macro avg       0.77      0.75      0.75      5334
weighted avg       0.77      0.75      0.75      5334

[[ 718   99   78   41  119]
 [   8  946   21   31   56]
 [   0    2 1023    7   60]
 [  21  235   46  668  121]
 [  26  180  114   61  653]]


In [None]:
# Predictions and Evaluation
y_pred = svm_clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names=le.classes_))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

    corduroy       0.52      0.73      0.61      1055
      cotton       0.43      0.56      0.48      1062
       denim       0.96      0.82      0.88      1092
       linin       0.60      0.42      0.49      1091
        wool       0.63      0.48      0.55      1034

    accuracy                           0.60      5334
   macro avg       0.63      0.60      0.60      5334
weighted avg       0.63      0.60      0.61      5334

[[766 163  11  50  65]
 [240 592   5 158  67]
 [ 79   8 896  30  79]
 [147 395   7 457  85]
 [228 225  15  65 501]]


**Checking class imbalance**

In [None]:
print(pd.Series(labels_loaded).value_counts())


linin       3585
denim       3575
corduroy    3550
cotton      3535
wool        3535
Name: count, dtype: int64


In [None]:
print(len(pd.Series(labels_loaded)))

In [None]:
print((pd.Series(labels_loaded).value_counts() / len(pd.Series(labels_loaded)))*100)


Classes are balanced