In [None]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/

In [None]:
# Download the dataset archive with the Kaggle CLI (requires the API token installed above).
!kaggle datasets download -d justin900429/3d-printer-defected-dataset

Downloading 3d-printer-defected-dataset.zip to /content
 98% 52.0M/52.8M [00:02<00:00, 30.9MB/s]
100% 52.8M/52.8M [00:02<00:00, 23.4MB/s]


In [None]:
!unzip \*.zip

Archive:  3d-printer-defected-dataset.zip
  inflating: defected/bed_not_stick_0.jpg  
  inflating: defected/bed_not_stick_1.jpg  
  inflating: defected/bed_not_stick_10.jpg  
  inflating: defected/bed_not_stick_11.jpg  
  inflating: defected/bed_not_stick_12.jpg  
  inflating: defected/bed_not_stick_13.jpg  
  inflating: defected/bed_not_stick_14.jpg  
  inflating: defected/bed_not_stick_15.jpg  
  inflating: defected/bed_not_stick_16.jpg  
  inflating: defected/bed_not_stick_17.jpg  
  inflating: defected/bed_not_stick_18.jpg  
  inflating: defected/bed_not_stick_19.jpg  
  inflating: defected/bed_not_stick_2.jpg  
  inflating: defected/bed_not_stick_20.jpg  
  inflating: defected/bed_not_stick_21.jpg  
  inflating: defected/bed_not_stick_22.jpg  
  inflating: defected/bed_not_stick_23.jpg  
  inflating: defected/bed_not_stick_24.jpg  
  inflating: defected/bed_not_stick_25.jpg  
  inflating: defected/bed_not_stick_26.jpg  
  inflating: defected/bed_not_stick_27.jpg  
  inflating: def

In [None]:
# Paths to the two image folders of the dataset under /content:
# one holding images of defected prints, the other images of non-defected prints.
defect_folder = '/content/defected'
no_defect_folder = '/content/no_defected'

In [None]:
# Function to load and preprocess the images
def load_images(folder, label):
    """Load every readable image in ``folder`` as a 224x224 RGB array.

    Directory entries are visited in sorted filename order so that the result
    does not depend on the file system's listing order. Sub-directories are
    skipped, and files that OpenCV cannot decode are skipped with a warning
    instead of crashing the resize step.

    Args:
        folder: Path of the directory to scan (not recursive).
        label: Value recorded once for every image loaded from ``folder``.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the resized
        RGB images and a copy of ``label`` for each of them.

    Raises:
        OSError: If ``folder`` cannot be listed (propagated from ``os.listdir``).
    """
    import warnings  # local import: only needed when a file has to be skipped

    images = []
    labels = []
    unreadable = []
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            # Nested directories cannot be decoded as images.
            continue
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable or non-image file by returning None.
            unreadable.append(filename)
            continue
        img = cv2.resize(img, (224, 224))  # Resize the image to 224x224 pixels
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
        images.append(img)
        labels.append(label)
    if unreadable:
        warnings.warn(
            "Skipped {} unreadable file(s) in {}: {}".format(
                len(unreadable), folder, unreadable[:5]
            )
        )
    return images, labels

In [None]:
import os
import cv2
import numpy as np

In [None]:
# Load and preprocess the defected-print images; they are labelled 0
defect_images, defect_labels = load_images(defect_folder, 0)

# Load and preprocess the non-defected-print images; they are labelled 1
no_defect_images, no_defect_labels = load_images(no_defect_folder, 1)

In [None]:
# Merge both classes (defected images first) and convert the combined lists
# into one image array and one parallel label array.
images = np.array(defect_images + no_defect_images)
labels = np.array(defect_labels + no_defect_labels)

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split


In [None]:
def generate_bags(images, window_size, stride=None):
    """Cut each image into square windows and group them into one bag per image.

    Windows are taken on a regular grid starting at the top-left corner. Only
    windows that fit entirely inside the image are kept, so any border strip
    narrower than ``window_size`` is dropped.

    Args:
        images: Iterable of arrays whose first two axes are height and width.
        window_size: Side length of each square window, in pixels. Must be
            positive.
        stride: Step between consecutive windows along each axis. Defaults to
            ``window_size`` (non-overlapping tiles); a smaller value produces
            overlapping windows.

    Returns:
        A tuple ``(bags, num_subregions_list)``. ``bags[i]`` is the list of
        windows sliced from ``images[i]`` in row-major order, and
        ``num_subregions_list[i]`` is ``len(bags[i])``.

    Raises:
        ValueError: If ``window_size`` or ``stride`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be positive, got {}".format(window_size))
    step = window_size if stride is None else stride
    if step <= 0:
        raise ValueError("stride must be positive, got {}".format(step))

    bags = []
    num_subregions_list = []  # Store the number of subregions per image
    for image in images:
        height, width = image.shape[:2]
        # The "+ 1" makes the last start position whose window still fits inclusive.
        subregions = [
            image[y:y + window_size, x:x + window_size]
            for y in range(0, height - window_size + 1, step)
            for x in range(0, width - window_size + 1, step)
        ]
        bags.append(subregions)
        num_subregions_list.append(len(subregions))
    return bags, num_subregions_list

In [None]:
def assign_bag_labels(image_labels, num_subregions):
    """Give every subregion the label of the image it was cut from.

    Each entry of ``image_labels`` is repeated as many times as the matching
    entry of ``num_subregions``, and the repeats are concatenated in image
    order. Pairs are formed with ``zip``, so surplus entries in the longer
    argument are ignored.

    The label values are passed through unchanged. In this notebook the
    loading cell uses 0 for the defected folder and 1 for the non-defected one.

    Args:
        image_labels: One label per image.
        num_subregions: Number of subregions generated for each image.

    Returns:
        A flat list with one label per subregion.
    """
    return [
        image_label
        for image_label, count in zip(image_labels, num_subregions)
        for _ in range(count)
    ]

In [None]:
# Hold out 20% of the images for validation, with a fixed seed for repeatability.
# The split is done on whole images, before any windows are cut from them.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

# Linear-kernel support vector classifier; it is fitted in a later cell.
model = SVC(kernel='linear')

In [None]:
# Cut each training image into 64-pixel windows, then label every window
# with the label of the image it came from.
bags_train, num_subregions_train = generate_bags(images=X_train, window_size=64)
bag_labels_train = assign_bag_labels(
    image_labels=y_train,
    num_subregions=num_subregions_train,
)


In [None]:
# Number of training window labels (one per window across all training images).
len(bag_labels_train)

11205

In [None]:
# Feature matrix: one row per window, each window flattened to a 1-D pixel vector.
bags_train_flat = np.array([
    patch.flatten()
    for image_bag in bags_train
    for patch in image_bag
])
# 1-D label array aligned row-for-row with the feature matrix.
bag_labels_train_flat = np.hstack(bag_labels_train)

In [None]:
# Sanity check: the flattened label array should be as long as the label list above.
len(bag_labels_train_flat)

11205

In [None]:
# Train the classifier on the flattened training windows and their inherited labels.
model.fit(X=bags_train_flat, y=bag_labels_train_flat)


In [None]:
# Apply the same windowing and labelling to the held-out images.
bags_test, num_subregions_test = generate_bags(images=X_val, window_size=64)
bag_labels_test = assign_bag_labels(
    image_labels=y_val,
    num_subregions=num_subregions_test,
)

# One flattened pixel vector per window, plus a 1-D label array aligned with it.
bags_test_flat = np.array([
    patch.flatten()
    for image_bag in bags_test
    for patch in image_bag
])
bag_labels_test_flat = np.hstack(bag_labels_test)

In [None]:
# Window-level accuracy on the held-out images; each window is scored against
# the label inherited from its parent image.
accuracy = model.score(X=bags_test_flat, y=bag_labels_test_flat)

In [None]:
# Display the window-level validation accuracy computed in the previous cell.
accuracy

0.9654558404558404

In [None]:
# Classify every held-out window.
predictions = model.predict(bags_test_flat)

# Regroup the flat predictions into one row per held-out image. This only works
# when every image contributed the same number of windows.
predicted_labels = predictions.reshape(len(bags_test), -1)

In [None]:
# Number of rows in the regrouped predictions, i.e. one per held-out image.
len(predicted_labels)

312

In [None]:
from sklearn import svm
from sklearn import datasets
import joblib

# Persist the fitted classifier to model.pkl in the working directory.
joblib.dump(model, "model.pkl")
# To restore it later (joblib files are pickle-based, so only load files you trust):
# loaded_model = joblib.load("model.pkl")

['model.pkl']