<a href="https://colab.research.google.com/github/Morsalah/M.Sc-Research-HRI-using-DIGIT-tactile-sensor/blob/main/binaryclassification_ongoing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import time
import logging
import csv
import cv2
import numpy as np
from skimage.metrics import structural_similarity as ssim
from digit_interface.digit import Digit
import torch
import matplotlib.pyplot as plt
import random
from models.simple_cnn import ConvNeuralNet
from  models.variables import learning_rate, num_classes
import torch.nn as nn

In [None]:
# Logging: one module-level logger, with the root logger configured at DEBUG level
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

1. creating directory


In [None]:
# Function to create directories
def create_directory(path: str) -> None:
    """Create ``path`` (including missing parent directories) if it is absent.

    Args:
        path: Directory to create.

    An already existing path is left untouched and nothing is logged.
    When the directory is created, an INFO message is emitted.
    """
    if os.path.exists(path):
        return
    # exist_ok=True closes the gap between the check above and the creation:
    # if the directory appears in between, that is not treated as an error.
    os.makedirs(path, exist_ok=True)
    logger.info(f"Directory {path} created.")

2. labeling captured images by SSIM and pixel intensity difference

In [None]:
# Function to compare images using SSIM + Pixel Difference
def is_touch_detected(baseline_image, new_image, ssim_threshold=0.8, pixel_change_threshold=5,
                      pixel_diff_threshold=20):
    """Decide whether ``new_image`` shows a touch relative to ``baseline_image``.

    Both images are converted from BGR to grayscale, then two measures are
    computed: the SSIM score between them and the percentage of pixels whose
    absolute grayscale difference exceeds ``pixel_diff_threshold``.

    Args:
        baseline_image: Reference (no-touch) image in BGR channel order.
        new_image: Image to classify, in BGR channel order.
            NOTE(review): presumably must have the same shape as the
            baseline for the comparison to work - confirm with callers.
        ssim_threshold: A touch is reported when the SSIM score is below this.
        pixel_change_threshold: A touch is reported when more than this
            percentage of pixels changed.
        pixel_diff_threshold: Grayscale difference above which a single pixel
            counts as changed (previously the hard-coded value 20).

    Returns:
        bool: True if either criterion signals a touch, False otherwise.

    Raises:
        ValueError: If either image is None (e.g. a failed ``cv2.imread``).
    """
    if baseline_image is None or new_image is None:
        raise ValueError("is_touch_detected requires two valid images (got None).")

    # Convert images to grayscale
    baseline_gray = cv2.cvtColor(baseline_image, cv2.COLOR_BGR2GRAY)
    new_gray = cv2.cvtColor(new_image, cv2.COLOR_BGR2GRAY)

    # Compute Structural Similarity Index (SSIM)
    similarity_score = ssim(baseline_gray, new_gray)

    # Percentage of pixels whose absolute difference exceeds the per-pixel threshold
    diff = cv2.absdiff(baseline_gray, new_gray)
    changed_pixels = np.sum(diff > pixel_diff_threshold)
    total_pixels = diff.size
    pixel_change_percent = (changed_pixels / total_pixels) * 100

    logger.debug(f"SSIM score: {similarity_score}, Pixel change: {pixel_change_percent:.2f}%")

    # If SSIM is low OR pixel difference is high, classify as touch.
    # bool() turns the NumPy comparison result into a plain Python bool.
    return bool(similarity_score < ssim_threshold or pixel_change_percent > pixel_change_threshold)

3. auto labeling


In [None]:
# Capture and auto-label images
def capture_and_save_images(serial: str, save_path: str, num_images: int = 100, interval: float = 1.0):
    """Capture frames from a DIGIT device and sort them into YES/NO folders.

    Three baseline frames are captured first and averaged. Each subsequent
    frame is compared against that baseline with ``is_touch_detected`` and
    moved into ``<save_path>/YES`` or ``<save_path>/NO``. Every labeled file
    is appended to ``<save_path>/labels.csv`` (a header row is written when
    the file did not exist before).

    Args:
        serial: Serial string passed to ``Digit``.
        save_path: Root folder for baseline images, labeled images and the CSV.
        num_images: Maximum number of frames to capture.
        interval: Seconds to sleep after each successfully labeled frame.

    Raises:
        RuntimeError: If a baseline frame cannot be read back from disk.
    """
    yes_path = os.path.join(save_path, "YES")
    no_path = os.path.join(save_path, "NO")
    create_directory(yes_path)
    create_directory(no_path)

    csv_filename = os.path.join(save_path, "labels.csv")
    csv_exists = os.path.exists(csv_filename)

    with open(csv_filename, mode="a", newline="") as file:
        writer = csv.writer(file)
        if not csv_exists:
            writer.writerow(["filename", "label"])  # Add header if file is new

        # Connect to the DIGIT device
        digit = Digit(serial)
        digit.connect()

        # From here on the device is always disconnected, even if capturing fails.
        try:
            # Capture multiple baseline images (No Touch)
            print("Ensure NO TOUCH for baseline image capture...")
            time.sleep(2)
            baseline_images = []
            for baseline_index in range(3):  # Capture 3 baseline images for better reference
                baseline_filename = os.path.join(save_path, f"baseline_{baseline_index}.png")
                digit.save_frame(baseline_filename)
                baseline_frame = cv2.imread(baseline_filename)
                if baseline_frame is None:
                    # Without a usable baseline no frame can be labeled, so fail early and clearly.
                    raise RuntimeError(f"Could not read baseline image {baseline_filename}")
                baseline_images.append(baseline_frame)
                time.sleep(0.5)

            # Compute average baseline image
            baseline_image = np.mean(np.array(baseline_images), axis=0).astype(np.uint8)
            logger.info("Baseline images captured and averaged.")

            yes_count, no_count = 0, 0

            for i in range(num_images):
                try:
                    # Capture new frame into save_path; it is moved once labeled
                    timestamp = time.strftime("%Y%m%d_%H%M%S")
                    image_filename = os.path.join(save_path, f"digit_{serial}_{timestamp}_{i+1}.png")
                    digit.save_frame(image_filename)

                    # Read the newly captured image
                    new_image = cv2.imread(image_filename)
                    if new_image is None:
                        raise RuntimeError(f"Could not read captured image {image_filename}")

                    # Auto-label based on SSIM + Pixel Difference
                    if is_touch_detected(baseline_image, new_image):
                        label = "yes"
                        save_folder = yes_path
                        yes_count += 1
                    else:
                        label = "no"
                        save_folder = no_path
                        no_count += 1

                    # Move image to correct folder
                    new_image_path = os.path.join(save_folder, os.path.basename(image_filename))
                    os.rename(image_filename, new_image_path)

                    # Log filename and label in CSV
                    writer.writerow([new_image_path, label])
                    logger.info(f"Image saved: {new_image_path} | Label: {label}")

                    # Stop once both categories hold at least num_images // 2 images
                    if yes_count >= num_images // 2 and no_count >= num_images // 2:
                        print("Collected required number of images for both categories.")
                        break

                    # Wait for the specified interval
                    time.sleep(interval)

                except Exception as e:
                    # Best effort: a failed frame is logged and the loop moves on to the next one.
                    logger.error(f"Error capturing image {i+1}: {e}")
        finally:
            # Disconnect the device
            digit.disconnect()

In [None]:
if __name__ == "__main__":
    # Settings for this capture session
    total_images = 100  # target: 50 YES and 50 NO images
    save_directory = "./captured_images"
    serial_number = "D21114"  # alternative device: "D21115"

    # Run the capture / auto-labeling session with the settings above
    capture_and_save_images(
        serial=serial_number,
        save_path=save_directory,
        num_images=total_images,
    )

4. counting captured images


In [None]:
import os
def count_images_in_subfolders(root_folder):
    """Count the image files in the YES and NO subfolders of ``root_folder``.

    Files are counted recursively (nested folders included) when their name
    ends, case-insensitively, with .jpg, .jpeg, .png, .gif or .bmp. The totals
    are printed and also returned so callers can reuse them.

    Args:
        root_folder: Folder expected to contain the ``YES`` and ``NO`` subfolders.

    Returns:
        dict | None: ``{'YES': <count>, 'NO': <count>}``; a missing subfolder
        counts as 0. ``None`` if ``root_folder`` itself does not exist.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    # One counter per expected subfolder
    image_count = {'YES': 0, 'NO': 0}

    # Check if root folder exists
    if not os.path.exists(root_folder):
        print("Error: Root folder not found.")
        return None

    for subfolder in image_count:
        subfolder_path = os.path.join(root_folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue
        # Walk the subfolder tree and count files with an image extension
        for _, _, files in os.walk(subfolder_path):
            image_count[subfolder] += sum(
                1 for file in files if file.lower().endswith(image_extensions)
            )

    # Print the results
    print(f"Images in 'YES' folder: {image_count['YES']}")
    print(f"Images in 'NO' folder: {image_count['NO']}")
    print(f"Total images: {image_count['YES'] + image_count['NO']}")

    return image_count

# Example usage: report how many labeled images have been collected so far
if __name__ == "__main__":
    # Root folder that holds the YES/NO subfolders
    captured_images_folder = "./captured_images"
    count_images_in_subfolders(root_folder=captured_images_folder)


5. display an image

In [None]:
def show_image(category, root_folder="captured_images"):
    """Display one image from the given category folder (YES/NO).

    The first ``.png`` file (by sorted name, extension matched
    case-insensitively) in ``<root_folder>/<CATEGORY>`` is shown with
    matplotlib. Problems are reported with a printed message instead of an
    exception.

    Args:
        category: Category name; it is upper-cased to form the folder name.
        root_folder: Folder containing the category subfolders.

    Returns:
        None.
    """
    folder_path = os.path.join(root_folder, category.upper())
    if not os.path.isdir(folder_path):
        print(f"Error: Folder '{folder_path}' not found.")
        return

    # Sorted so the same image is picked on every run (os.listdir order is arbitrary)
    images = sorted(img for img in os.listdir(folder_path) if img.lower().endswith(".png"))
    if not images:
        print(f"No images found in {folder_path}")
        return

    image_path = os.path.join(folder_path, images[0])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None
        print(f"Error: Could not read image '{image_path}'.")
        return

    # OpenCV loads BGR; matplotlib expects RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.imshow(image_rgb)
    plt.title(f"Class: {category}")
    plt.axis("off")
    plt.show()


6. display image information

In [None]:
def show_image_info(image_path):
    """
    Print a short report about the image file at ``image_path``: file size,
    Python type, array dtype, dimensions, pixel count and channel count.
    Prints an error message and returns early when the file cannot be read.
    """
    pixels = cv2.imread(image_path)
    if pixels is None:
        print("Error: Could not read image.")
        return

    height, width, channels = pixels.shape

    # Build every report line first, then print them in order
    report = (
        f"Image Path: {image_path}",
        f"Size: {os.path.getsize(image_path)} bytes",
        f"Type: {type(pixels)}",
        f"Data Type: {pixels.dtype}",
        f"height, width, channels: ({height},{width},{channels})",
        f"Total Pixels: {height * width}",
        f"Channels: {channels}",
    )
    for line in report:
        print(line)

# Example usage: preview one image per class, then print details of one sample file
if __name__ == "__main__":
    example_image_path = "captured_images/YES/digit_D21115_20250210_152012_35.png"

    show_image(category="YES")
    show_image(category="NO")
    show_image_info(image_path=example_image_path)


7. Creating the model class

In [None]:
import torch.nn as nn
# Creating a CNN class
class ConvNeuralNet(nn.Module):
    """Small CNN: two conv-conv-pool stages followed by two fully connected layers.

    Args:
        num_classes: Number of output values produced by the final layer.

    ``fc1`` expects 64 * 5 * 5 = 1600 features, which the convolution stack
    only produces for 32x32 inputs. The feature map is therefore adaptively
    average-pooled to 5x5 before flattening, so other input resolutions work
    as well (the input must still be large enough to pass through the four
    unpadded 3x3 convolutions and two 2x2 poolings). For 32x32 inputs the
    feature map is already 5x5 and the extra pooling leaves it unchanged; it
    has no parameters, so existing state dicts still load.

    NOTE(review): the convolution layers are applied back to back without an
    activation in between; only ``fc1`` is followed by a ReLU - confirm this
    is intended.
    """

    # Determine what layers and their order in CNN object
    def __init__(self, num_classes):
        super().__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fixes the spatial size at 5x5 regardless of the input resolution
        self.adaptive_pool = nn.AdaptiveAvgPool2d((5, 5))

        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    # Progresses data across layers
    def forward(self, x):
        """Run a batch of 3-channel images through the network.

        Returns a tensor with one row per input sample and ``num_classes``
        columns.
        """
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.max_pool1(out)

        out = self.conv_layer3(out)
        out = self.conv_layer4(out)
        out = self.max_pool2(out)

        out = self.adaptive_pool(out)

        # Flatten everything except the batch dimension
        out = out.reshape(out.size(0), -1)

        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out

8. define loss function and optimizer

In [None]:
# Network instance with `num_classes` outputs
model = ConvNeuralNet(num_classes)

# Loss: cross-entropy over the network outputs
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum and weight decay over all model parameters
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

# Number of batches per pass over train_loader
total_step = len(train_loader)

9. define variables for ML

In [None]:
# Define relevant variables for the ML task
batch_size = 64
# Two output classes (YES / NO). The training code uses CrossEntropyLoss and
# picks the prediction with an argmax over the outputs; with a single output
# the prediction would always be class 0 and a label of 1 would be out of range.
num_classes = 2
learning_rate = 0.001
num_epochs = 20

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


10. training

In [None]:
import torch
import matplotlib.pyplot as plt

num_epochs = 10  # Change this based on your training setting
losses = []  # Average loss per epoch, for visualization
accuracies = []  # Training accuracy (%) per epoch, for visualization

# The batches are moved to `device` below, so the model has to be on the same device.
model.to(device)
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0  # Sum of batch losses in this epoch
    correct = 0  # Correctly classified samples in this epoch
    total = 0  # Samples seen in this epoch
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()  # Accumulate loss

        # Track training accuracy; detach() keeps this out of the autograd graph
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = epoch_loss / len(train_loader)  # Compute average loss per epoch
    losses.append(avg_loss)  # Store for visualization
    accuracies.append(100 * correct / total if total else 0.0)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

epochs = range(1, num_epochs + 1)

# Figure 1: loss and accuracy side by side.
# The figure is created before the first subplot so both panels share it.
plt.figure(figsize=(12, 5))

# Loss Plot
plt.subplot(1, 2, 1)
plt.plot(epochs, losses, marker='o', linestyle='-')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss vs. Epochs')
plt.grid()

# Accuracy Plot
plt.subplot(1, 2, 2)
plt.plot(epochs, accuracies, marker='s', linestyle='-', color='g')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Epochs')
plt.grid()

plt.tight_layout()

# Figure 2: Visualization of the learning rate effect
plt.figure(figsize=(12, 5))
plt.plot(epochs, losses, marker='o', linestyle='-')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss vs. Epochs (Learning Rate Effect)')
plt.grid()

plt.show()

In [None]:
# Measure accuracy on the training set without tracking gradients.
# eval() switches off training-only layer behavior (a no-op for layers that have none).
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest output per sample
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Report the number of images actually evaluated; guard against an empty loader
    accuracy = 100 * correct / total if total else 0.0
    print('Accuracy of the network on the {} train images: {} %'.format(total, accuracy))