# Dice Detection Project
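Dice detection project for Computer Vision: a YOLO model is trained to detect six-sided dice and read their face values, then used to predict on a second dice image dataset (original and masked images).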

## 1. Import Dependencies

In [None]:
import os
import shutil
from concurrent.futures import ThreadPoolExecutor

import kagglehub
from ultralytics import YOLO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image

Project constants

In [None]:
# Download the d6-dice dataset through kagglehub and report where it landed.
path = kagglehub.dataset_download("nellbyler/d6-dice")
print("Path to dataset files:", path)

# Annotation and image folders inside the downloaded dataset.
ANNOTATION_PATH = os.path.join(path, "d6-dice", "Annotations")
IMAGE_PATH = os.path.join(path, "d6-dice", "Images")

# Path separator used when splitting annotation paths. os.sep is '\\' on
# Windows and '/' on Linux/macOS, so no manual switching is required.
split_char = os.sep

## 2. Split data into train/test/validation

In [None]:
# Collect every annotation (.txt) file below ANNOTATION_PATH.
# classes.txt is not a per-image label file, so it is excluded by name rather
# than by assuming it is the first entry os.walk happens to return.
anotation_files = []
for dir_path, _, files in os.walk(ANNOTATION_PATH):
    for filename in files:
        if filename.endswith('.txt') and filename != 'classes.txt':
            anotation_files.append(os.path.join(dir_path, filename))

# 70/15/15 train/test/validation split
train_ratio = .7
test_ratio = (1.0 - train_ratio) / 2
valid_ratio = test_ratio

SIZE = len(anotation_files)
N = list(range(SIZE))

test_size = int(SIZE * test_ratio)
valid_size = int(SIZE * valid_ratio)

# The training set takes everything not assigned to test/validation, so files
# lost to integer rounding end up in the training set.
train_size = SIZE - test_size - valid_size

print(train_size)
print(test_size)
print(valid_size)
print(len(anotation_files))

## 3. Organize and prep data for YOLO

In [None]:
# Ensure every split has both an images/ and a labels/ folder.
for split_name in ('train', 'test', 'valid'):
    for subfolder in ('images', 'labels'):
        os.makedirs('datasets/' + split_name + '/' + subfolder, exist_ok=True)

# helper to organize files into their respective directories
def copy_files(i, dir, ano_files):
    """Copy one annotation file and its matching image into a split folder.

    Args:
        i: Index into ``ano_files`` of the annotation to copy.
        dir: Destination split directory. The annotation goes into its
            ``labels`` subfolder and the image into its ``images`` subfolder.
        ano_files: List of annotation (.txt) file paths.
    """
    ano_file = ano_files[i]

    # The image shares the annotation's base name, with a .jpg extension.
    # os.path handles the platform separator, so this works on Windows and
    # POSIX alike without a hand-maintained delimiter.
    stem = os.path.splitext(os.path.basename(ano_file))[0]
    img_file = stem + '.jpg'

    # copy into folders
    shutil.copy(ano_file, os.path.join(dir, 'labels'))
    shutil.copy(os.path.join(IMAGE_PATH, img_file), os.path.join(dir, 'images', img_file))

# Populate the local split folders. Positions in anotation_files are laid out
# as [train | valid | test].
for idx in range(train_size):
    copy_files(idx, './datasets/train', anotation_files)

valid_start = train_size
valid_end = valid_start + valid_size
for idx in range(valid_start, valid_end):
    copy_files(idx, './datasets/valid', anotation_files)

test_start = train_size + valid_size
test_end = test_start + test_size
for idx in range(test_start, test_end):
    copy_files(idx, './datasets/test', anotation_files)

In [None]:
import yaml

# Absolute locations of the three split folders, rooted at the working directory.
current_directory = os.getcwd()
source_path = current_directory

train_path, test_path, valid_path = (
    os.path.join(source_path, "datasets", split) for split in ("train", "test", "valid")
)

# Dataset description later passed to model.train(data="./data.yaml").
data_yaml = {
    'train': train_path,
    'test': test_path,
    'val': valid_path,
    'nc': 6,
    'names': list('123456'),
    'device': 'cpu',
}

# Write the description next to the notebook in YAML flow style.
with open('data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)

In [None]:
# load model
model = YOLO("yolo11x.pt")  # loads the model from the yolo11x.pt weights file (nothing is built from a YAML here)

## 4. Train Model

In [None]:
# Train the loaded model on the dataset described by data.yaml,
# for 12 epochs with imgsz=480.
train_results = model.train(data="./data.yaml", epochs=12, imgsz=480)

## 5. Masking

In [None]:
# Download the second dice dataset, used only for prediction.
pred_dataset_path = kagglehub.dataset_download("koryakinp/d6-dices-images")

dataset_images = os.path.join(pred_dataset_path, "dataset-images")

print(pred_dataset_path)

# os.listdir returns entries in arbitrary order; sorting keeps image_list (and
# every index taken from it later) stable across runs and platforms.
image_dir = sorted(os.listdir(dataset_images))
image_list = [os.path.join(dataset_images, file_name) for file_name in image_dir]

print(image_list[0])

In [None]:
# Paths of the masked images; filled in by the masking loop in the next code cell.
masked_images = []

Mask images

In [None]:
os.makedirs('test_images/masked_images', exist_ok=True)

def save_im(image_path, result):
    """Write an image array to disk with OpenCV.

    Args:
        image_path: Destination file path.
        result: Image array to write.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    # cv2.imwrite reports failure through its return value rather than an
    # exception, so check it instead of silently losing the file.
    if not cv2.imwrite(image_path, result):
        raise OSError(f"Could not write image to {image_path}")

# Start from an empty list so re-running this cell does not append the same
# paths a second time.
masked_images.clear()

with ThreadPoolExecutor(max_workers=5) as executor:
    futures = []
    for i, src_path in enumerate(image_list):
        image = cv2.imread(src_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Skipping unreadable image: {src_path}")
            continue
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Inverse binary threshold: the mask is set where gray <= 125.
        _, dice_mask = cv2.threshold(gray_image, 125, 255, cv2.THRESH_BINARY_INV)

        # Pure black/white output: white where the mask is 0, black where the
        # mask is set. Every pixel is one or the other.
        result = np.full_like(image, 255)
        result[dice_mask != 0] = [0, 0, 0]

        # The file name keeps the position in image_list, so a masked image can
        # be traced back to its source even when some inputs were skipped.
        image_name = 'masked_image' + str(i) + '.jpg'
        im_name = os.path.join('test_images', 'masked_images', image_name)
        masked_images.append(im_name)

        # Hand the disk write to the pool while the next image is processed.
        futures.append(executor.submit(save_im, im_name, result))

    # Wait for every write; result() re-raises any exception from a worker.
    for future in futures:
        future.result()

In [None]:
def show_image(image_path):
    """Display the image stored at ``image_path`` with matplotlib, axes hidden.

    Args:
        image_path: Path of the image file to open.
    """
    # The context manager closes the underlying file once the figure is shown.
    with Image.open(image_path) as image:
        plt.imshow(image)
        plt.axis('off')  # Hide the axis
        plt.show()

# Preview the second image of the prediction dataset.
show_image(image_list[1])

## 6. Predict Using a Different Dataset

In [None]:
def get_results(results, image_list, file_name):
    """Flatten per-image detections into one DataFrame and save it as CSV.

    Args:
        results: Sequence of prediction results; each item exposes
            ``boxes.data`` with six values per detected box.
        image_list: Image paths in the same order the predictions were made;
            ``image_list[k]`` is recorded as the file of ``results[k]``.
        file_name: Base name (without extension) of the CSV written to
            ``results/data``.

    Returns:
        DataFrame with columns ``x, y, x2, y2, cl, label, i, file``. ``label``
        is the class id (0-5) mapped to the die face (1-6) and ``i`` is the
        position of the image in ``results``.
    """
    box_columns = ['x', 'y', 'x2', 'y2', 'cl', 'label']
    all_columns = box_columns + ['i', 'file']

    # class ids 0..5 correspond to die faces 1..6
    class_dict = {class_id: class_id + 1 for class_id in range(6)}

    # One frame per image. The file is taken positionally: re-sorting the
    # paths here would pair results with the wrong files (e.g. "img10" sorts
    # before "img2").
    frames = []
    for i, prediction in enumerate(results):
        boxes = prediction.boxes.data.cpu().numpy()
        frame = pd.DataFrame(boxes, columns=box_columns).astype(float)
        frame['i'] = i
        frame['file'] = image_list[i]
        frames.append(frame)

    # Concatenate once instead of growing a frame inside the loop; an empty
    # result list still yields a frame with the expected columns.
    if frames:
        data = pd.concat(frames, axis=0, ignore_index=True)
    else:
        data = pd.DataFrame(columns=all_columns)

    # apply correct labellings
    data['label'] = data['label'].map(class_dict)
    data['i'] = data['i'].astype(int)

    # output data; create the folder that is actually written to
    out_dir = os.path.join("results", "data")
    os.makedirs(out_dir, exist_ok=True)
    data.to_csv(os.path.join(out_dir, file_name + ".csv"), index=False, header=True)

    # display() only exists inside IPython/Jupyter; fall back to print elsewhere.
    try:
        display(data)
    except NameError:
        print(data)

    return data

In [None]:
# Preview the image at position 21 of image_list (requires at least 22 images).
show_image(image_list[21])

Predict bounding boxes and values of the dice in another dataset

In [None]:
# Load the best weights saved under runs/detect/train4.
# NOTE(review): "train4" is a hard-coded run folder — confirm it is the run
# whose weights should be evaluated before executing this cell.
trained_model = YOLO(os.path.join("runs", "detect", "train4", "weights", "best.pt"))
#trained_model_e20 = YOLO(os.path.join("runs", "detect", "train", "weights", "best.pt"))

Predict with epoch = 12

In [None]:
# Run the trained model on the first 20 original images and on the first 20
# masked images, passing conf=0.2 to predict().
results = trained_model.predict(source=image_list[:20], conf=0.2)
results_masked = trained_model.predict(source=masked_images[:20], conf=0.2)

Predict with epoch=20

In [None]:
#results_e20 = trained_model_e20.predict(source=image_list[:20], conf=0.2)
#results_e20_masked = trained_model_e20.predict(source=masked_images[:20], conf=0.2)

Generate Data Frames

In [None]:
# Make sure the folder that get_results writes its CSV files into exists.
os.makedirs(os.path.join("results","data"), exist_ok=True)

# Tabulate the detections for the original and the masked images; each call
# also writes a CSV named after its last argument.
orig_df = get_results(results, image_list, "orig_e12")
masked_df = get_results(results_masked, masked_images, "masked_e12")

#orig_df_e20 = get_results(results_e20, image_list, "orig_e20")
#masked_df_e20 = get_results(results_e20_masked, masked_images, "masked_e20")

## 7. Remove duplicate bounding boxes

Helper Functions

In [None]:
def coincides(b1, b2, scale=0.1):
    """Tell whether two bounding boxes start at (nearly) the same point.

    The distance between the first corner of ``b1`` and the first corner of
    ``b2`` is compared against ``scale`` times the diagonal length of ``b1``.

    Args:
        b1: First box; elements 0-1 are one corner and elements 2-3 the other.
        b2: Second box; only elements 0-1 are used.
        scale: Fraction of ``b1``'s diagonal used as the distance threshold.

    Returns:
        True when the corner distance is strictly below the threshold.
    """
    b1_start = np.array([b1[0], b1[1]])
    b1_end = np.array([b1[2], b1[3]])
    b2_start = np.array([b2[0], b2[1]])

    # the allowed offset grows with the size (diagonal) of the first box
    max_offset = scale * np.linalg.norm(b1_start - b1_end)

    # how far apart the two boxes' first corners are
    offset = np.linalg.norm(b1_start - b2_start)

    return offset < max_offset

Removal of Extra Bounding Boxes

In [None]:
# compare bounding boxes
def compare_boxes(data, cl):
    """Find coinciding boxes and select the lower-confidence ones for removal.

    Every box is compared with every other box that has not been marked yet.
    When two boxes coincide (see ``coincides`` with a scale of 0.05), the one
    with the lower confidence is marked; on a tie the box from the outer loop
    is marked.

    Args:
        data: Boxes indexable by position; each box is passed to ``coincides``.
        cl: Confidence values indexable by the same positions as ``data``.

    Returns:
        List of positions in ``data`` to remove, each listed once, in the
        order they were marked.
    """
    remove_idx = []
    removed = set()  # same content as remove_idx, for O(1) membership tests

    for i in range(len(data)):
        # if data was already removed, then skip
        if i in removed:
            continue

        b1 = data[i]

        for j in range(len(data)):
            # Skip the box itself (by position, so that two distinct rows
            # with identical coordinates are still treated as duplicates)
            # and anything already removed.
            if j == i or j in removed:
                continue

            # if 2 bounding boxes coincide, keep the one with the higher confidence level
            if coincides(b1, data[j], .05):
                loser = j if cl[i] > cl[j] else i
                if loser not in removed:
                    removed.add(loser)
                    remove_idx.append(loser)

    return remove_idx

# remove any duplicate bounding boxes
def remove_dup_BB(df):
    """Drop duplicate bounding boxes, image by image.

    Rows are grouped by the image index column ``i``. Within each group the
    box coordinates (all columns except the last four) and the ``cl`` column
    are passed to ``compare_boxes``, and the rows it selects are dropped.

    Args:
        df: Detection frame containing at least the ``i`` and ``cl`` columns,
            with the box coordinates in every column but the last four.

    Returns:
        ``df`` itself when nothing has to be removed (including an empty
        frame), otherwise a new frame without the duplicate rows.
    """
    if df.empty:
        return df

    remove_labels = []
    for _, group in df.groupby('i', sort=False):
        # keep only the coordinates: strip the last four columns
        boxes = group.iloc[:, :-4].to_numpy()
        confidences = group['cl'].to_numpy()

        dup_positions = compare_boxes(boxes, confidences)
        if dup_positions:
            # Translate positions inside the group into the frame's own index
            # labels, so row order and index gaps do not matter.
            remove_labels.extend(group.index[dup_positions])

    # filter out wrong bounding boxes
    if not remove_labels:
        return df

    return df.drop(remove_labels)

Output dataframe to csv

In [None]:
# Folder for the de-duplicated detection tables.
os.makedirs(os.path.join("results","filtered"), exist_ok=True)

# De-duplicate the detections on the original images and save them.
filtered_orig_e12_df = remove_dup_BB(orig_df)
filtered_orig_e12_df.to_csv(os.path.join("results", "filtered", "filtered_orig.csv"), index=False, header=True)

#filtered_orig_e20_df = remove_dup_BB(orig_df_e20)
#filtered_orig_e20_df.to_csv(os.path.join("results", "filtered", "filtered_orig_e20.csv"), index=False, header=True)

# Same for the detections on the masked images.
filtered_mask_e12_df = remove_dup_BB(masked_df)
filtered_mask_e12_df.to_csv(os.path.join("results", "filtered", "filtered_masked.csv"), index=False, header=True)

#filtered_mask_e20_df = remove_dup_BB(masked_df_e20)
#filtered_mask_e20_df.to_csv(os.path.join("results", "filtered", "filtered_masked_e20.csv"), index=False, header=True)

Show the rows that were removed as duplicates

In [None]:
# display difference
def show_diff(orig, filt):
    """Display the rows that appear only once across ``orig`` and ``filt``.

    Both frames are stacked and every row that has a duplicate is discarded,
    leaving the rows present in just one of the two frames.
    """
    stacked = pd.concat([orig, filt])
    only_once = stacked.drop_duplicates(keep=False)
    display(only_once)

print("epoch=12")
for before, after in ((orig_df, filtered_orig_e12_df), (masked_df, filtered_mask_e12_df)):
    show_diff(before, after)

#print("epoch=20")
#show_diff(orig_df_e20, filtered_orig_e20_df)
#show_diff(masked_df_e20, filtered_mask_e20_df)

In [None]:
# Count, per image, how many detected dice show each face value (1-6).
# dict_list[k] holds the counts for image index k. Image indices without any
# detection keep an all-zero entry, so list positions stay aligned with the
# image index. Every detection row is counted, including the first row of
# each image.
# NOTE: images after the last index present in orig_df['i'] get no entry.
num_images = int(orig_df['i'].max()) + 1 if len(orig_df) else 0
dict_list = [{face: 0 for face in range(1, 7)} for _ in range(num_images)]

for img_index, label in zip(orig_df['i'], orig_df['label']):
    dict_list[int(img_index)][int(label)] += 1

In [None]:
import tkinter as tk
from tkinter import ttk
from PIL import Image, ImageTk  # Use PIL for image handling

# Image shown in the summary window, together with its per-face counts.
image_index = 9
rolls = dict_list[image_index]
image_path = image_list[image_index]

# Total of all counted face values. It is named roll_total (not `sum`) so the
# built-in sum() stays usable for the rest of the notebook session.
roll_total = 0
for face in range(1, 7):
    roll_total += rolls[face] * face

# Table rows: one per face value, followed by the total.
data = [[f"{face}'s", rolls[face]] for face in range(1, 7)]
data.append(["SUM", roll_total])

# Create the main application window
root = tk.Tk()
root.title("Table with Image Example")

# Create a style for the Treeview
style = ttk.Style()
style.configure("Treeview", font=("Helvetica", 14))
style.configure("Treeview.Heading", font=("Helvetica", 16, "bold"))

# Frame to hold the table and image
main_frame = tk.Frame(root)
main_frame.pack(expand=True, fill=tk.BOTH)

# Add a treeview for the table
table_frame = tk.Frame(main_frame)
table_frame.pack(side=tk.LEFT, expand=True, fill=tk.BOTH, padx=10, pady=10)

columns = ["Dice #", "Roll count"]
tree = ttk.Treeview(table_frame, columns=columns, show="headings")
for col in columns:
    tree.heading(col, text=col)
    tree.column(col, width=120, anchor="center")

for row in data:
    tree.insert("", tk.END, values=row)

tree.pack(expand=True, fill=tk.BOTH)

# Add an image to the side
image_frame = tk.Frame(main_frame)
image_frame.pack(side=tk.RIGHT, padx=10, pady=10)

try:
    # Load and shrink the picture; the context manager closes the source file
    # once the resized copy has been handed to Tk.
    with Image.open(image_path) as img:
        tk_img = ImageTk.PhotoImage(img.resize((250, 250)))

    # Display the image in a Label and keep the reference
    image_label = tk.Label(image_frame, image=tk_img)
    image_label.image = tk_img  # Keep a reference to avoid garbage collection
    image_label.pack()
except FileNotFoundError:
    image_label = tk.Label(image_frame, text="Image not found", font=("Helvetica", 14))
    image_label.pack()
except Exception as e:
    # Last-resort handler so the table window still opens without the image.
    print(f"An unexpected error occurred: {e}")  # Debugging fallback

# Start the application
root.mainloop()