In [11]:
import cv2 as cv
import numpy as np
import os
import shutil
import torch
import random

ModuleNotFoundError: No module named 'copyfile'

In [None]:
def filter(image, debug = 0):
    """Turn a BGR image into an inverted binary mask.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian kernel
    and thresholded at 75 with ``cv.THRESH_BINARY_INV``.

    NOTE: this function shadows Python's built-in ``filter`` for every cell
    that runs after it.

    Args:
        image: Image handed to ``cv.cvtColor`` with ``cv.COLOR_BGR2GRAY``.
        debug: Any value other than 0 additionally saves the mask as
            "filter.png" in the current working directory.

    Returns:
        The ``(ret, thresh)`` pair returned by ``cv.threshold``: the threshold
        value that was used and the binary mask.
    """
    # Work on a single channel from here on.
    grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # Smooth out noise so isolated pixels do not survive the threshold.
    smoothed = cv.GaussianBlur(grayscale, (5, 5), 0)
    # Inverse binary threshold: the mask is the black/white version of the image.
    level, mask = cv.threshold(smoothed, 75, 255, cv.THRESH_BINARY_INV)

    if debug == 0:
        return level, mask

    # Debug mode: dump the mask to disk for visual inspection.
    cv.imwrite("filter.png", mask)
    return level, mask

def centroid(image, debug = 0):
    """Return the centroid of the largest contour found in ``image``.

    Args:
        image: Image handed to ``cv.findContours`` (in this notebook, the
            binary mask produced by ``filter``).
        debug: Any value other than 0 prints the computed coordinates.

    Returns:
        ``(cx, cy)``: x and y coordinates of the centroid, truncated to int.

    Raises:
        ValueError: If no contour is found, or if the largest contour has a
            zero ``m00`` moment, in which case the centroid is undefined.
            (Previously these cases crashed with ``UnboundLocalError`` at the
            ``return`` statement.)
    """
    # Collect every contour in the image.
    contours, _hierarchy = cv.findContours(image, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        raise ValueError("centroid: no contours found in image")

    # Keep only the contour with the largest area.
    largest = max(contours, key = cv.contourArea)

    # Centroid from the spatial moments: (m10 / m00, m01 / m00).
    moments = cv.moments(largest)
    area = moments['m00']
    if area == 0:
        raise ValueError("centroid: largest contour has zero area (m00 == 0)")

    cx = int(moments['m10'] / area)
    cy = int(moments['m01'] / area)
    if debug != 0:
        print(f"x: {cx} y: {cy}")
    return cx, cy

In [None]:
def crop(path, new_path, half_size = 215):
    """Crop every image in ``path`` around its largest blob and save it in ``new_path``.

    For each regular file in ``path`` that OpenCV can read, the image is
    binarized with ``filter``, the centroid of the largest contour is located
    with ``centroid``, and a window of ``2 * half_size + 1`` pixels per side
    centred on that point is written to ``new_path`` under the same file name.
    The window is truncated where it would extend past the image border.

    Images that cannot be read, have no usable centroid, or cannot be written
    are skipped; the last two cases print a message.

    Args:
        path: Folder containing the source images.
        new_path: Existing folder that receives the cropped images.
        half_size: Number of pixels kept on each side of the centroid.

    Raises:
        ValueError: If ``half_size`` is negative.
    """
    if half_size < 0:
        raise ValueError("half_size must be non-negative")

    for item in os.listdir(path):
        source = os.path.join(path, item)
        if not os.path.isfile(source):
            continue

        image = cv.imread(source)
        if image is None:
            # cv.imread returns None for files it cannot decode.
            continue

        ret, thresh = filter(image)
        try:
            cx, cy = centroid(thresh)
        except (ValueError, UnboundLocalError) as exc:
            # centroid() fails when the mask has no contour or only a
            # zero-area one; skip this image instead of aborting the batch.
            print(f"Skipping {item}: no centroid found ({exc})")
            continue

        # Clamp the lower bounds at 0: a negative slice start would wrap
        # around to the opposite edge and yield a wrong or empty crop.
        top = max(cy - half_size, 0)
        left = max(cx - half_size, 0)
        imageOut = image[top:cy + half_size + 1, left:cx + half_size + 1]

        try:
            written = cv.imwrite(os.path.join(new_path, item), imageOut)
        except cv.error as exc:
            print(f"Skipping {item}: could not write crop ({exc})")
            continue
        if not written:
            print(f"Skipping {item}: could not write crop")

In [None]:
# Input folders with the original images ("Sin Fallo" / "Con Fallo").
no_fail_path = "Dataset/Original/Sin Fallo"
fail_path = "Dataset/Original/Con Fallo"

# Output folders for the cropped images.
new_no_fail_path =  "Dataset/Cortado/Sin Fallo"
new_fail_path =  "Dataset/Cortado/Con Fallo"

# Start from empty output folders: remove whatever an earlier run left
# behind, then create the folder again.
for _cropped_dir in (new_no_fail_path, new_fail_path):
    if os.path.exists(_cropped_dir):
        shutil.rmtree(_cropped_dir)
    os.makedirs(_cropped_dir)

crop(no_fail_path, new_no_fail_path)
crop(fail_path, new_fail_path)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device:', device)

Using PyTorch version: 1.13.0  Device: cuda


In [None]:
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, TEST_DIR,
               test_ratio = 0.08, validation_ratio = 0.20, seed = None):
    """Randomly copy the files of ``SOURCE_DIR`` into test, validation and training folders.

    The file names are shuffled and then assigned by position: the first
    ``test_ratio`` share goes to ``TEST_DIR``, the next ``validation_ratio``
    share to ``VALIDATION_DIR`` and everything else to ``TRAINING_DIR``.
    With the defaults that is roughly 8% test, 20% validation and 72%
    training. Sub-directories of ``SOURCE_DIR`` are ignored.

    Files are copied with ``shutil.copyfile``, so a file that already exists
    under the same name in a destination folder is overwritten.

    Args:
        SOURCE_DIR: Folder whose files are split.
        TRAINING_DIR: Existing folder receiving the training files.
        VALIDATION_DIR: Existing folder receiving the validation files.
        TEST_DIR: Existing folder receiving the test files.
        test_ratio: Share of the files sent to ``TEST_DIR``.
        validation_ratio: Share of the files sent to ``VALIDATION_DIR``.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global ``random`` module state is used, as before.

    Raises:
        ValueError: If a ratio is negative or the two ratios add up to more
            than 1.
    """
    # Round away float noise so that 0.08 + 0.20 compares exactly like 0.28.
    validation_cutoff = round(test_ratio + validation_ratio, 10)
    if test_ratio < 0 or validation_ratio < 0 or validation_cutoff > 1:
        raise ValueError("test_ratio and validation_ratio must be >= 0 and sum to at most 1")

    # Regular files only; sorted so a given seed always yields the same split
    # regardless of the order in which the OS lists the directory.
    images = sorted(
        name for name in os.listdir(SOURCE_DIR)
        if os.path.isfile(os.path.join(SOURCE_DIR, name))
    )

    # Shuffle the names before assigning them to the three subsets.
    rng = random if seed is None else random.Random(seed)
    images = rng.sample(images, len(images))

    total = len(images)
    for i, name in enumerate(images):
        if i < total * test_ratio:
            target_dir = TEST_DIR
        elif i < total * validation_cutoff:
            target_dir = VALIDATION_DIR
        else:
            target_dir = TRAINING_DIR
        shutil.copyfile(os.path.join(SOURCE_DIR, name), os.path.join(target_dir, name))

In [None]:
TRAIN_PATH = "Dataset/train" # Folder for the training data
VAL_PATH = "Dataset/validation" # Folder for the validation data
TEST_PATH = "Dataset/test" # Folder for the test data

# Start from empty split folders: remove whatever an earlier run left
# behind, then create the folder again.
for _split_dir in (TRAIN_PATH, VAL_PATH, TEST_PATH):
    if os.path.exists(_split_dir):
        shutil.rmtree(_split_dir)
    os.makedirs(_split_dir)

# Both cropped folders are split into the same three destination folders.
split_data(new_no_fail_path, TRAIN_PATH, VAL_PATH, TEST_PATH)
split_data(new_fail_path, TRAIN_PATH, VAL_PATH, TEST_PATH)