In [1]:

import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from skimage import feature

from pathlib import Path
import random
import gc

## Descriptores

### Descriptor temporal

In [2]:

def senalTemporal(cap, esq, n, m, i):
  """Build the temporal brightness signal of one patch.

  For each of the ``m`` consecutive frames ``cap[i] .. cap[i + m - 1]`` the
  ``n`` x ``n`` window whose corner is ``esq`` (``esq[0]`` on the first axis,
  ``esq[1]`` on the second) is converted from BGR to HSV and the mean of its
  third channel (V) is recorded.

  Args:
    cap: indexable sequence of BGR frames.
    esq: pair with the window's starting offsets on the first two axes.
    n: side length of the window.
    m: number of consecutive frames to sample.
    i: index of the first frame.

  Returns:
    float32 array of length ``m`` with one mean V value per frame.
  """
  fila, columna = esq[0], esq[1]
  signal = np.zeros(m, dtype=np.float32)

  for pos, indice_frame in enumerate(range(i, i + m)):
    recorte = cap[indice_frame][fila:fila + n, columna:columna + n]
    canal_v = cv.cvtColor(recorte, cv.COLOR_BGR2HSV)[:, :, 2]
    signal[pos] = np.mean(canal_v)

  return signal

### Descriptor espacial

In [3]:
def histogramaLBP(cap, esq, n, m, i):
  """Compute the normalised LBP histogram of one patch of frame ``i``.

  The ``n`` x ``n`` window of ``cap[i]`` starting at ``esq`` is converted to
  grayscale, its local-binary-pattern image is computed with 8 sampling
  points at radius 1 (``method="default"``), and the resulting codes are
  binned into 256 bins over ``(0, 256)`` with ``density=True``.

  ``m`` is not used; it is accepted so the signature matches the other
  descriptor functions.

  Returns:
    The ``(hist, bin_edges)`` pair produced by ``np.histogram``.
  """
  RADIO = 1
  PUNTOS = 8 * RADIO
  N_BINS = 256

  fila, columna = esq[0], esq[1]
  recorte_bgr = cap[i][fila:fila + n, columna:columna + n]
  recorte_gris = cv.cvtColor(recorte_bgr, cv.COLOR_BGR2GRAY)

  codigos = feature.local_binary_pattern(recorte_gris, PUNTOS, RADIO, method="default")

  return np.histogram(codigos, bins=N_BINS, range=(0, N_BINS), density=True)

In [14]:
def obtenerDescriptor(cap, esq, n, m, i):
  """Build the combined temporal + spatial descriptor of one patch.

  Temporal part: magnitude of the FFT of ``senalTemporal(...)`` divided by
  its L1 norm (an all-zero vector of length ``m`` when the norm is 0).
  Spatial part: the histogram returned by ``histogramaLBP(...)``.

  Returns:
    float32 1-D array with the temporal part followed by the spatial part.
  """
  # Temporal descriptor: L1-normalised FFT magnitude of the brightness signal.
  espectro = np.abs(np.fft.fft(senalTemporal(cap, esq, n, m, i).astype(np.float32)))
  suma_abs = np.linalg.norm(espectro, ord=1)
  if suma_abs != 0:
    temporal = espectro / suma_abs
  else:
    temporal = np.zeros(m, dtype=np.float32)

  # Spatial descriptor: only the histogram values are needed, not the edges.
  espacial = histogramaLBP(cap, esq, n, m, i)[0]

  # axis=None flattens both parts into a single 1-D vector.
  return np.concatenate((temporal, espacial), axis=None).astype(np.float32)

In [5]:
def esParcheValido(esq, frame_shape, n):
  """Tell whether an ``n``-sized patch anchored at ``esq`` fits in the frame.

  ``frame_shape`` is unpacked as ``(W, H)``. ``esq[1] + n`` is checked
  against ``W`` and ``esq[0] + n`` against ``H``. Only the upper bounds are
  checked.
  """
  ancho, alto = frame_shape
  inicio_alto, inicio_ancho = esq

  if not (inicio_ancho + n <= ancho):
    return False
  return inicio_alto + n <= alto


In [6]:

def obtenerCoordenadaEnGrilla(frame_shape, n):
  """Pick a random cell of the ``n``-spaced grid and return its corner.

  ``frame_shape`` is unpacked as ``(W, H)``. One index is drawn uniformly in
  ``[0, W // n - 1]`` and then another in ``[0, H // n - 1]``.

  Returns:
    ``(h_index * n, w_index * n)``, i.e. the H-axis offset first.
  """
  ancho, alto = frame_shape
  ultima_columna = ancho // n - 1
  ultima_fila = alto // n - 1

  # Draw order (W axis first, then H axis) is kept so seeded runs reproduce.
  columna = random.randint(0, ultima_columna)
  fila = random.randint(0, ultima_fila)

  return (fila * n, columna * n)


In [7]:
def listar_archivos(directorio):
    """Recursively list every regular file under ``directorio``.

    Returns:
        Absolute paths as strings, ordered by file name only (the parent
        directory does not take part in the ordering).
    """
    encontrados = []
    for entrada in Path(directorio).rglob('*'):
        if entrada.is_file():
            encontrados.append(entrada.absolute())

    encontrados.sort(key=lambda ruta: ruta.name)
    return [str(ruta) for ruta in encontrados]

#### Funciones para extraer descriptores para datos de Train y de Test

In [12]:
def _guardarLoteTrain(descriptores, etiquetas, nombre_datos, nombre_etiquetas):
  """Write the pending descriptors/labels with ``np.save`` and empty both lists."""
  np.save(nombre_datos, np.array(descriptores))
  np.save(nombre_etiquetas, np.array(etiquetas))

  descriptores.clear()
  etiquetas.clear()


def obtenerDescriptoresDeTrain(videos_train, cant_patches, n, m):
  """Sample random patches from every training video and save their descriptors.

  For each ``(video_path, mask_path)`` pair the whole video is decoded into
  memory; for every frame index ``i`` in ``range(num_frames - m - 1)`` a
  number of random grid-aligned ``n`` x ``n`` patches is drawn and described
  with ``obtenerDescriptor``. A patch is labelled 1 when more than half of
  the corresponding mask pixels equal 255, otherwise 0. A ``mask_path`` of
  ``'NO HAY'`` means "no mask": an all-zero mask is used.

  Output files (written with ``np.save``, so ``.npy`` is appended):
    * after every 20th video ``k``: ``1_train_data_{k}`` and
      ``1_train_masks_{k}``;
    * after the last video, if it was not itself a multiple of 20:
      ``1_train_data_{k}`` and ``1_masks_train_{k}`` (the differing label
      file name is kept on purpose for compatibility with existing loaders).

  Args:
    videos_train: iterable of ``(video_path, mask_path)`` pairs.
    cant_patches: target number of patches per video; the per-frame count is
      ``max(cant_patches // usable_frames, 1)``.
    n: patch side length.
    m: temporal window length (frames per descriptor).

  Raises:
    FileNotFoundError: if a mask file cannot be read.
    ValueError: if a descriptor does not have length ``m + 256``.
  """
  train_descriptors = []
  train_masks = []

  # Temporal part has m values, the LBP histogram has 256 bins.
  largo_descriptor = m + 256

  k = 0

  for k, (video_path, mask_path) in enumerate(videos_train, start=1):
    cap = cv.VideoCapture(video_path)

    frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT)) - m - 1
    W = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
    H = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

    # More frames than cant_patches -> still take one patch per frame.
    # frames <= 0 (video shorter than the window) must not divide by zero;
    # the frame loop below is empty for such videos anyway.
    cant_patches_per_frame = max(cant_patches // frames, 1) if frames > 0 else 1

    if mask_path != 'NO HAY':
      mask = cv.imread(mask_path, cv.IMREAD_GRAYSCALE)
      if mask is None:
        cap.release()
        raise FileNotFoundError(f"Could not read mask image: {mask_path}")
    else:
      mask = np.zeros((H, W), dtype=np.uint8)

    # Decode the full video into memory so frames can be indexed randomly.
    video = []

    while cap.isOpened():
      ret, frame = cap.read()

      if not ret:
        break

      video.append(frame)

    cap.release()

    video = np.array(video)

    gc.collect()

    for i in range(video.shape[0]-m-1):

      for _ in range(cant_patches_per_frame):
        esq = obtenerCoordenadaEnGrilla((W,H), n)

        while not esParcheValido(esq, (W,H), n):
          esq = obtenerCoordenadaEnGrilla((W,H), n)

        des = obtenerDescriptor(video, esq, n, m, i)
        mask_patch = mask[esq[0]:esq[0]+n, esq[1]:esq[1]+n]

        if des.shape != (largo_descriptor,):
          raise ValueError(
            f"Unexpected descriptor shape {des.shape}, expected ({largo_descriptor},)"
          )

        train_descriptors.append(des)
        train_masks.append(1 if np.sum(mask_patch == 255) > (mask_patch.size / 2) else 0)

    # Checkpoint AFTER the k-th video so that the final save below can never
    # reuse (and overwrite) the file name of a checkpoint.
    if k % 20 == 0:
      _guardarLoteTrain(train_descriptors, train_masks,
                        f'1_train_data_{k}', f'1_train_masks_{k}')

  # Remainder after the last checkpoint. When k is a multiple of 20 the
  # checkpoint above already flushed everything under the same number.
  if k % 20 != 0:
    _guardarLoteTrain(train_descriptors, train_masks,
                      f'1_train_data_{k}', f'1_masks_train_{k}')

In [9]:
def obtenerDescriptoresDeTest(videos_test, n, m):
  """Describe every grid patch of the first frames of each test video.

  For each ``(video_path, mask_path)`` pair the video is decoded into memory
  and, for up to 20 starting frames, every patch of the ``n``-spaced grid
  (``range(0, H, n)`` x ``range(0, W, n)``) is described with
  ``obtenerDescriptor``. Only starting frames that leave a full window of
  ``m`` frames available are used, so videos shorter than ``m`` frames
  contribute nothing instead of raising ``IndexError``.

  Each patch is labelled 1 when more than half of the corresponding mask
  pixels equal 255, otherwise 0. A ``mask_path`` of ``'NO HAY'`` means "no
  mask": an all-zero mask is used.

  Args:
    videos_test: iterable of ``(video_path, mask_path)`` pairs.
    n: patch side length / grid step.
    m: temporal window length (frames per descriptor).

  Returns:
    ``(descriptors, labels)`` as NumPy arrays. ``descriptors`` has one entry
    per processed frame, each holding one descriptor per grid patch.
    ``labels`` has the matching layout: one label per grid patch for every
    processed frame (previously only the label of the last grid patch was
    stored per frame).

  Raises:
    FileNotFoundError: if a mask file cannot be read.
  """
  test_descriptors = []
  test_masks = []
  max_frames = 20

  for video_path, mask_path in videos_test:
    cap = cv.VideoCapture(video_path)

    W = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
    H = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

    if mask_path != 'NO HAY':
      mask = cv.imread(mask_path, cv.IMREAD_GRAYSCALE)
      if mask is None:
        cap.release()
        raise FileNotFoundError(f"Could not read mask image: {mask_path}")
    else:
      mask = np.zeros((H, W), dtype=np.uint8)

    # Decode the full video into memory so frames can be indexed directly.
    video = []

    while cap.isOpened():
      ret, frame = cap.read()

      if not ret:
          print("Can't receive frame (stream end?). Exiting ...")
          break

      video.append(frame)

    cap.release()

    video = np.array(video)

    gc.collect()

    # Grid corners in the same order the descriptors are produced.
    esquinas = [(x, y) for x in range(0, H, n) for y in range(0, W, n)]

    # The mask does not change between frames, so the per-patch labels are
    # computed once per video.
    etiquetas = []
    for x, y in esquinas:
      mask_patch = mask[x:x+n, y:y+n]
      etiquetas.append(1 if np.sum(mask_patch == 255) > (mask_patch.size / 2) else 0)

    # A descriptor starting at frame i reads frames i .. i+m-1, so the last
    # valid start is video.shape[0] - m.
    total_frames = video.shape[0]
    frames_utiles = max(0, min(max_frames, total_frames, total_frames - m + 1))

    for i in range(frames_utiles):
      print(i)
      frame_descriptors = [obtenerDescriptor(video, esq, n, m, i) for esq in esquinas]

      test_descriptors.append(frame_descriptors)
      test_masks.append(list(etiquetas))


  return np.array(test_descriptors), np.array(test_masks)

### Crear el dataset de Train

In [None]:
def splitDataset():
  """Split the water / non-water videos into shuffled train and test lists.

  Videos and masks are listed with ``listar_archivos`` (sorted by file
  name); water videos are paired with masks by position, non-water videos
  get the placeholder mask ``"NO HAY"``. 80% of the total is assigned to
  train, keeping the water / non-water proportion of the whole dataset; the
  remaining videos of each class go to test.

  Uses the global ``random`` state (four ``random.shuffle`` calls), so seed
  it beforehand for a reproducible split.

  Returns:
    ``(videos_train, videos_test)``, each a list of
    ``(video_path, mask_path)`` tuples.

  Raises:
    ValueError: if the number of water videos and masks differ, or if no
      videos were found at all.
  """
  train_split = 0.8

  rutas_no_agua = listar_archivos(r"**PATH_TO_NON_WATER_RESIDUAL_VIDEOS**")
  rutas_agua = listar_archivos(r"**PATH_TO_WATER_RESIDUAL_VIDEOS**")
  masks_agua = listar_archivos(r"**PATH_TO_WATER_VIDEOS_MASKS**")

  # zip() would silently drop the surplus and pair videos with the wrong
  # masks, so a count mismatch is treated as an error.
  if len(rutas_agua) != len(masks_agua):
    raise ValueError(
      f"Found {len(rutas_agua)} water videos but {len(masks_agua)} masks; "
      "they must match one-to-one"
    )

  cant_total_videos = len(rutas_agua) + len(rutas_no_agua)
  if cant_total_videos == 0:
    raise ValueError("No videos found in the configured directories")

  videos_agua = list(zip(rutas_agua, masks_agua))
  videos_no_agua = [(ruta, "NO HAY") for ruta in rutas_no_agua]

  proporcion_videos_agua = len(videos_agua) / cant_total_videos

  cant_total_videos_train = cant_total_videos * train_split

  cant_videos_agua_train = int(cant_total_videos_train * proporcion_videos_agua)
  cant_videos_no_agua_train = int(cant_total_videos_train - cant_videos_agua_train)

  random.shuffle(videos_agua)
  random.shuffle(videos_no_agua)

  # Test receives whatever is left of each class after the train slice.
  videos_train = videos_agua[:cant_videos_agua_train] + videos_no_agua[:cant_videos_no_agua_train]
  videos_test = videos_agua[cant_videos_agua_train:] + videos_no_agua[cant_videos_no_agua_train:]

  random.shuffle(videos_train)
  random.shuffle(videos_test)


  return videos_train, videos_test

In [11]:
# 100 videos without water
# 160 videos with water
def generarDatosTrain(n=20, m=200, cant_patches=2500):
  """Split the dataset, persist the split, and extract the training descriptors.

  The train/test lists returned by ``splitDataset`` are saved with
  ``np.save`` as ``train_split_path`` and ``test_split_path``; the training
  descriptors are then written to disk by ``obtenerDescriptoresDeTrain``.

  Args:
    n: patch side length.
    m: temporal window length (frames per descriptor).
    cant_patches: target number of patches sampled per video.
  """
  videos_train, videos_test = splitDataset()

  np.save("train_split_path", np.array(videos_train))
  np.save("test_split_path", np.array(videos_test))

  obtenerDescriptoresDeTrain(videos_train, cant_patches, n, m)



In [None]:
# Seed Python's `random` module before generating the data: both the dataset
# shuffles in splitDataset and the patch sampling in
# obtenerCoordenadaEnGrilla draw from it.
random.seed(47)
generarDatosTrain()

### Cargamos los datos de Train del disco y entrenamos el Random Forest

In [None]:

from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Load the descriptor/label batches from disk.
# Batches 20..200 follow the "1_train_data_{k}" / "1_train_masks_{k}" naming;
# the last batch (208) stores its labels as "1_masks_train_208".
checkpoints = range(20, 201, 20)
archivos_datos = [f"1_train_data_{k}.npy" for k in checkpoints] + ["1_train_data_208.npy"]
archivos_etiquetas = [f"1_train_masks_{k}.npy" for k in checkpoints] + ["1_masks_train_208.npy"]

train_desc = np.concatenate([np.load(archivo) for archivo in archivos_datos], axis=0)
y_multi = np.concatenate([np.load(archivo) for archivo in archivos_etiquetas], axis=0)

# Every descriptor needs exactly one label.
assert train_desc.shape[0] == y_multi.shape[0], "descriptors and labels are misaligned"


y_train = np.array(y_multi)


model = RandomForestClassifier(n_estimators=100, criterion="entropy", random_state=47, n_jobs=4)
model.fit(train_desc, y_multi)

#### Guardamos el modelo en el disco

In [36]:
import joblib

# Persist the fitted model; the segmentation cells reload it from this file.
joblib.dump(model, 'random_forest_model_hybrid.joblib')

['random_forest_model_hybrid.joblib']

### Generamos las segmentaciones en los videos de Test

In [None]:
import os
import joblib

model = joblib.load("random_forest_model_hybrid.joblib")
videos = np.load("test_split_path.npy")

# Patch size and temporal window passed to the descriptor extraction.
lado_parche = 20
largo_ventana = 200

# Output video settings. The patch grid is derived from the output size, so
# the input videos are assumed to be ancho x altura as well.
ancho = 800
altura = 600
fps = 5
fourcc = cv.VideoWriter_fourcc(*'XVID')
codec = fourcc
filas = altura // lado_parche
columnas = ancho // lado_parche

for video in videos:
  print(video[0])

  # Output path: swap the 'Residuales' folder name and drop any path
  # component that is exactly 'videos'.
  ruta_modificada = Path(video[0].replace('Residuales', 'Segmentacion test')).parts
  ruta_sin_videos = Path(*[parte for parte in ruta_modificada if parte != 'videos'])

  # If nothing was replaced/dropped the writer would overwrite the input.
  if ruta_sin_videos == Path(video[0]):
    raise ValueError(f"Output path equals input path: {ruta_sin_videos}")

  # cv.VideoWriter does not create missing directories; it just fails to open.
  ruta_sin_videos.parent.mkdir(parents=True, exist_ok=True)

  descriptores = obtenerDescriptoresDeTest([video], lado_parche, largo_ventana)[0]

  out = cv.VideoWriter(str(ruta_sin_videos), codec, fps, (ancho, altura), False)
  if not out.isOpened():
    raise RuntimeError(f"Could not open video writer for {ruta_sin_videos}")

  try:
    for frame_descriptor in descriptores:

      # Column 0 of predict_proba: probability of the first class
      # (presumably label 0, "no water" -- confirm with model.classes_).
      p = np.array(model.predict_proba(frame_descriptor)[:,0]).reshape(filas, columnas)

      # Interpolate the patch grid up to the output frame size.
      p = cv.resize(p, (ancho, altura), interpolation=cv.INTER_CUBIC)

      # Threshold: pixels where that probability is below 0.5 become 255.
      p = (p < 0.5).astype(np.uint8) * 255

      out.write(p)
  finally:
    # Always release so the container is finalised even if a frame fails.
    out.release()

También generamos las segmentaciones para algunos de los videos de Train

In [None]:
import os
import joblib

model = joblib.load("random_forest_model_hybrid.joblib")
videos = np.load("train_split_path.npy")

# Patch size and temporal window passed to the descriptor extraction.
lado_parche = 20
largo_ventana = 200

# Output video settings. The patch grid is derived from the output size, so
# the input videos are assumed to be ancho x altura as well.
ancho = 800
altura = 600
fps = 5
fourcc = cv.VideoWriter_fourcc(*'XVID')
codec = fourcc
filas = altura // lado_parche
columnas = ancho // lado_parche

for video in videos:
  print(video[0])

  # Videos under a 'non_water' path component are skipped whenever the
  # random draw exceeds 0.1.
  if "non_water" in Path(video[0]).parts:
    if random.random() > 0.1:
      continue

  # Output path: swap the 'Residuales' folder name and drop any path
  # component that is exactly 'videos'.
  ruta_modificada = Path(video[0].replace('Residuales', 'Segmentacion')).parts
  ruta_sin_videos = Path(*[parte for parte in ruta_modificada if parte != 'videos'])

  # If nothing was replaced/dropped the writer would overwrite the input.
  if ruta_sin_videos == Path(video[0]):
    raise ValueError(f"Output path equals input path: {ruta_sin_videos}")

  # cv.VideoWriter does not create missing directories; it just fails to open.
  ruta_sin_videos.parent.mkdir(parents=True, exist_ok=True)

  descriptores = obtenerDescriptoresDeTest([video], lado_parche, largo_ventana)[0]

  out = cv.VideoWriter(str(ruta_sin_videos), codec, fps, (ancho, altura), False)
  if not out.isOpened():
    raise RuntimeError(f"Could not open video writer for {ruta_sin_videos}")

  try:
    for frame_descriptor in descriptores:
      # Column 0 of predict_proba: probability of the first class
      # (presumably label 0, "no water" -- confirm with model.classes_).
      p = np.array(model.predict_proba(frame_descriptor)[:,0]).reshape(filas, columnas)
      # Interpolate the patch grid up to the output frame size.
      p = cv.resize(p, (ancho, altura), interpolation=cv.INTER_CUBIC)
      # Threshold: pixels where that probability is below 0.5 become 255.
      p = (p < 0.5).astype(np.uint8) * 255

      out.write(p)
  finally:
    # Always release so the container is finalised even if a frame fails.
    out.release()