# Introdução


Este notebook tem como objetivo destrinchar e entender o uso de Visão Computacional (CV) na resolução de problemas.

Para isso, vou criar um projeto lúdico-didático: Classificação de gestos do personagem Trafalgar Law de One Piece.

A ideia é:
 - CV capta gesto e salva coordenadas
 - Modelo de ML/DL classifica o gesto com base nas coordenadas
 - O gesto reconhecido é exibido na tela, junto com efeitos visuais
 - Para Shambles, adiciona feature que simula o anime/mangá
 - Aponta para um objeto e *"substitui"* ele

# CV

In [2]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import numpy as np
import pandas as pd

In [None]:
# Landmark indices along each finger, every chain starting at landmark 0.
_FINGER_CHAINS = (
    (0, 1, 2, 3, 4),        # thumb
    (0, 5, 6, 7, 8),        # index finger
    (0, 9, 10, 11, 12),     # middle finger
    (0, 13, 14, 15, 16),    # ring finger
    (0, 17, 18, 19, 20),    # pinky
)
# Landmark indices chained across the base of the palm.
_PALM_BASE = (5, 9, 13, 17)

# Pairs of landmark indices joined by a line segment when the hand is drawn.
# Each chain contributes its consecutive pairs, fingers first, palm base last.
HAND_CONNECTIONS = [
    pair
    for chain in (*_FINGER_CHAINS, _PALM_BASE)
    for pair in zip(chain, chain[1:])
]

# Short aliases for the MediaPipe Tasks classes used in this notebook.
BaseOptions = python.BaseOptions
VisionRunningMode = vision.RunningMode
HandLandmarkerOptions = vision.HandLandmarkerOptions
HandLandmarker = vision.HandLandmarker

# Hand landmarker configuration: model file loaded from disk, VIDEO running
# mode, and at most one hand per frame.
_opcoes_base = BaseOptions(model_asset_path="models/hand_landmarker.task")
options = HandLandmarkerOptions(
    base_options=_opcoes_base,
    running_mode=VisionRunningMode.VIDEO,
    num_hands=1,
)

# Recorded samples: one row per captured frame (landmark coordinates followed
# by the gesture label).
dataset = []

# Number of samples recorded so far for each gesture label.
contador = dict.fromkeys(("ROOM", "SHAMBLES", "CANCEL", "SCAN", "APONTAR"), 0)

# Label currently being recorded (None while idle) and how many frames are
# still to be captured in the current recording burst.
modo_gravacao, frames_restantes = None, 0

import os  # stdlib; only used to make sure the output folder exists before saving

# Key code -> gesture label whose recording burst that key starts.
TECLAS_GESTO = {
    ord("r"): "ROOM",
    ord("s"): "SHAMBLES",
    ord("c"): "CANCEL",
    ord("a"): "APONTAR",
    ord("n"): "SCAN",
}
# Number of frames (with a detected hand) stored after each key press.
FRAMES_POR_GRAVACAO = 100

# Webcam capture loop: detects the hand in every frame, draws its landmarks and
# connections, records labelled coordinate rows while a recording burst is
# active, and saves everything to data/dados.csv when the loop ends.
camera = None
try:
    with HandLandmarker.create_from_options(options) as detector:
        camera = cv2.VideoCapture(0)

        frame_timestamp = 0

        while True:
            sucesso, imagem = camera.read()
            if not sucesso:
                print("Falha na câmera")
                break

            # OpenCV delivers BGR frames; the MediaPipe image is built as SRGB.
            imagem_rgb = cv2.cvtColor(imagem, cv2.COLOR_BGR2RGB)
            mp_imagem = mp.Image(image_format=mp.ImageFormat.SRGB, data=imagem_rgb)

            # The detector is called with a timestamp per frame; a fixed 33 ms
            # step (roughly 30 fps) is used instead of the real capture time.
            frame_timestamp += 33
            resultados = detector.detect_for_video(mp_imagem, frame_timestamp)

            # Flattened [x0, y0, z0, x1, y1, z1, ...] of the detected hand, or
            # None when no hand was found in this frame.
            coords = None
            altura, largura = imagem.shape[:2]

            if resultados.hand_landmarks:
                for hand_landmarks in resultados.hand_landmarks:
                    coords = [
                        valor
                        for ponto in hand_landmarks
                        for valor in (ponto.x, ponto.y, ponto.z)
                    ]

                    # Landmark x/y are scaled by the frame size to get pixels.
                    pixels = [
                        (int(ponto.x * largura), int(ponto.y * altura))
                        for ponto in hand_landmarks
                    ]

                    # Landmarks as filled red circles (colour is BGR).
                    for pixel in pixels:
                        cv2.circle(imagem, pixel, 5, (0, 0, 255), -1)

                    # Connections as green line segments.
                    for inicio, fim in HAND_CONNECTIONS:
                        cv2.line(imagem, pixels[inicio], pixels[fim], (0, 255, 0), 2)

            # Only frames where a hand was detected count towards the burst.
            if modo_gravacao and coords is not None:
                dataset.append(coords + [modo_gravacao])
                contador[modo_gravacao] += 1
                frames_restantes -= 1
                if frames_restantes <= 0:
                    modo_gravacao = None

            # Per-gesture sample counters, one text row per gesture.
            for linha, (gesto, total) in enumerate(contador.items()):
                cv2.putText(imagem, f"{gesto}: {total}", (10, 30 + 30 * linha),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

            cv2.imshow("Câmera com Mãos", imagem)

            # Keep only the low byte so the comparison with ord(...) also works
            # on backends that set higher bits in the returned key code.
            tecla = cv2.waitKey(1) & 0xFF

            if tecla == ord("q"):
                break

            if tecla in TECLAS_GESTO:
                modo_gravacao = TECLAS_GESTO[tecla]
                frames_restantes = FRAMES_POR_GRAVACAO

        if dataset:
            # Create the output folder if needed so a finished recording
            # session is not lost to a missing directory.
            os.makedirs("data", exist_ok=True)
            df = pd.DataFrame(dataset)
            df.to_csv("data/dados.csv", index=False)
            print("Salvo")
        else:
            print("Nenhuma amostra coletada")
finally:
    # Always free the webcam and close the preview window, even when the loop
    # is interrupted by an exception.
    if camera is not None:
        camera.release()
    cv2.destroyAllWindows()

Salvo


In [4]:
# Reload the recorded samples from disk and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="data/dados.csv")
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.783285,0.863024,-3.290968e-07,0.745474,0.860294,-0.024589,0.704715,0.845938,-0.046746,0.670071,...,0.84969,0.812801,-0.094766,0.858995,0.84031,-0.099713,0.862792,0.869314,-0.098802,ROOM
1,0.785929,0.864287,-3.41808e-07,0.747843,0.862889,-0.022397,0.706775,0.847157,-0.043011,0.672425,...,0.848969,0.812607,-0.090883,0.858546,0.839533,-0.095833,0.864216,0.86813,-0.094988,ROOM
2,0.786819,0.867643,-3.201247e-07,0.7482,0.863073,-0.025585,0.705164,0.843385,-0.047004,0.669205,...,0.852061,0.812882,-0.099756,0.861013,0.841706,-0.106386,0.864046,0.871698,-0.106277,ROOM
3,0.786582,0.869731,-3.453223e-07,0.747075,0.865311,-0.023859,0.70538,0.846544,-0.044327,0.669492,...,0.851587,0.813764,-0.091865,0.860328,0.842382,-0.096343,0.862573,0.872093,-0.094458,ROOM
4,0.785856,0.86681,-3.370099e-07,0.746063,0.862057,-0.022864,0.704514,0.844165,-0.043238,0.668159,...,0.851744,0.812186,-0.091265,0.859864,0.840365,-0.095696,0.861655,0.869735,-0.09429,ROOM
