In [5]:
!pip install google-cloud-vision




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import io

import sys
from pathlib import Path
from google.cloud import vision
from PIL import Image
import imagehash

# ========= OCR =========
def extract_text(client, image_path: str) -> str:
    """Run Cloud Vision text detection on an image file and return the text found.

    Args:
        client: Object exposing ``text_detection(image=...)``; the notebook
            passes a ``vision.ImageAnnotatorClient``.
        image_path: Path of the image file to read and send for annotation.

    Returns:
        The ``description`` of the first text annotation, or ``""`` when the
        response contains no text annotations.

    Raises:
        RuntimeError: If the response carries a non-empty ``error.message``.
    """
    with open(image_path, "rb") as f:
        content = f.read()
    image = vision.Image(content=content)

    response = client.text_detection(image=image)
    # Per-image failures are reported inside the response payload; without this
    # check a failed request would be indistinguishable from "no text found".
    if response.error.message:
        raise RuntimeError(
            f"Vision text_detection failed for {image_path}: {response.error.message}"
        )

    texts = response.text_annotations
    return texts[0].description if texts else ""

# ========= FACE EXTRACTION =========
def extract_face_and_save(client, image_path: str, output_file: str):
    """Detect a face with Cloud Vision, crop it from the image and save the crop.

    Only the first face annotation of the response is used.

    Args:
        client: Object exposing ``face_detection(image=...)``; the notebook
            passes a ``vision.ImageAnnotatorClient``.
        image_path: Path of the image file to analyse and crop.
        output_file: Destination path of the cropped face image.

    Returns:
        ``output_file`` when a crop was written; ``None`` when no face was
        detected or the detected box has no area inside the image.

    Raises:
        RuntimeError: If the response carries a non-empty ``error.message``.
    """
    with open(image_path, "rb") as f:
        content = f.read()
    image = vision.Image(content=content)

    response = client.face_detection(image=image)
    # Per-image failures are reported inside the response payload; without this
    # check a failed request would be indistinguishable from "no face found".
    if response.error.message:
        raise RuntimeError(
            f"Vision face_detection failed for {image_path}: {response.error.message}"
        )

    faces = response.face_annotations
    if not faces:
        return None

    vertices = faces[0].bounding_poly.vertices
    xs = [v.x for v in vertices]
    ys = [v.y for v in vertices]
    if not xs:
        # A polygon without vertices gives nothing to crop.
        return None

    with Image.open(image_path) as pil_img:
        width, height = pil_img.size
        # Clamp the box to the pixel area so the crop never extends past the
        # image borders.
        left, upper = max(0, min(xs)), max(0, min(ys))
        right, lower = min(width, max(xs)), min(height, max(ys))
        if right <= left or lower <= upper:
            return None

        face_crop = pil_img.crop((left, upper, right, lower))
        # Pillow refuses to write alpha/palette modes (e.g. RGBA or P, common
        # for PNG inputs) as JPEG, so convert those before saving to .jpg/.jpeg.
        is_jpeg = str(output_file).lower().endswith((".jpg", ".jpeg"))
        if is_jpeg and face_crop.mode not in ("L", "RGB", "CMYK"):
            face_crop = face_crop.convert("RGB")
        face_crop.save(output_file)
        return output_file

# ========= FACE COMPARISON =========
def compare_faces(img1_path: str, img2_path: str, threshold: float = 0.75):
    """Compare two images by perceptual hash and decide whether they match.

    The similarity is ``1 - distance / 64``, where ``distance`` is the
    difference between the two ``imagehash.phash`` values.

    NOTE(review): a perceptual hash measures overall image similarity; it is
    presumably used here as a lightweight stand-in for face recognition -
    confirm this is acceptable for the use case.

    Args:
        img1_path: Path of the first image.
        img2_path: Path of the second image.
        threshold: Minimum similarity for the pair to count as a match.

    Returns:
        A ``(match, similarity)`` tuple of ``bool`` and ``float``. If anything
        fails (for example a file cannot be opened), the error is printed and
        ``(False, 0.0)`` is returned instead of raising.
    """
    try:
        # Context managers release both file handles as soon as hashing is done.
        with Image.open(img1_path) as img1, Image.open(img2_path) as img2:
            hash1 = imagehash.phash(img1)
            hash2 = imagehash.phash(img2)

        diff = hash1 - hash2
        # 64 is the bit count of phash with its default settings - TODO confirm
        # if a non-default hash_size is ever passed.
        similarity = float(1 - (diff / 64.0))
        # bool() keeps a NumPy boolean from leaking out to callers.
        return bool(similarity >= threshold), similarity
    except Exception as e:
        # Deliberate best-effort: report the problem and treat it as "no match".
        print(f"Erro na comparação: {e}")
        return False, 0.0

# Point this at the folder that contains utils.py.
BASE_DIR = Path("G:/Meu Drive/AI_data_lab/Cursos_ml_AI/Fiap/Cognitive Environments/trabalho_final")
# Guard the append so re-running this cell does not pile up duplicate entries.
if str(BASE_DIR) not in sys.path:
    sys.path.append(str(BASE_DIR))

In [7]:
import os
import json
import pandas as pd
from pathlib import Path
from google.cloud import vision
from google.oauth2 import service_account

# importa funções utilitárias
from utils import extract_text, compare_faces, extract_face_and_save

In [8]:
# Credentials setup: location of the service-account key file.
SERVICE_ACCOUNT_FILE = (
    r"G:\Meu Drive\AI_data_lab\Cursos_ml_AI\Fiap\Cognitive Environments"
    r"\trabalho_final\cred\dts-10-ds-32748754226a.json"
)

# Publish the key path through the GOOGLE_APPLICATION_CREDENTIALS variable.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=SERVICE_ACCOUNT_FILE)

# Load the key explicitly and hand the credentials to the Vision client.
creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE)
client = vision.ImageAnnotatorClient(credentials=creds)
print("Credenciais carregadas com sucesso.")

Credenciais carregadas com sucesso.


In [9]:
# Global configuration
THRESHOLD = 0.7  # minimum similarity accepted as a face match

# Input files all live in the same data folder; the prefix is kept as a plain
# string and joined with "\" so the resulting paths are exactly the same text
# as if each one had been spelled out in full.
_DATA_DIR = (
    r"G:\Meu Drive\AI_data_lab\Cursos_ml_AI\Fiap\Cognitive Environments"
    r"\trabalho_final\data"
)
doc_path = Path(_DATA_DIR + r"\006.jpeg")
comp_path = Path(_DATA_DIR + r"\003.jpg")
selfie_path = Path(_DATA_DIR + r"\007.png")

# Output folder, created on demand (relative to the working directory).
out_dir = Path("outputs")
out_dir.mkdir(parents=True, exist_ok=True)

In [10]:
# Name expected to appear on the proof-of-address document.
EXPECTED_NAME = "LUIZ ANTONIO DE OLIVEIRA"

# OCR
print("Extraindo OCR da CNH...")
doc_text = extract_text(client, str(doc_path))

print("Extraindo OCR do Comprovante...")
comp_text = extract_text(client, str(comp_path))

# Face extraction
print("Extraindo rosto da CNH...")
face_from_doc = extract_face_and_save(
    client, str(doc_path), str(out_dir / "face_doc.jpg")
)

# Face comparison
if face_from_doc is None:
    # extract_face_and_save returns None when no face is found; passing
    # str(None) on would make the comparison try to open a file named "None".
    print("Nenhum rosto detectado na CNH; comparação facial não realizada.")
    match, similarity = False, 0.0
else:
    print("Comparando selfie com CNH...")
    match, similarity = compare_faces(
        str(selfie_path), str(face_from_doc), threshold=THRESHOLD
    )

status = "compatível" if match else "não compatível"
print(
    f"Face {status}! Similaridade: {similarity:.3f} "
    f"(mínimo aceito = {THRESHOLD})"
)

# OCR output can split the name across lines or vary in letter case, so the
# check runs on upper-cased text with every whitespace run collapsed to a space.
comp_text_normalized = " ".join(comp_text.upper().split())

# Consolidated results
resultado = {
    "face_match": bool(match),
    "similaridade": float(round(similarity, 3)),
    "threshold_utilizado": THRESHOLD,
    "documento_extraido": str(doc_text),
    "comprovante_extraido": str(comp_text),
    "nome_valido": EXPECTED_NAME in comp_text_normalized,
}

Extraindo OCR da CNH...
Extraindo OCR do Comprovante...
Extraindo rosto da CNH...
Rosto salvo em: outputs\face_doc.jpg
Comparando selfie com CNH...
Face não compatível! Similaridade: 0.688 (mínimo aceito = 0.7)


In [11]:
# Output locations inside the results folder.
json_file = out_dir / "results.json"
csv_file = out_dir / "results.csv"

# Write the consolidated result as JSON (non-ASCII characters kept as-is).
json_file.write_text(
    json.dumps(resultado, ensure_ascii=False, indent=4),
    encoding="utf-8",
)
print(f"JSON salvo em {json_file.resolve()}")

# Write the same record as a one-row CSV.
df = pd.DataFrame([resultado])
df.to_csv(csv_file, encoding="utf-8-sig", index=False)
print(f"CSV salvo em {csv_file.resolve()}")

print("Pipeline concluído com sucesso!")

JSON salvo em G:\Meu Drive\AI_data_lab\Cursos_ml_AI\Fiap\Cognitive Environments\trabalho_final\notebook\outputs\results.json
CSV salvo em G:\Meu Drive\AI_data_lab\Cursos_ml_AI\Fiap\Cognitive Environments\trabalho_final\notebook\outputs\results.csv
Pipeline concluído com sucesso!
