In [None]:
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torch.optim as optim
import torch.nn as nn
import cv2

In [None]:
# Load MobileNetV2 with the same ImageNet weights that `pretrained=True` selects.
# torchvision >= 0.13 deprecates the boolean flag in favour of an explicit
# weights enum; older releases do not define the enum, so fall back to the flag.
try:
    model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
except AttributeError:
    model = models.mobilenet_v2(pretrained=True)

In [None]:
# Replace the final classifier layer with a linear layer that emits a single
# value per image (one logit for the binary decision).
model.classifier[1] = nn.Linear(
    in_features=model.classifier[1].in_features,
    out_features=1,
)

In [None]:
# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)


In [None]:
# Preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalise each channel with the given per-channel mean and std.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# 'Treino' is Portuguese for "training" and 'Teste' for "test", so the training
# loader reads from 'Treino' and the validation loader from 'Teste'.
# NOTE(review): confirm the folder contents match their names.
train_dir = r'C:\Users\MarcosHenriqueSaito\Documents\Oficina\Marcos_MobileNet\Treino'
val_dir = r'C:\Users\MarcosHenriqueSaito\Documents\Oficina\Marcos_MobileNet\Teste'

In [None]:
# DataLoader needs a Dataset, not a directory path (a bare string would be
# iterated character by character). ImageFolder reads the images from disk,
# applies `transform`, and labels each image by the index of its class
# sub-folder.
train_dataset = ImageFolder(train_dir, transform=transform)
val_dataset = ImageFolder(val_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [None]:

# Freeze the whole network except the replacement classifier layer: a
# parameter keeps gradients only if it belongs to `model.classifier[1]`.
head_param_ids = {id(p) for p in model.classifier[1].parameters()}
for p in model.parameters():
    p.requires_grad = id(p) in head_param_ids

In [None]:
# Binary cross-entropy computed directly on raw logits.
criterion = nn.BCEWithLogitsLoss()

# Adam is given only the parameters of the classifier head.
optimizer = optim.Adam(params=model.classifier.parameters(), lr=1e-3)

In [None]:
def _model_device(model):
    """Return the device holding the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")


def validate_model(model, val_loader):
    """Print and return the model's accuracy on a validation loader.

    Args:
        model: Network producing one logit per sample, shape (batch, 1).
        val_loader: Iterable of batches whose first two items are
            (images, labels); labels are compared against 0/1 predictions.

    Returns:
        Accuracy as a percentage (0.0 when the loader yields no samples).
    """
    device = _model_device(model)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in val_loader:
            images, labels = batch[:2]
            images, labels = images.to(device), labels.to(device).float()

            logits = model(images).squeeze(1)
            # The model emits logits, so apply sigmoid before the 0.5 cut-off.
            predicted = (torch.sigmoid(logits) > 0.5).float()

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total if total else 0.0
    print(f"Validation Accuracy: {accuracy:.2f}%")
    return accuracy


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    """Train a single-logit binary classifier and report per-epoch metrics.

    Batches are moved to whatever device the model's parameters live on, so
    the function does not depend on a notebook-global `device`.

    Args:
        model: Network producing one logit per sample, shape (batch, 1).
        train_loader: Iterable of batches whose first two items are
            (images, labels).
        val_loader: Loader passed to `validate_model` after every epoch.
        criterion: Loss taking (logits, float labels), e.g. BCEWithLogitsLoss.
        optimizer: Optimizer already bound to the trainable parameters.
        epochs: Number of passes over `train_loader`.

    Returns:
        Tuple (average training loss, average training accuracy in percent)
        over all epochs; both are 0.0 when no epoch was run.
    """
    device = _model_device(model)
    train_accuracies = []
    train_losses = []

    for epoch in range(epochs):
        # NOTE(review): train() also switches BatchNorm/Dropout layers inside
        # frozen sub-modules to training mode - confirm this is intended.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        for data in train_loader:
            images, labels = data[:2]
            images, labels = images.to(device), labels.to(device).float()
            optimizer.zero_grad()

            outputs = model(images).squeeze(1)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

            # Outputs are logits: threshold the sigmoid probability at 0.5
            # (equivalent to logit > 0), matching the inference code.
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard the averages so an empty loader does not divide by zero.
        epoch_loss = running_loss / num_batches if num_batches else 0.0
        epoch_accuracy = 100 * correct / total if total else 0.0

        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_accuracy)

        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

        validate_model(model, val_loader)

    avg_train_loss = sum(train_losses) / len(train_losses) if train_losses else 0.0
    avg_train_accuracy = sum(train_accuracies) / len(train_accuracies) if train_accuracies else 0.0

    print(f"\nAverage Training Loss: {avg_train_loss:.4f}")
    print(f"Average Training Accuracy: {avg_train_accuracy:.2f}%")

    return avg_train_loss, avg_train_accuracy


In [None]:
# Run the full training loop and keep the averaged metrics for the report.
avg_loss, avg_accuracy = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=10,
)

print(
    "Final Results for the Report:",
    f"Average Training Loss: {avg_loss:.4f}",
    f"Average Training Accuracy: {avg_accuracy:.2f}%",
    sep="\n",
)


In [None]:
# Persist the model's state dict to 'mobilenetv.pth', resolved against the
# current working directory.
# NOTE(review): load_model() reads from an absolute path - confirm both point
# at the same file.
torch.save(obj=model.state_dict(), f='mobilenetv.pth')

In [None]:
def load_model(
    weights_path=r'C:\Users\MarcosHenriqueSaito\Documents\Oficina\projeto\mobilenetv.pth',
    map_location="cpu",
):
    """Rebuild the single-logit MobileNetV2 and load saved weights into it.

    Args:
        weights_path: Path to a state dict saved with `torch.save`. Defaults
            to the location the notebook originally hard-coded.
        map_location: Forwarded to `torch.load`. Defaults to "cpu" so a
            checkpoint saved on a GPU still loads on a CPU-only machine; the
            caller moves the model to its target device afterwards.

    Returns:
        The model in evaluation mode, on the CPU unless `map_location` and
        the caller say otherwise.
    """
    # No arguments means "no pretrained weights" in both old and new
    # torchvision, which avoids the deprecated `pretrained=` keyword.
    model = models.mobilenet_v2()
    model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, 1)

    state_dict = torch.load(weights_path, map_location=map_location)
    model.load_state_dict(state_dict)

    model.eval()
    return model


In [None]:
def process_image(frame, transform):
    """Convert a BGR frame to RGB, apply `transform`, and add a leading dimension.

    Args:
        frame: Image in OpenCV's BGR channel order.
        transform: Callable applied to the RGB image; its result must
            support `.unsqueeze(0)`.

    Returns:
        The transformed image with a new dimension inserted at position 0.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return transform(rgb_frame).unsqueeze(0)

In [None]:

def detect_and_predict(model, transform, device):
    """Classify faces from the default webcam and display annotated frames.

    Each frame is scanned with OpenCV's frontal-face Haar cascade. Every
    detected region is cropped, preprocessed with `process_image`, and scored
    by `model`; the sigmoid probability and a label are drawn next to the
    bounding box. The loop ends when the camera stops delivering frames or
    the user presses 'q'.

    Args:
        model: Network returning a single logit for a one-image batch.
        transform: Preprocessing callable forwarded to `process_image`.
        device: Device the input tensors are moved to before inference.
    """
    cap = cv2.VideoCapture(0)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            # Crops are taken from an untouched copy so rectangles and text
            # drawn on `frame` (for this or an overlapping face) never end up
            # in the pixels fed to the classifier.
            clean_frame = frame.copy()

            for (x, y, w, h) in faces:
                face_image = clean_frame[y:y + h, x:x + w]
                image = process_image(face_image, transform)
                image = image.to(device)

                with torch.no_grad():
                    outputs = model(image)
                    probability = torch.sigmoid(outputs).item()

                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                label = "Rosto Detectado" if probability > 0.5 else "Nenhum Rosto Detectado"
                cv2.putText(frame, f"Probabilidade: {probability:.4f}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
                cv2.putText(frame, label, (x, y - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

            cv2.imshow("Face Detection and Recognition", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even if inference raises.
        cap.release()
        cv2.destroyAllWindows()

In [None]:
# Preprocessing for inference: convert the input to a PIL image first, then
# resize to 224x224, convert to a tensor and normalise each channel.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [None]:
# Pick the compute device, restore the saved model onto it, and start the
# webcam detection loop.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = load_model()
model = model.to(device)

detect_and_predict(model=model, transform=transform, device=device)