In [1]:
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm

# import tensorflow as tf
# import tflearn
# from tflearn.layers.conv import conv_2d, max_pool_2d
# from tflearn.layers.core import input_data, dropout, fully_connected
# from tflearn.layers.estimator import regression
# import matplotlib.pyplot as plt

### Generate Dataset

In [2]:
def generate_dataset(name):
    if os.path.exists(f"./data/{name}"):
        print("Dataset already exists")
        return
    else:
        os.mkdir(f"./data/{name}")
    face_classifier = cv2.CascadeClassifier("./haarcascade_frontalface_default.xml")
    def face_cropped(img):
        # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = img
        faces = face_classifier.detectMultiScale(gray, 1.3, 5)

        if len(faces) == ():
            return None
        
        cropped_face = None
        for (x,y,w,h) in faces:
            cropped_face = img[y:y+h, x:x+w]
        return cropped_face
    
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Camera not found")
        return
    
    img_id = 0

    while True:
        ret, frame = cap.read()
        if face_cropped(frame) is not None:
            img_id+=1
            face = cv2.resize(face_cropped(frame), (200,200))
            # face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            file_name_path = f"./data/{name}/{img_id}.jpg"
            cv2.imwrite(file_name_path, face)
            cv2.putText(face, str(img_id), (50,50), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 2)

            cv2.imshow("Cropped Face", face)
            if cv2.waitKey(1) == 13 or int(img_id) == 100:
                break
    
    cap.release()
    cv2.destroyAllWindows()
    print("Collecting Samples Complete!!!")


In [3]:
generate_dataset('parth')

Dataset already exists


In [4]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms

# Define transformations to be applied to the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),           # Convert images to PyTorch tensors
    # You can add more transformations here if needed (e.g., normalization)
])

# Path to the root directory containing image folders
data_dir = './data'

# Load images from the folders with specified transformations
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(len(dataset)*0.9), int(len(dataset)*0.1)])

# Get the class names
class_names = dataset.classes

# Create a data loader to iterate over the dataset in batches
batch_size = 1

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

data_loader = {
    "train": train_loader,
    "val": test_loader
}

# Iterate over the dataset using the data loader
for images, labels in data_loader['train']:
    print(images.shape, labels.shape)
    break

torch.Size([1, 3, 224, 224]) torch.Size([1])


In [5]:
class_names

['kartik', 'parth', 'shrikant']

In [6]:
len(dataset)

300

In [7]:
from torchvision import models
model = models.mobilenet_v2(weights="DEFAULT")

In [8]:
print(model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [9]:
print(model.classifier)

# Getting number of output classes
num_out_ftrs = len(class_names)

# freeze all layers for fine tuning (not doing this takes it very long to train)
for param in model.parameters():
    param.requires_grad = False

# number of inputs in last layer
num_ftrs = model.classifier[1].in_features

model.classifier[0] = nn.Dropout(p=0.4, inplace=True)
model.classifier[1] = nn.Linear(num_ftrs, num_out_ftrs)

# printing the last layer : classifier
print("After modifying the last layer:")
print(model.classifier)

Sequential(
  (0): Dropout(p=0.2, inplace=False)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)
After modifying the last layer:
Sequential(
  (0): Dropout(p=0.4, inplace=True)
  (1): Linear(in_features=1280, out_features=3, bias=True)
)


In [10]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

def train_model(model,
                dataloaders,
                criterion = nn.CrossEntropyLoss(),
                prev_checkpoint=None,
                learning_rate=0.001,
                optimizer = None, 
                schedular=None, 
                num_epoch=10,
                save_checkpoint=False):
    
    import copy
    import time
    from tqdm import tqdm
    from IPython.display import clear_output

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    # define default parameters
    if optimizer == None:
        optimizer = torch.optim.Adam(
            params=model.parameters(),
            lr=learning_rate
        )

    # get dataset and dataloader sizes
    dataset_sizes = {
        "train": len(dataloaders["train"].dataset),
        "val": len(dataloaders["val"].dataset)
    }
    dataloader_sizes ={
        "train": len(dataloaders["train"]),
        # "val": len(dataloaders["val"])
    }

    # defining variables for storing loss and accuracy
    train_time_list = []

    if prev_checkpoint != None:
        print(model.load_state_dict(prev_checkpoint["model_state"]))
        optimizer.load_state_dict(prev_checkpoint["optim_state"])
        epochs_completed = prev_checkpoint["epoch"]
        epoch_loss_list = prev_checkpoint["epoch_losses"]
        train_time = prev_checkpoint["time_taken"]
        print('Loaded checkpoint')
    else:
        epochs_completed = 0
        epoch_loss_list = {
            "train": [],
            "val": []
        }
        train_time = 0
    
    print(f"Training Started on {device}")

    for epoch in range(epochs_completed, num_epoch+epochs_completed):

        # Each epoch has a training and a validation phase
        for phase in ["train", 'val']:
            time_start = time.time()

            model.train() if phase == "train" else model.eval()

            epoch_losses = []
            running_loss = 0.0
            running_corrects = 0.0

            # Use tqdm for progress bar during training
            data_loader = tqdm(dataloaders[phase], desc=f'{phase.capitalize()} Epoch {epoch+1}/{num_epoch+epochs_completed}')

            for i, (images, labels) in enumerate(data_loader):
                # print('i')
                images = images.to(device)
                labels = labels.to(device)
                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(images)
                    output_one_hot = torch.argmax(outputs, dim=1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training loop
                    if phase == "train":
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * images.size(0)
                running_corrects += torch.sum(output_one_hot == labels.data)

                # if (i+1) % 10 == 0:
                #     data_loader.set_postfix({'loss': loss.item()})

            if schedular != None and phase == "train":
                    schedular.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = 100 * running_corrects.double() / dataset_sizes[phase]

            data_loader.write(f'loss: {epoch_loss} acc: {epoch_acc}')
                # data_loader.set_postfix({'acc': epoch_acc})

        epoch_time = time.time() - time_start

        if phase == "train":
            train_time_list.append(epoch_time) 

        epoch_loss_list[phase].append(epoch_losses)

        # print("\n")

    train_time += sum(train_time_list)

    checkpoint = {
        "epoch": num_epoch+epochs_completed,
        "criterion": criterion,
        "model_state": model.state_dict(),
        "optim_state": optimizer.state_dict(),
        "epoch_losses": epoch_loss_list,
        "time_taken": train_time
    }
    
    if save_checkpoint == True:
        file_name = f"checkpoint_{time.time()}.pth"
        torch.save(checkpoint, file_name)
    
    return model, checkpoint

In [11]:
model, checkpoint = train_model(model, data_loader, num_epoch=5)

Training Started on cpu


Train Epoch 1/5: 100%|██████████| 270/270 [00:07<00:00, 36.47it/s]


loss: 1.1162488637147125 acc: 31.85185185185185


Val Epoch 1/5: 100%|██████████| 30/30 [00:00<00:00, 44.37it/s]


loss: 0.9497655073801676 acc: 63.333333333333336


Train Epoch 2/5: 100%|██████████| 270/270 [00:06<00:00, 38.66it/s]


loss: 1.0354638317116984 acc: 49.25925925925926


Val Epoch 2/5: 100%|██████████| 30/30 [00:00<00:00, 43.40it/s]


loss: 1.0300434947013855 acc: 36.666666666666664


Train Epoch 3/5: 100%|██████████| 270/270 [00:06<00:00, 39.34it/s]


loss: 0.9488855275842879 acc: 56.666666666666664


Val Epoch 3/5: 100%|██████████| 30/30 [00:00<00:00, 46.26it/s]


loss: 0.9598764459292094 acc: 63.333333333333336


Train Epoch 4/5: 100%|██████████| 270/270 [00:07<00:00, 37.94it/s]


loss: 0.8895231572566209 acc: 64.81481481481481


Val Epoch 4/5: 100%|██████████| 30/30 [00:00<00:00, 46.96it/s]


loss: 0.9816379457712173 acc: 50.0


Train Epoch 5/5: 100%|██████████| 270/270 [00:06<00:00, 40.11it/s]


loss: 0.8319311214817895 acc: 69.62962962962963


Val Epoch 5/5: 100%|██████████| 30/30 [00:00<00:00, 46.08it/s]

loss: 0.9178692668676376 acc: 60.0





In [12]:
for param in model.parameters():
    param.requires_grad = True

In [13]:
model, checkpoint = train_model(model, data_loader, num_epoch=5)

Training Started on cpu


Train Epoch 1/5: 100%|██████████| 270/270 [00:56<00:00,  4.79it/s]


loss: 0.14587784151258854 acc: 95.92592592592592


Val Epoch 1/5: 100%|██████████| 30/30 [00:00<00:00, 44.15it/s]


loss: 0.1265393808600493 acc: 96.66666666666667


Train Epoch 2/5: 100%|██████████| 270/270 [00:55<00:00,  4.85it/s]


loss: 0.009096693092753952 acc: 99.62962962962963


Val Epoch 2/5: 100%|██████████| 30/30 [00:00<00:00, 46.47it/s]


loss: 0.118195112189278 acc: 93.33333333333333


Train Epoch 3/5: 100%|██████████| 270/270 [00:59<00:00,  4.56it/s]


loss: 0.0010278253431250859 acc: 100.0


Val Epoch 3/5: 100%|██████████| 30/30 [00:00<00:00, 43.54it/s]


loss: 0.10594127655507085 acc: 93.33333333333333


Train Epoch 4/5: 100%|██████████| 270/270 [00:54<00:00,  4.91it/s]


loss: 0.0002930436619054591 acc: 100.0


Val Epoch 4/5: 100%|██████████| 30/30 [00:00<00:00, 47.19it/s]


loss: 0.09285060615050801 acc: 96.66666666666667


Train Epoch 5/5: 100%|██████████| 270/270 [00:55<00:00,  4.86it/s]


loss: 0.00014658675866114748 acc: 100.0


Val Epoch 5/5: 100%|██████████| 30/30 [00:00<00:00, 46.18it/s]

loss: 0.08192721930099651 acc: 96.66666666666667





In [14]:
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms

facedetect = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)
font = cv2.FONT_HERSHEY_COMPLEX

transform = transforms.Compose([
    transforms.ToPILImage(),        # Convert numpy array to PIL Image
    transforms.Resize((200, 200)),  # Resize image to 224x224
    transforms.ToTensor(),           # Convert image to PyTorch tensor
])

try:
    while True:
        success, imgOrignal = cap.read()
        faces = facedetect.detectMultiScale(imgOrignal, 1.3, 5)
        for x, y, w, h in faces:
            crop_img = imgOrignal[y:y+h, x:x+h]
            # img = cv2.resize(crop_img, (200, 200))
            img = transform(crop_img)  # Use the defined transform
            img = img.unsqueeze(0)  # Add batch dimension
            img = img.to(device)
            # print(img.device)
            prediction = model(img)
            classIndex = torch.argmax(prediction, dim=1)
            # probabilityValue = np.amax(prediction)
            cv2.rectangle(imgOrignal, (x, y), (x+w, y+h), (0, 255, 0), 2)
            cv2.rectangle(imgOrignal, (x, y-40), (x+w, y), (0, 255, 0), -2)
            cv2.putText(imgOrignal, str(class_names[classIndex]), (x, y-10), font, 0.75, (255, 255, 255), 1, cv2.LINE_AA)

            # cv2.putText(imgOrignal, str(round(probabilityValue*100, 2))+"%", (180, 75), font, 0.75, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.imshow("Result", imgOrignal)
        k = cv2.waitKey(1)
        if k == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


False




KeyboardInterrupt: 