In [1]:
import tensorflow as tf

# Sanity check: ask TensorFlow which physical GPU devices it can see and
# report how many there are.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

2025-07-08 17:13:45.621362: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751994826.030365    2156 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751994826.155598    2156 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1751994827.224650    2156 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1751994827.224691    2156 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1751994827.224695    2156 computation_placer.cc:177] computation placer alr

Num GPUs Available:  1


In [2]:
# Imports
import torch
from torchvision import transforms
from PIL import Image
import os
import cv2
import numpy as np
import pandas as pd
from datetime import datetime
from transformers import AutoFeatureExtractor, AutoModel
import streamlit as st

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Configuration
# Hugging Face checkpoint used for both the feature extractor and the model.
# Change it here only: the loaders below read this constant.
MODEL_NAME = "google/vit-base-patch16-224"
# Root folder of enrolled people: one sub-directory per person, each holding
# that person's reference images (see load_registered_embeddings).
REGISTERED_DIR = "registered_faces"
# CSV file that mark_attendance reads and rewrites (columns Name, Date, Time).
ATTENDANCE_CSV = "attendance_log.csv"
# A frame is accepted as a registered person only when its best cosine
# similarity is strictly greater than this value.
SIMILARITY_THRESHOLD = 0.7

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(device)
# Switch to inference mode (disables dropout). As the last expression of the
# cell, this also displays the model summary.
model.eval()

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTModel(
  (embeddings): ViTEmbeddings(
    (patch_embeddings): ViTPatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): ViTEncoder(
    (layer): ModuleList(
      (0-11): 12 x ViTLayer(
        (attention): ViTAttention(
          (attention): ViTSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
          )
          (output): ViTSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (intermediate): ViTIntermediate(
          (dense): Linear(in_features=768, out_features=3072, bias=True)
          (intermediate_act_fn): GELUActivation()
        )
        (output): ViTOutput(
          (d

In [4]:
# Utils
def preprocess_image(image):
    """Turn an image into the model's input tensor.

    Runs the module-level ``extractor`` on ``image`` requesting PyTorch
    tensors, picks the ``"pixel_values"`` entry of the result and moves it
    to the module-level ``device``.

    Args:
        image: Image object accepted by ``extractor`` (callers in this
            notebook pass PIL images).

    Returns:
        The ``pixel_values`` tensor, placed on ``device``.
    """
    encoded = extractor(images=image, return_tensors="pt")
    pixel_values = encoded["pixel_values"]
    return pixel_values.to(device)

def get_embedding(image):
    """Compute a single embedding vector for one image.

    The image is preprocessed, passed through the module-level ``model``
    with gradient tracking disabled, and the hidden state at token
    position 0 (the [CLS] token for a ViT model) is taken as the embedding.

    Args:
        image: Image to embed; forwarded unchanged to ``preprocess_image``.

    Returns:
        A 1-D NumPy array: the position-0 hidden state of the first (only)
        item in the batch.
    """
    pixel_values = preprocess_image(image)
    with torch.no_grad():
        hidden_states = model(pixel_values).last_hidden_state
    # Keep only the first token of every batch item, then move to host memory.
    first_token = hidden_states[:, 0, :]
    return first_token.cpu().numpy()[0]

def load_registered_embeddings():
    """Embed every reference image found under ``REGISTERED_DIR``.

    The expected layout is one sub-directory per person, named after that
    person, containing one or more image files. Top-level entries that are
    not directories are ignored. Inside a person directory, nested
    directories are skipped, and files that cannot be opened or decoded as
    images (e.g. ``.DS_Store``) are skipped with a warning instead of
    aborting the whole load.

    Directory listings are sorted so the returned order is deterministic.

    Returns:
        tuple: ``(embeddings, names)`` where ``embeddings`` is a NumPy array
        stacking one embedding per successfully loaded image (an empty array
        when nothing was loaded) and ``names`` is the list of person names
        in the same order.

    Raises:
        FileNotFoundError: if ``REGISTERED_DIR`` does not exist.
    """
    import warnings

    embeddings = []
    names = []
    for person in sorted(os.listdir(REGISTERED_DIR)):
        person_dir = os.path.join(REGISTERED_DIR, person)
        if not os.path.isdir(person_dir):
            continue
        for img_file in sorted(os.listdir(person_dir)):
            img_path = os.path.join(person_dir, img_file)
            if not os.path.isfile(img_path):
                continue
            try:
                # The context manager closes the underlying file handle;
                # convert() returns an independent, fully loaded image.
                with Image.open(img_path) as raw_img:
                    img = raw_img.convert("RGB")
            except OSError as exc:
                # Pillow's "cannot identify image" error is an OSError subclass.
                warnings.warn(f"Skipping unreadable image {img_path}: {exc}")
                continue
            embeddings.append(get_embedding(img))
            names.append(person)
    return np.array(embeddings), names

def mark_attendance(name):
    """Record ``name`` as present today in ``ATTENDANCE_CSV``.

    A person is logged at most once per calendar day: if the CSV already
    has a row with this name and today's date, nothing is written. If the
    CSV does not exist yet it is created with the columns ``Name``,
    ``Date`` and ``Time``.

    Args:
        name: Identifier of the person to record.

    Returns:
        None.
    """
    now = datetime.now()
    date_str = now.strftime('%Y-%m-%d')
    time_str = now.strftime('%H:%M:%S')
    new_row = pd.DataFrame(
        [{'Name': name, 'Date': date_str, 'Time': time_str}],
        columns=['Name', 'Date', 'Time'],
    )
    if os.path.exists(ATTENDANCE_CSV):
        df = pd.read_csv(ATTENDANCE_CSV)
        if ((df['Name'] == name) & (df['Date'] == date_str)).any():
            return
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        df = pd.concat([df, new_row], ignore_index=True)
    else:
        df = new_row
    df.to_csv(ATTENDANCE_CSV, index=False)

def cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D vectors.

    Each vector is scaled to unit length and the dot product of the results
    is returned. If either vector has zero length the similarity is
    undefined; 0.0 is returned in that case instead of NaN, so a degenerate
    embedding can never win an ``argmax`` or be compared against a threshold
    as NaN.

    Args:
        a: First vector (array-like).
        b: Second vector (array-like), same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b``, or 0.0 when either
        vector has zero norm.
    """
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return np.dot(a / norm_a, b / norm_b)

In [5]:
# StreamLit
# UI entry point. This cell is meant to be executed with `streamlit run`;
# when run from a plain notebook kernel Streamlit only logs warnings.
st.title("Smart Attendance System with HuggingFace ViT \U0001F4BB")
option = st.selectbox("Choose an Option", ["Run Attendance", "View Attendance Log"])

if option == "Run Attendance":
    st.info("Turn on camera and align your face...")
    run = st.button("Start Attendance")
    if run:
        # A missing registry folder is treated the same as an empty one.
        if os.path.isdir(REGISTERED_DIR):
            reg_embeddings, reg_names = load_registered_embeddings()
        else:
            reg_embeddings, reg_names = np.array([]), []

        if len(reg_names) == 0:
            # Without reference embeddings np.argmax below would raise.
            st.warning(f"No registered faces found in '{REGISTERED_DIR}'.")
        else:
            cap = cv2.VideoCapture(0)
            try:
                if not cap.isOpened():
                    st.error("Could not open the camera.")
                else:
                    stframe = st.empty()
                    while True:
                        ret, frame = cap.read()
                        if not ret:
                            break
                        # OpenCV delivers BGR; the model and Streamlit expect RGB.
                        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        face_img = Image.fromarray(rgb)
                        emb = get_embedding(face_img)

                        # Compare with registered
                        similarities = [cosine_similarity(emb, reg_emb) for reg_emb in reg_embeddings]
                        max_idx = np.argmax(similarities)
                        max_sim = similarities[max_idx]

                        if max_sim > SIMILARITY_THRESHOLD:
                            name = reg_names[max_idx]
                            mark_attendance(name)
                            color = (0, 255, 0)
                        else:
                            name = "Unknown"
                            color = (0, 0, 255)

                        cv2.putText(frame, f"{name} ({max_sim:.2f})", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
                        stframe.image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                        # No cv2.waitKey / destroyAllWindows here: frames are
                        # rendered by Streamlit, not in an OpenCV window, so
                        # those HighGUI calls have nothing to act on.
            finally:
                # Always free the camera, including when the loop is
                # interrupted by an exception or a Streamlit rerun.
                cap.release()

elif option == "View Attendance Log":
    if os.path.exists(ATTENDANCE_CSV):
        df = pd.read_csv(ATTENDANCE_CSV)
        st.dataframe(df)
    else:
        st.warning("No attendance log found yet.")

2025-07-08 17:19:36.470 
  command:

    streamlit run /mnt/d/ML/Projects/smart-face-recognition-attendance-system/facenet-env/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-07-08 17:19:36.475 Session state does not function when running a script without `streamlit run`
