In [3]:
import os
import csv
from datetime import datetime

def create_metadata_csv_with_timestamp(output_folder, metadata_csv_path):
    """
    Creates a CSV metadata file for images organized as:
    output_folder/constituency_X/voteridYYY/image.jpeg
    The CSV will have: constituency, voter_id, image_name, image_path, timestamp
    Timestamp is the last modified time of the image.
    """
    metadata_rows = []

    # Loop over constituency folders
    for constituency in sorted(os.listdir(output_folder)):
        constituency_path = os.path.join(output_folder, constituency)
        if not os.path.isdir(constituency_path):
            continue

        # Loop over voter folders
        for voter in sorted(os.listdir(constituency_path)):
            voter_path = os.path.join(constituency_path, voter)
            if not os.path.isdir(voter_path):
                continue

            # Loop over images inside voter folder
            for image_file in sorted(os.listdir(voter_path)):
                if image_file.lower().endswith(('.jpg', '.jpeg')):
                    image_path = os.path.join(voter_path, image_file)

                    # Get file's last modified time
                    modified_time = os.path.getmtime(image_path)
                    timestamp = datetime.fromtimestamp(modified_time).strftime("%Y-%m-%d %H:%M:%S")

                    metadata_rows.append({
                        "constituency": constituency,
                        "voter_id": voter,
                        "image_name": image_file,
                        "image_path": image_path,
                        "timestamp": timestamp
                    })

    # Write metadata to CSV
    with open(metadata_csv_path, mode='w', newline='') as csvfile:
        fieldnames = ["constituency", "voter_id", "image_name", "image_path", "timestamp"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in metadata_rows:
            writer.writerow(row)

    print(f"✅ Metadata CSV created at {metadata_csv_path} with {len(metadata_rows)} entries.")

# -----------------------
# Example usage
# -----------------------
output_folder = r"D:\AIE\SEM 3\FAI project\originaldataset"  # folder containing constituencies/voterid folders
metadata_csv_path =r"D:\AIE\SEM 3\FAI project\metadata.csv"  # output CSV file

create_metadata_csv_with_timestamp(output_folder, metadata_csv_path)


✅ Metadata CSV created at D:\AIE\SEM 3\FAI project\metadata.csv with 1646 entries.


In [1]:
import cv2
import os
import pandas as pd
from mtcnn import MTCNN

# -------------------------
# Paths
# -------------------------
metadata_csv = r"D:\AIE\SEM 3\FAI project\metadata.csv"   # your dataset CSV
faces_dir = r"D:\AIE\SEM 3\FAI project\MTCNN_faces"      # root folder for cropped faces

# Initialize MTCNN detector
detector = MTCNN()

# Load CSV
df = pd.read_csv(metadata_csv)

# Add new column if not present
if "cropped_face_path" not in df.columns:
    df["cropped_face_path"] = ""

# Loop through each row
for idx, row in df.iterrows():
    img_path = row["image_path"]
    img_name = os.path.basename(img_path)

    # Read image
    img = cv2.imread(img_path)
    if img is None:
        print(f"⚠️ Image not found: {img_path}")
        continue

    # Detect faces
    results = detector.detect_faces(img)

    if results:
        # Take the first detected face (main face)
        x, y, w, h = results[0]['box']

        # Ensure coordinates are valid
        x, y = max(0, x), max(0, y)
        w, h = max(1, w), max(1, h)
        face = img[y:y+h, x:x+w]

        # -------------------------
        # Build directory structure
        # -------------------------
        constituency = str(row["constituency"])
        voter_id = str(row["voter_id"])

        # Final path: faces_dir/constituency/voter_id/
        target_dir = os.path.join(faces_dir, constituency, voter_id)
        os.makedirs(target_dir, exist_ok=True)

        # Save cropped face
        face_filename = f"{os.path.splitext(img_name)[0]}_face.jpg"
        face_path = os.path.join(target_dir, face_filename)
        cv2.imwrite(face_path, face)

        # Update CSV
        df.at[idx, "cropped_face_path"] = face_path

# Save updated CSV (overwrite)
df.to_csv(metadata_csv, index=False)
print(results)
print("✅ MTCNN processing complete.")
print(f"✅ Cropped faces organized under: {faces_dir}")
print("✅ Structure: constituency -> voter_id -> cropped_face.jpg")


[{'box': [59, 115, 257, 308], 'confidence': 0.9999141693115234, 'keypoints': {'nose': [190, 283], 'mouth_right': [240, 343], 'right_eye': [254, 226], 'left_eye': [128, 221], 'mouth_left': [134, 343]}}]
✅ MTCNN processing complete.
✅ Cropped faces organized under: D:\AIE\SEM 3\FAI project\MTCNN_faces
✅ Structure: constituency -> voter_id -> cropped_face.jpg


In [None]:
import cv2
import pandas as pd
import numpy as np
from keras_facenet import FaceNet

# Load dataset with cropped_face_path column
metadata_csv = r"D:\AIE\SEM 3\FAI project\metadata.csv"
df = pd.read_csv(metadata_csv)

# Initialize FaceNet
embedder = FaceNet()

# New column for embeddings
df["facenet_embedding"] = None

for idx, row in df.iterrows():
    face_path = row["cropped_face_path"]

    if not isinstance(face_path, str) or not face_path.strip():
        print(f"⚠️ Skipping {row['image_path']} (no cropped face)")
        continue

    # Load cropped face
    face = cv2.imread(face_path)
    if face is None:
        print(f"⚠️ Skipping {face_path} (not found)")
        continue

    # Preprocess: resize to 160x160 RGB
    face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    face_resized = cv2.resize(face_rgb, (160, 160))
    face_array = np.asarray(face_resized, dtype="float32")

    # Generate embedding
    embedding = embedder.embeddings([face_array])[0]

    # Store in DataFrame
    df.at[idx, "facenet_embedding"] = embedding.tolist()

# Save updated CSV
output_csv = r"D:\AIE\SEM 3\FAI project\facenet_metadata.csv"
df.to_csv(output_csv, index=False)

print(f"✅ FaceNet embeddings added and saved at {output_csv}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [None]:
import cv2
import pandas as pd
import numpy as np
from mtcnn import MTCNN
from keras_facenet import FaceNet
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import cosine_similarity

# -------------------------
# Paths
# -------------------------
embeddings_csv = r"D:\AIE\SEM 3\FAI project\facenet_metadata.csv"  # CSV with existing embeddings
new_face_path = r"D:\AIE\SEM 3\FAI project\new_face.jpeg"           # new face to recognize

# -------------------------
# Load dataset
# -------------------------
df = pd.read_csv(embeddings_csv)

# Convert embedding strings to numpy arrays
df['facenet_embedding'] = df['facenet_embedding'].apply(lambda x: np.array(eval(x)))

X = np.vstack(df['facenet_embedding'].values)  # embeddings
y = df['voter_id'].values                     # labels

# -------------------------
# Train KNN classifier
# -------------------------
knn = KNeighborsClassifier(n_neighbors=1, metric='cosine')
knn.fit(X, y)
print("✅ KNN trained on existing embeddings")

# -------------------------
# Initialize FaceNet + MTCNN
# -------------------------
embedder = FaceNet()
detector = MTCNN()

# -------------------------
# Detect + crop new face
# -------------------------
img = cv2.imread(new_face_path)
if img is None:
    raise Exception(f"Image not found: {new_face_path}")

results = detector.detect_faces(img)
if len(results) == 0:
    raise Exception("No face detected in the image")

x, y1, w, h = results[0]['box']
x, y1 = max(0, x), max(0, y1)
face = img[y1:y1+h, x:x+w]

# Preprocess for FaceNet
face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
face_resized = cv2.resize(face_rgb, (160, 160))
face_array = np.asarray(face_resized, dtype='float32')

# Generate embedding
new_embedding = embedder.embeddings([face_array])[0]

# -------------------------
# Predict voter ID using KNN
# -------------------------
predicted_id = knn.predict([new_embedding])[0]
print("Predicted voter ID:", predicted_id)

# -------------------------
# Verify using cosine similarity
# -------------------------
pred_idx = np.where(y == predicted_id)[0][0]
pred_voter_embedding = X[pred_idx]

similarity = cosine_similarity([new_embedding], [pred_voter_embedding])[0][0]
print("Cosine similarity with predicted voter:", similarity)

threshold = 0.7
if similarity > threshold:
    print("✅ Verification passed: same person")
else:
    print("⚠️ Verification failed: possible mismatch")


✅ KNN trained on existing embeddings
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Predicted voter ID: voterid983
Cosine similarity with predicted voter: 0.9999222762281197
✅ Verification passed: same person
