<a href="https://colab.research.google.com/github/Kalashshetty/face_recognition_system/blob/main/Face_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, applications
import zipfile
import os
import time
import pandas as pd
from google.colab import files
from IPython.display import display, Javascript
from google.colab.output import eval_js
import base64

# Force TensorFlow to use CPU only
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

def upload_and_extract_dataset():
    """Ask for a ZIP upload through Colab and unpack it into ``dataset``.

    The archive's member names are printed before extraction.

    Returns:
        str: The extraction directory name, ``'dataset'``.

    Raises:
        ValueError: If nothing was uploaded, or if the target directory does
            not exist after extraction.
    """
    target_dir = 'dataset'

    print("Please upload your zipped dataset containing face images")
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("No file was uploaded")

    # Only the first uploaded file is treated as the dataset archive.
    archive_name = next(iter(uploaded))
    with zipfile.ZipFile(archive_name, 'r') as archive:
        print("Extracting ZIP contents:")
        for member in archive.infolist():
            print(f"- {member.filename}")
        archive.extractall(target_dir)

    if os.path.exists(target_dir):
        return target_dir
    raise ValueError("Dataset extraction failed")

def prepare_training_data(dataset_path):
    """Load every person's images and append augmented variants.

    Each sub-directory of the dataset root is one person; its ``.jpg``,
    ``.jpeg`` and ``.png`` files are read with OpenCV, resized to 224x224 and
    labelled with a consecutive integer. Three randomly augmented copies of
    every image are appended after the originals.

    Args:
        dataset_path: Directory the dataset was extracted into. Its
            ``ALL_Dataset`` sub-folder is scanned when present, otherwise the
            directory itself.

    Returns:
        tuple: ``(images, labels, label_map)``. ``images`` is a float32 array
        scaled to ``[0, 1]`` (channel order as returned by ``cv2.imread``),
        ``labels`` holds the matching integer labels and ``label_map`` maps
        each label to the person's folder name.

    Raises:
        ValueError: If no image could be loaded.
    """
    image_size = (224, 224)
    augmentations_per_image = 3
    extensions = ('.jpg', '.jpeg', '.png')

    # Augmentation is run on raw 0-255 pixels and `rescale` maps the result to
    # [0, 1] afterwards. `brightness_range` round-trips through an 8-bit PIL
    # image, so feeding it already-normalised floats would put the augmented
    # samples on a different scale than the originals.
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        brightness_range=[0.8, 1.2],
        zoom_range=0.2,
        shear_range=0.2,
        rescale=1.0 / 255,
    )

    base_path = os.path.join(dataset_path, 'ALL_Dataset')
    if not os.path.exists(base_path):
        base_path = dataset_path  # Fallback to root if ALL_Dataset not found

    raw_images = []
    labels = []
    label_map = {}
    current_label = 0

    print(f"\nScanning dataset directory: {base_path}")
    # Sorted so that label ids do not depend on filesystem listing order.
    for person_name in sorted(os.listdir(base_path)):
        person_path = os.path.join(base_path, person_name)
        if not os.path.isdir(person_path):
            continue

        image_count = 0
        print(f"\nProcessing {person_name}:")
        for img_name in sorted(os.listdir(person_path)):
            img_path = os.path.join(person_path, img_name)
            if not (os.path.isfile(img_path)
                    and img_name.lower().endswith(extensions)):
                continue

            img = cv2.imread(img_path)
            if img is None:
                print(f"- Failed to load {img_name}")
                continue

            raw_images.append(cv2.resize(img, image_size))
            labels.append(current_label)
            image_count += 1
            print(f"- Loaded {img_name}")

        print(f"Loaded {image_count} images for {person_name}")
        # A label id is only consumed by people that contributed images.
        if image_count > 0:
            label_map[current_label] = person_name
            current_label += 1

    if not raw_images:
        raise ValueError("No valid images found in the dataset. See above logs for details.")

    # float32 halves the memory of the default float64 arrays.
    raw_images = np.asarray(raw_images, dtype=np.float32)
    labels = np.asarray(labels)

    print(f"\nTotal images loaded: {len(raw_images)}")
    print(f"Total people: {len(label_map)}")

    augmented_images = []
    augmented_labels = []
    for raw, label in zip(raw_images, labels):
        aug_iter = datagen.flow(raw[np.newaxis], batch_size=1)
        for _ in range(augmentations_per_image):
            augmented_images.append(next(aug_iter)[0])
            augmented_labels.append(label)

    images = np.concatenate([
        raw_images / 255.0,
        np.asarray(augmented_images, dtype=np.float32),
    ])
    labels = np.concatenate([labels, augmented_labels])

    return images, labels, label_map

def create_and_train_model(images, labels):
    """Train a softmax classifier on top of a frozen MobileNetV2 backbone.

    Args:
        images: Array of training images shaped ``(n, 224, 224, 3)``.
        labels: Integer class label for each image.

    Returns:
        The fitted Keras model.

    Raises:
        ValueError: If fewer than five samples are supplied.
    """
    sample_count = len(images)
    if sample_count < 5:
        raise ValueError(f"Not enough samples ({len(images)}) to train the model")

    # Keras takes the validation split from the END of the arrays, before any
    # shuffling. The caller passes samples grouped by person, so without this
    # permutation the validation set would only contain the last identities.
    # A fixed seed keeps runs reproducible.
    order = np.random.default_rng(42).permutation(sample_count)
    images = np.asarray(images)[order]
    labels = np.asarray(labels)[order]

    # Hold out validation data only when there is enough data to spare.
    use_validation = sample_count >= 10

    base_model = applications.MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
    base_model.trainable = False  # only the new classification head is trained

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(len(np.unique(labels)), activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # `val_loss` only exists when a validation split is used.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss' if use_validation else 'loss',
        patience=3,
        restore_best_weights=True
    )

    model.fit(
        images, labels,
        epochs=20,
        batch_size=32,
        validation_split=0.2 if use_validation else 0.0,
        callbacks=[early_stopping]
    )

    return model

def take_photo(filename='photo.jpg', quality=0.8):
    """Grab one webcam frame through the browser and save it as a JPEG.

    The embedded JavaScript opens the camera, waits one second, draws the
    current frame to a canvas and returns it as a JPEG data URL, which is
    decoded and written to ``filename``.

    Args:
        filename: Path of the image file to write.
        quality: JPEG quality passed to ``canvas.toDataURL``.

    Returns:
        str: ``filename``.
    """
    capture_script = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const video = document.createElement('video');
            video.style.display = 'block';
            const stream = await navigator.mediaDevices.getUserMedia({video: true});
            document.body.appendChild(div);
            div.appendChild(video);
            video.srcObject = stream;
            await video.play();
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
            await new Promise((resolve) => setTimeout(resolve, 1000));
            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            stream.getVideoTracks()[0].stop();
            div.remove();
            return canvas.toDataURL('image/jpeg', quality);
        }
    ''')
    display(capture_script)

    data_url = eval_js(f'takePhoto({quality})')
    # The data URL is "<header>,<base64 payload>"; keep only the payload.
    encoded_payload = data_url.split(',')[1]
    jpeg_bytes = base64.b64decode(encoded_payload)

    with open(filename, 'wb') as out_file:
        out_file.write(jpeg_bytes)
    return filename

def main():
    """Train on an uploaded dataset, classify five webcam captures, export results.

    Every capture is searched for faces with a Haar cascade and each face is
    classified independently. Annotated frames are written to
    ``processed_<i>.jpg``. ``detection_results.xlsx`` receives one
    'Detections' row per successful capture and one 'Missing' row for every
    known person not recognised in a capture that recognised somebody.

    Errors are reported on stdout rather than raised; a failed capture is
    skipped and the remaining captures still run.
    """
    capture_count = 5
    capture_interval_s = 30
    confidence_threshold = 0.95

    try:
        dataset_path = upload_and_extract_dataset()
        images, labels, label_map = prepare_training_data(dataset_path)

        print("Training the model on CPU...")
        with tf.device('/CPU:0'):
            model = create_and_train_model(images, labels)

        results = []
        missing_rows = []
        all_people = set(label_map.values())
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

        print("Starting camera capture...")
        for i in range(capture_count):
            try:
                filename = take_photo(f'capture_{i}.jpg')
                img = cv2.imread(filename)
                if img is None:
                    raise ValueError(f"Could not read captured image '{filename}'")

                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, 1.1, 4)

                timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
                recognised = []
                # Draw on a copy so boxes/text of earlier faces never end up
                # inside the crop of a later, overlapping face.
                annotated = img.copy()

                for (x, y, w, h) in faces:
                    face = cv2.resize(img[y:y+h, x:x+w], (224, 224))
                    face = np.expand_dims(face / 255.0, axis=0)

                    with tf.device('/CPU:0'):
                        prediction = model.predict(face)
                    person_id = int(np.argmax(prediction))
                    confidence = float(np.max(prediction))

                    # Decided per face: a confident match for one face must
                    # not be reused as the label of the next one.
                    face_name = "Unknown"
                    if confidence > confidence_threshold:
                        face_name = label_map[person_id]
                        if face_name not in recognised:
                            recognised.append(face_name)

                    cv2.rectangle(annotated, (x, y), (x+w, y+h), (0, 255, 0), 2)
                    cv2.putText(annotated, f"{face_name} ({confidence:.2f})",
                                (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                                (0, 255, 0), 2)

                detected_person = ", ".join(recognised) if recognised else "Unknown"
                cv2.imwrite(f'processed_{i}.jpg', annotated)
                results.append({'timestamp': timestamp, 'person': detected_person})
                if recognised:
                    missing_rows.extend(
                        {'timestamp': timestamp, 'missing_person': name}
                        for name in sorted(all_people.difference(recognised))
                    )

                print(f"Captured image {i+1}/5 - Detected: {detected_person}")
                if i < capture_count - 1:
                    time.sleep(capture_interval_s)

            except Exception as e:
                print(f"Error in capture {i}: {str(e)}")

        # Explicit columns keep both sheets well-formed even when every
        # capture failed and the row lists are empty.
        df = pd.DataFrame(results, columns=['timestamp', 'person'])
        df_missing = pd.DataFrame(missing_rows, columns=['timestamp', 'missing_person'])

        with pd.ExcelWriter('detection_results.xlsx') as writer:
            df.to_excel(writer, sheet_name='Detections', index=False)
            df_missing.to_excel(writer, sheet_name='Missing', index=False)

        print("Results saved to 'detection_results.xlsx'")
        files.download('detection_results.xlsx')

    except Exception as e:
        print(f"Error: {str(e)}")
        print("\nDebugging tips:")
        print("- Check if images are readable")
        print("- Verify sufficient memory available")
        print("- Ensure images contain detectable faces")

if __name__ == "__main__":
    main()


In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, applications
import zipfile
import os
import time
import pandas as pd
from google.colab import files
from IPython.display import display, Javascript
from google.colab.output import eval_js
import base64

# Force TensorFlow to use CPU only
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

def upload_and_extract_dataset():
    """Ask for a ZIP upload through Colab and unpack it into ``dataset``.

    The archive's member names are printed before extraction.

    Returns:
        str: The extraction directory name, ``'dataset'``.

    Raises:
        ValueError: If nothing was uploaded, or if the target directory does
            not exist after extraction.
    """
    target_dir = 'dataset'

    print("Please upload your zipped dataset containing face images")
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("No file was uploaded")

    # Only the first uploaded file is treated as the dataset archive.
    archive_name = next(iter(uploaded))
    with zipfile.ZipFile(archive_name, 'r') as archive:
        print("Extracting ZIP contents:")
        for member in archive.infolist():
            print(f"- {member.filename}")
        archive.extractall(target_dir)

    if os.path.exists(target_dir):
        return target_dir
    raise ValueError("Dataset extraction failed")

def prepare_training_data(dataset_path):
    """Load every person's images and append augmented variants.

    Each sub-directory of the dataset root is one person; its ``.jpg``,
    ``.jpeg`` and ``.png`` files are read with OpenCV, resized to 224x224 and
    labelled with a consecutive integer. Three randomly augmented copies of
    every image are appended after the originals.

    Args:
        dataset_path: Directory the dataset was extracted into. Its
            ``ALL_Dataset`` sub-folder is scanned when present, otherwise the
            directory itself.

    Returns:
        tuple: ``(images, labels, label_map)``. ``images`` is a float32 array
        scaled to ``[0, 1]`` (channel order as returned by ``cv2.imread``),
        ``labels`` holds the matching integer labels and ``label_map`` maps
        each label to the person's folder name.

    Raises:
        ValueError: If no image could be loaded.
    """
    image_size = (224, 224)
    augmentations_per_image = 3
    extensions = ('.jpg', '.jpeg', '.png')

    # Augmentation is run on raw 0-255 pixels and `rescale` maps the result to
    # [0, 1] afterwards. `brightness_range` round-trips through an 8-bit PIL
    # image, so feeding it already-normalised floats would put the augmented
    # samples on a different scale than the originals.
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        brightness_range=[0.8, 1.2],
        zoom_range=0.2,
        shear_range=0.2,
        rescale=1.0 / 255,
    )

    base_path = os.path.join(dataset_path, 'ALL_Dataset')
    if not os.path.exists(base_path):
        base_path = dataset_path  # Fallback to root if ALL_Dataset not found

    raw_images = []
    labels = []
    label_map = {}
    current_label = 0

    print(f"\nScanning dataset directory: {base_path}")
    # Sorted so that label ids do not depend on filesystem listing order.
    for person_name in sorted(os.listdir(base_path)):
        person_path = os.path.join(base_path, person_name)
        if not os.path.isdir(person_path):
            continue

        image_count = 0
        print(f"\nProcessing {person_name}:")
        for img_name in sorted(os.listdir(person_path)):
            img_path = os.path.join(person_path, img_name)
            if not (os.path.isfile(img_path)
                    and img_name.lower().endswith(extensions)):
                continue

            img = cv2.imread(img_path)
            if img is None:
                print(f"- Failed to load {img_name}")
                continue

            raw_images.append(cv2.resize(img, image_size))
            labels.append(current_label)
            image_count += 1
            print(f"- Loaded {img_name}")

        print(f"Loaded {image_count} images for {person_name}")
        # A label id is only consumed by people that contributed images.
        if image_count > 0:
            label_map[current_label] = person_name
            current_label += 1

    if not raw_images:
        raise ValueError("No valid images found in the dataset. See above logs for details.")

    # float32 halves the memory of the default float64 arrays.
    raw_images = np.asarray(raw_images, dtype=np.float32)
    labels = np.asarray(labels)

    print(f"\nTotal images loaded: {len(raw_images)}")
    print(f"Total people: {len(label_map)}")

    augmented_images = []
    augmented_labels = []
    for raw, label in zip(raw_images, labels):
        aug_iter = datagen.flow(raw[np.newaxis], batch_size=1)
        for _ in range(augmentations_per_image):
            augmented_images.append(next(aug_iter)[0])
            augmented_labels.append(label)

    images = np.concatenate([
        raw_images / 255.0,
        np.asarray(augmented_images, dtype=np.float32),
    ])
    labels = np.concatenate([labels, augmented_labels])

    return images, labels, label_map

def create_and_train_model(images, labels):
    """Train a softmax classifier on top of a frozen MobileNetV2 backbone.

    Args:
        images: Array of training images shaped ``(n, 224, 224, 3)``.
        labels: Integer class label for each image.

    Returns:
        The fitted Keras model.

    Raises:
        ValueError: If fewer than five samples are supplied.
    """
    sample_count = len(images)
    if sample_count < 5:
        raise ValueError(f"Not enough samples ({len(images)}) to train the model")

    # Keras takes the validation split from the END of the arrays, before any
    # shuffling. The caller passes samples grouped by person, so without this
    # permutation the validation set would only contain the last identities.
    # A fixed seed keeps runs reproducible.
    order = np.random.default_rng(42).permutation(sample_count)
    images = np.asarray(images)[order]
    labels = np.asarray(labels)[order]

    # Hold out validation data only when there is enough data to spare.
    use_validation = sample_count >= 10

    base_model = applications.MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
    base_model.trainable = False  # only the new classification head is trained

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(len(np.unique(labels)), activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # `val_loss` only exists when a validation split is used.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss' if use_validation else 'loss',
        patience=3,
        restore_best_weights=True
    )

    model.fit(
        images, labels,
        epochs=20,
        batch_size=32,
        validation_split=0.2 if use_validation else 0.0,
        callbacks=[early_stopping]
    )

    return model

def take_photo(filename='photo.jpg', quality=0.8):
    """Grab one webcam frame through the browser and save it as a JPEG.

    The embedded JavaScript opens the camera, waits one second, draws the
    current frame to a canvas and returns it as a JPEG data URL, which is
    decoded and written to ``filename``.

    Args:
        filename: Path of the image file to write.
        quality: JPEG quality passed to ``canvas.toDataURL``.

    Returns:
        str: ``filename``.
    """
    capture_script = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const video = document.createElement('video');
            video.style.display = 'block';
            const stream = await navigator.mediaDevices.getUserMedia({video: true});
            document.body.appendChild(div);
            div.appendChild(video);
            video.srcObject = stream;
            await video.play();
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
            await new Promise((resolve) => setTimeout(resolve, 1000));
            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            stream.getVideoTracks()[0].stop();
            div.remove();
            return canvas.toDataURL('image/jpeg', quality);
        }
    ''')
    display(capture_script)

    data_url = eval_js(f'takePhoto({quality})')
    # The data URL is "<header>,<base64 payload>"; keep only the payload.
    encoded_payload = data_url.split(',')[1]
    jpeg_bytes = base64.b64decode(encoded_payload)

    with open(filename, 'wb') as out_file:
        out_file.write(jpeg_bytes)
    return filename

def main():
    """Train on an uploaded dataset, classify five webcam captures, export results.

    Every capture is searched for faces with a Haar cascade and each face is
    classified independently. Annotated frames are written to
    ``processed_<i>.jpg``. ``detection_results.xlsx`` receives one
    'Detections' row per successful capture and one 'Missing' row for every
    known person not recognised in a capture that recognised somebody.

    Errors are reported on stdout rather than raised; a failed capture is
    skipped and the remaining captures still run.
    """
    capture_count = 5
    capture_interval_s = 30
    confidence_threshold = 0.95

    try:
        dataset_path = upload_and_extract_dataset()
        images, labels, label_map = prepare_training_data(dataset_path)

        print("Training the model on CPU...")
        with tf.device('/CPU:0'):
            model = create_and_train_model(images, labels)

        results = []
        missing_rows = []
        all_people = set(label_map.values())
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

        print("Starting camera capture...")
        for i in range(capture_count):
            try:
                filename = take_photo(f'capture_{i}.jpg')
                img = cv2.imread(filename)
                if img is None:
                    raise ValueError(f"Could not read captured image '{filename}'")

                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, 1.1, 4)

                timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
                recognised = []
                # Draw on a copy so boxes/text of earlier faces never end up
                # inside the crop of a later, overlapping face.
                annotated = img.copy()

                for (x, y, w, h) in faces:
                    face = cv2.resize(img[y:y+h, x:x+w], (224, 224))
                    face = np.expand_dims(face / 255.0, axis=0)

                    with tf.device('/CPU:0'):
                        prediction = model.predict(face)
                    person_id = int(np.argmax(prediction))
                    confidence = float(np.max(prediction))

                    # Decided per face: a confident match for one face must
                    # not be reused as the label of the next one.
                    face_name = "Unknown"
                    if confidence > confidence_threshold:
                        face_name = label_map[person_id]
                        if face_name not in recognised:
                            recognised.append(face_name)

                    cv2.rectangle(annotated, (x, y), (x+w, y+h), (0, 255, 0), 2)
                    cv2.putText(annotated, f"{face_name} ({confidence:.2f})",
                                (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                                (0, 255, 0), 2)

                detected_person = ", ".join(recognised) if recognised else "Unknown"
                cv2.imwrite(f'processed_{i}.jpg', annotated)
                results.append({'timestamp': timestamp, 'person': detected_person})
                if recognised:
                    missing_rows.extend(
                        {'timestamp': timestamp, 'missing_person': name}
                        for name in sorted(all_people.difference(recognised))
                    )

                print(f"Captured image {i+1}/5 - Detected: {detected_person}")
                if i < capture_count - 1:
                    time.sleep(capture_interval_s)

            except Exception as e:
                print(f"Error in capture {i}: {str(e)}")

        # Explicit columns keep both sheets well-formed even when every
        # capture failed and the row lists are empty.
        df = pd.DataFrame(results, columns=['timestamp', 'person'])
        df_missing = pd.DataFrame(missing_rows, columns=['timestamp', 'missing_person'])

        with pd.ExcelWriter('detection_results.xlsx') as writer:
            df.to_excel(writer, sheet_name='Detections', index=False)
            df_missing.to_excel(writer, sheet_name='Missing', index=False)

        print("Results saved to 'detection_results.xlsx'")
        files.download('detection_results.xlsx')

    except Exception as e:
        print(f"Error: {str(e)}")
        print("\nDebugging tips:")
        print("- Check if images are readable")
        print("- Verify sufficient memory available")
        print("- Ensure images contain detectable faces")

if __name__ == "__main__":
    main()


In [None]:
from google.colab import files
uploaded = files.upload()

In [None]:
!ls

In [None]:
!rm -f facenet_keras.h5

In [None]:
import gdown
url = "https://drive.google.com/uc?id=1PZ_6Zsy1Vb0s0JmjEmVd8FS99zoMCiN1"
output = 'facenet_keras.h5'
gdown.download(url, output, quiet=False)

In [None]:
!ls -lh facenet_keras.h5

In [None]:
model = tf.keras.models.load_model('facenet_keras.h5')

In [None]:
# Install required libraries
# NOTE(review): both opencv-python and opencv-python-headless are requested
# here; they presumably provide the same `cv2` module — confirm whether one of
# them should be dropped.
# NOTE(review): the code below also imports sklearn and scipy and writes an
# .xlsx file via pd.ExcelWriter, none of which are installed by this line —
# verify they are already available in the runtime.
!pip install opencv-python opencv-python-headless pandas numpy tensorflow gdown

import os
import zipfile
import cv2
import numpy as np
import tensorflow as tf
import pandas as pd
import time
from google.colab import files
from IPython.display import display, Javascript
from google.colab.output import eval_js
import base64
from sklearn.preprocessing import Normalizer
from scipy.spatial.distance import cosine
import gdown

# Force TensorFlow to use CPU only
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Load pre-trained FaceNet model
def load_facenet_model():
    """Return the Keras FaceNet model, downloading ``facenet_keras.h5`` if absent.

    The file size is checked before loading. A file that is too small is
    deleted so that the next call downloads it again instead of reusing the
    broken copy.

    Returns:
        The model loaded with ``tf.keras.models.load_model``.

    Raises:
        ValueError: If the model file is missing after the download attempt
            or is smaller than expected.
    """
    model_path = 'facenet_keras.h5'
    min_expected_bytes = 90000000  # File should be ~92.4 MB
    corrupt_message = "Downloaded model file is corrupted or incomplete. Please try again."

    if not os.path.exists(model_path):
        print("Downloading FaceNet model...")
        url = "https://drive.google.com/uc?id=1PZ_6Zsy1Vb0s0JmjEmVd8FS99zoMCiN1"
        gdown.download(url, model_path, quiet=False)

    # The download may finish without producing a file; report that with the
    # same error instead of letting getsize() fail on a missing path.
    if not os.path.exists(model_path):
        raise ValueError(corrupt_message)

    # Verify file size
    if os.path.getsize(model_path) < min_expected_bytes:
        # Remove the truncated file, otherwise "try again" would skip the
        # download because the path already exists.
        os.remove(model_path)
        raise ValueError(corrupt_message)

    return tf.keras.models.load_model(model_path)

# Function to extract face embeddings using FaceNet
def get_embedding(model, face_pixels):
    """Standardise one face image and return the model's embedding for it.

    Args:
        model: Object with a ``predict(batch)`` method.
        face_pixels: Image array for a single face.

    Returns:
        The first row of ``model.predict`` for the standardised image.
    """
    face_pixels = np.asarray(face_pixels, dtype='float32')
    mean, std = face_pixels.mean(), face_pixels.std()

    # A constant image has std == 0; dividing by it would turn the whole
    # input (and therefore the embedding) into NaN. The divisor is floored at
    # 1/sqrt(N), which leaves non-degenerate images unchanged.
    min_std = 1.0 / np.sqrt(max(face_pixels.size, 1))
    scale = float(max(std, min_std))  # plain float keeps the array float32

    face_pixels = (face_pixels - mean) / scale
    samples = np.expand_dims(face_pixels, axis=0)  # add the batch axis
    embedding = model.predict(samples)
    return embedding[0]

# Function to unzip the dataset
def upload_and_extract_dataset():
    """Ask for a ZIP upload through Colab and unpack it into ``dataset``.

    The archive's member names are printed before extraction.

    Returns:
        str: The extraction directory name, ``'dataset'``.

    Raises:
        ValueError: If nothing was uploaded, or if the target directory does
            not exist after extraction.
    """
    target_dir = 'dataset'

    print("Please upload your zipped dataset containing face images")
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("No file was uploaded")

    # Only the first uploaded file is treated as the dataset archive.
    archive_name = next(iter(uploaded))
    with zipfile.ZipFile(archive_name, 'r') as archive:
        print("Extracting ZIP contents:")
        for member in archive.infolist():
            print(f"- {member.filename}")
        archive.extractall(target_dir)

    if os.path.exists(target_dir):
        return target_dir
    raise ValueError("Dataset extraction failed")

# Function to prepare training data
def prepare_training_data(dataset_path):
    """Load the labelled face images used as the FaceNet reference gallery.

    Each sub-directory of the dataset root is one person. Its ``.jpg``,
    ``.jpeg`` and ``.png`` files are read, converted to RGB and resized to
    160x160.

    Args:
        dataset_path: Directory the dataset was extracted into. Its
            ``ALL_Dataset`` sub-folder is scanned when present, otherwise the
            directory itself.

    Returns:
        tuple: ``(images, labels, label_map)`` — the stacked RGB images, the
        integer label of each image, and a dict from label to folder name.

    Raises:
        ValueError: If no image could be loaded.
    """
    image_size = (160, 160)  # FaceNet input size
    extensions = ('.jpg', '.jpeg', '.png')

    # Accept archives that wrap the person folders in `ALL_Dataset`; without
    # this the wrapper itself would be scanned as a person with no images.
    base_path = os.path.join(dataset_path, 'ALL_Dataset')
    if not os.path.exists(base_path):
        base_path = dataset_path

    images = []
    labels = []
    label_map = {}
    current_label = 0

    print(f"\nScanning dataset directory: {base_path}")
    # Sorted so that label ids do not depend on filesystem listing order.
    for person_name in sorted(os.listdir(base_path)):
        person_path = os.path.join(base_path, person_name)
        if not os.path.isdir(person_path):
            continue

        image_count = 0
        print(f"\nProcessing {person_name}:")
        for img_name in sorted(os.listdir(person_path)):
            img_path = os.path.join(person_path, img_name)
            if not (os.path.isfile(img_path)
                    and img_name.lower().endswith(extensions)):
                continue

            img = cv2.imread(img_path)
            if img is None:
                print(f"- Failed to load {img_name}")
                continue

            # cv2 reads BGR, while the captured frames are converted to RGB
            # before embedding; use the same channel order here so gallery
            # and probe embeddings are comparable.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images.append(cv2.resize(img, image_size))
            labels.append(current_label)
            image_count += 1
            print(f"- Loaded {img_name}")

        print(f"Loaded {image_count} images for {person_name}")
        # A label id is only consumed by people that contributed images.
        if image_count > 0:
            label_map[current_label] = person_name
            current_label += 1

    if not images:
        raise ValueError("No valid images found in the dataset. See above logs for details.")

    images = np.array(images)
    labels = np.array(labels)

    print(f"\nTotal images loaded: {len(images)}")
    print(f"Total people: {len(label_map)}")

    return images, labels, label_map

# Function to take a photo using the webcam
def take_photo(filename='photo.jpg', quality=0.8):
    """Grab one webcam frame through the browser and save it as a JPEG.

    The embedded JavaScript opens the camera, waits one second, draws the
    current frame to a canvas and returns it as a JPEG data URL, which is
    decoded and written to ``filename``.

    Args:
        filename: Path of the image file to write.
        quality: JPEG quality passed to ``canvas.toDataURL``.

    Returns:
        str: ``filename``.
    """
    capture_script = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const video = document.createElement('video');
            video.style.display = 'block';
            const stream = await navigator.mediaDevices.getUserMedia({video: true});
            document.body.appendChild(div);
            div.appendChild(video);
            video.srcObject = stream;
            await video.play();
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
            await new Promise((resolve) => setTimeout(resolve, 1000));
            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            stream.getVideoTracks()[0].stop();
            div.remove();
            return canvas.toDataURL('image/jpeg', quality);
        }
    ''')
    display(capture_script)

    data_url = eval_js(f'takePhoto({quality})')
    # The data URL is "<header>,<base64 payload>"; keep only the payload.
    encoded_payload = data_url.split(',')[1]
    jpeg_bytes = base64.b64decode(encoded_payload)

    with open(filename, 'wb') as out_file:
        out_file.write(jpeg_bytes)
    return filename

# Main function
def main():
    """Match five webcam captures against FaceNet embeddings of the dataset.

    The dataset images are embedded once and L2-normalised. Each capture is
    resized to 160x160, embedded the same way and assigned the identity of
    the closest dataset embedding when its cosine distance is below 0.5,
    otherwise "Unknown". Results are written to ``detection_results.xlsx``.

    Errors are reported on stdout rather than raised; a failed capture is
    skipped and the remaining captures still run.
    """
    capture_count = 5
    capture_interval_s = 30
    max_cosine_distance = 0.5  # Threshold for recognition

    try:
        # Load pre-trained FaceNet model
        print("Loading FaceNet model...")
        facenet_model = load_facenet_model()

        # Upload and extract dataset
        dataset_path = upload_and_extract_dataset()

        # Prepare training data
        images, labels, label_map = prepare_training_data(dataset_path)

        # Extract embeddings for all images in the dataset
        print("Extracting embeddings from dataset...")
        embeddings = np.array([get_embedding(facenet_model, img) for img in images])

        # Normalize embeddings
        normalizer = Normalizer(norm='l2')
        embeddings = normalizer.transform(embeddings)

        # Start camera capture and recognition
        results = []
        print("Starting camera capture...")
        for i in range(capture_count):
            try:
                filename = take_photo(f'capture_{i}.jpg')
                img = cv2.imread(filename)
                if img is None:
                    raise ValueError(f"Could not read captured image '{filename}'")
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_resized = cv2.resize(img_rgb, (160, 160))

                # transform() returns a (1, d) matrix; keep the single row so
                # the distance computation works on a 1-D vector.
                captured_embedding = get_embedding(facenet_model, img_resized)
                captured_embedding = normalizer.transform([captured_embedding])[0]

                # Both sides are unit length, so the cosine distance to every
                # dataset embedding is 1 - dot product, computed in one step.
                distances = 1.0 - embeddings @ captured_embedding
                best_idx = int(np.argmin(distances))
                identity = "Unknown"
                if distances[best_idx] < max_cosine_distance:
                    identity = label_map[int(labels[best_idx])]

                timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
                results.append({'timestamp': timestamp, 'person': identity})

                print(f"Captured image {i+1}/5 - Detected: {identity}")
                if i < capture_count - 1:
                    time.sleep(capture_interval_s)

            except Exception as e:
                print(f"Error in capture {i}: {str(e)}")

        # Save results to Excel; explicit columns keep the sheet well-formed
        # even when every capture failed.
        df = pd.DataFrame(results, columns=['timestamp', 'person'])
        with pd.ExcelWriter('detection_results.xlsx') as writer:
            df.to_excel(writer, sheet_name='Detections', index=False)

        print("Results saved to 'detection_results.xlsx'")
        files.download('detection_results.xlsx')

    except Exception as e:
        print(f"Error: {str(e)}")
        print("\nDebugging tips:")
        print("- Check if images are readable")
        print("- Verify sufficient memory available")
        print("- Ensure images contain detectable faces")

if __name__ == "__main__":
    main()

GROK


In [None]:
# Install dependencies and build Dlib without CUDA
# NOTE(review): sklearn is imported below but is not part of the pip line —
# verify it is already available in the runtime.
!apt-get update
!apt-get install -y build-essential cmake libopenblas-dev liblapack-dev
!pip install opencv-python numpy pandas openpyxl
!pip uninstall -y dlib  # Remove any existing Dlib
!git clone https://github.com/davisking/dlib.git
# NOTE(review): the two %cd magics are never undone in this cell, so the code
# below presumably runs with dlib/build as its working directory (relative
# paths for the model files and the results workbook would land there) —
# confirm this is intended.
%cd dlib
!mkdir build
%cd build
!cmake .. -DUSE_AVX_INSTRUCTIONS=1 -DDLIB_USE_CUDA=0  # Explicitly disable CUDA
!cmake --build . --config Release
# `cd ..` only applies to this one shell command; setup.py is run from the
# dlib checkout root.
!cd .. && python setup.py install --no DLIB_USE_CUDA

import cv2
import numpy as np
import dlib
import os
import time
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import Normalizer
from google.colab import files
import zipfile
from IPython.display import display, Image

# Function to upload and extract zip file
def upload_and_extract_dataset():
    """Ask for a ZIP upload through Colab and unpack it under ``/content/dataset``.

    Returns:
        str: The ``ALL_Dataset`` folder inside the extraction directory when
        the archive contains one, otherwise the extraction directory itself.

    Raises:
        ValueError: If no file was uploaded.
    """
    print("Please upload your zipped dataset containing face images (e.g., faces.zip)")
    uploaded = files.upload()
    if not uploaded:
        # Without this guard an empty upload fails with an opaque IndexError.
        raise ValueError("No file was uploaded")
    zip_file = next(iter(uploaded))

    extract_path = '/content/dataset'
    os.makedirs(extract_path, exist_ok=True)
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

    # Archives without the `ALL_Dataset` wrapper keep the person folders at
    # the root; returning a non-existent path would only fail later on.
    nested_path = os.path.join(extract_path, 'ALL_Dataset')
    return nested_path if os.path.isdir(nested_path) else extract_path

# Function to get face embedding using Dlib
def get_embedding(face_img, detector, shape_predictor, face_recognizer):
    """Compute the face descriptor of the first face detected in an image.

    Args:
        face_img: BGR image array; it is converted to RGB before detection.
        detector: Callable returning face detections for ``(image, 1)``.
        shape_predictor: Callable returning landmarks for ``(image, detection)``.
        face_recognizer: Object providing ``compute_face_descriptor``.

    Returns:
        numpy.ndarray: The descriptor, or ``None`` when no face is detected.
    """
    rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
    detections = detector(rgb, 1)
    if len(detections) == 0:
        return None

    # Only the first detection is used, even if several faces were found.
    landmarks = shape_predictor(rgb, detections[0])
    descriptor = face_recognizer.compute_face_descriptor(rgb, landmarks)
    return np.array(descriptor)

# Function to prepare training data and train KNN
def prepare_and_train(dataset_path):
    """Embed every dataset face with Dlib and fit a 1-nearest-neighbour classifier.

    Each sub-directory of ``dataset_path`` is one person. Files in which no
    face is found are skipped, and people without a single usable image are
    left out of the label map.

    Args:
        dataset_path: Directory containing one folder per person.

    Returns:
        tuple: ``(knn, normalizer, label_map, detector, shape_predictor,
        face_recognizer)``.

    Raises:
        ValueError: If no embedding could be generated at all.
    """
    # Initialize Dlib models (CPU-only)
    detector = dlib.get_frontal_face_detector()
    shape_predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
    face_recognizer = dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')

    embeddings = []
    labels = []
    label_map = {}
    current_label = 0

    print("Scanning dataset and generating embeddings...")
    # Sorted so that label ids do not depend on filesystem listing order.
    for person_name in sorted(os.listdir(dataset_path)):
        person_path = os.path.join(dataset_path, person_name)
        if not os.path.isdir(person_path):
            continue

        image_count = 0
        for img_name in sorted(os.listdir(person_path)):
            img_path = os.path.join(person_path, img_name)
            if not os.path.isfile(img_path):
                continue
            img = cv2.imread(img_path)
            if img is None:
                continue
            embedding = get_embedding(img, detector, shape_predictor, face_recognizer)
            if embedding is None:
                continue
            embeddings.append(embedding)
            labels.append(current_label)
            image_count += 1

        print(f"Processed {image_count} valid face images for {person_name}")
        # Register a person only when at least one embedding exists. Someone
        # the classifier can never predict would otherwise be listed as
        # "missing" for every recognised image.
        if image_count > 0:
            label_map[current_label] = person_name
            current_label += 1

    if not embeddings:
        raise ValueError("No valid face embeddings generated")

    embeddings = np.array(embeddings)
    labels = np.array(labels)

    # Normalize embeddings
    normalizer = Normalizer(norm='l2')
    embeddings_normalized = normalizer.transform(embeddings)

    # Train KNN classifier
    knn = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
    knn.fit(embeddings_normalized, labels)

    return knn, normalizer, label_map, detector, shape_predictor, face_recognizer

# Function to process uploaded test images
def run_recognition(knn, normalizer, label_map, detector, shape_predictor,
                    face_recognizer, distance_threshold=0.6):
    """Recognise faces in uploaded test images and export the results.

    Every detected face is embedded on its own and labelled with the nearest
    training identity when the distance to that neighbour does not exceed
    ``distance_threshold``; otherwise it is labelled "Unknown". Annotated
    images are written to ``processed_<name>`` and displayed, and
    ``detection_results.xlsx`` is written and offered for download.

    Args:
        knn: Fitted nearest-neighbour classifier.
        normalizer: Normaliser applied to embeddings before classification.
        label_map: Dict from integer label to person name.
        detector: Dlib face detector.
        shape_predictor: Dlib landmark predictor.
        face_recognizer: Dlib face recognition model.
        distance_threshold: Largest nearest-neighbour distance accepted as a
            match. NOTE(review): 0.6 is a starting value to be tuned on real
            data, not a validated setting.
    """
    print("Please upload test images to recognize faces (e.g., test1.jpg, test2.jpg)")
    uploaded = files.upload()
    results = []
    missing_rows = []
    all_people = set(label_map.values())

    for filename, payload in uploaded.items():
        start_time = time.time()
        img = cv2.imdecode(np.frombuffer(payload, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            # Not decodable as an image; skip it instead of aborting the run.
            print(f"Skipping {filename}: could not decode image")
            continue

        rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        dets = detector(rgb_img, 1)
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        detected_people = []

        print(f"Detected {len(dets)} faces in {filename}")
        for d in dets:
            # Clamp the box to the image for drawing.
            left, top = max(0, d.left()), max(0, d.top())
            right, bottom = min(img.shape[1], d.right()), min(img.shape[0], d.bottom())
            if right <= left or bottom <= top:
                continue

            # Embed THIS detection. Re-running detection on the whole image
            # would return the first face's embedding for every box.
            shape = shape_predictor(rgb_img, d)
            embedding = np.array(face_recognizer.compute_face_descriptor(rgb_img, shape))
            embedding_normalized = normalizer.transform([embedding])

            # With a single neighbour predict_proba is always 1.0, so the
            # match decision is based on the neighbour distance instead.
            neighbour_distances, _ = knn.kneighbors(embedding_normalized, n_neighbors=1)
            distance = float(neighbour_distances[0][0])
            prediction = knn.predict(embedding_normalized)

            person_name = label_map[prediction[0]] if distance <= distance_threshold else "Unknown"
            detected_people.append(person_name)

            cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
            cv2.putText(img, f"{person_name} (d={distance:.2f})",
                        (left, top-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        output_filename = f'processed_{filename}'
        cv2.imwrite(output_filename, img)
        display(Image(filename=output_filename))

        # Unique recognised names, in detection order.
        recognised = list(dict.fromkeys(p for p in detected_people if p != "Unknown"))
        result_person = ", ".join(recognised) if recognised else "Unknown"
        results.append({'timestamp': timestamp, 'person': result_person})
        if recognised:
            missing_rows.extend(
                {'timestamp': timestamp, 'missing_person': name}
                for name in sorted(all_people.difference(recognised))
            )
        print(f"Processed {filename} - Detected: {', '.join(detected_people)} (Time: {time.time() - start_time:.2f}s)")

    # Generate Excel output; explicit columns keep both sheets well-formed
    # even when nothing was uploaded or recognised.
    df = pd.DataFrame(results, columns=['timestamp', 'person'])
    df_missing = pd.DataFrame(missing_rows, columns=['timestamp', 'missing_person'])

    output_file = 'detection_results.xlsx'
    with pd.ExcelWriter(output_file) as writer:
        df.to_excel(writer, sheet_name='Detections', index=False)
        df_missing.to_excel(writer, sheet_name='Missing', index=False)

    print(f"Results saved to '{output_file}'")
    files.download(output_file)

# Main execution
def main():
    """Run the end-to-end pipeline of this cell.

    Uploads and extracts the dataset, makes sure the two Dlib model files are
    present in the working directory (downloading and decompressing them when
    they are missing), then calls ``prepare_and_train`` and passes everything
    it returns on to ``run_recognition``.
    """
    dataset_path = upload_and_extract_dataset()
    print("Preparing face embeddings and training classifier...")
    # Download Dlib models if not present
    # NOTE: the `!wget` / `!bunzip2` lines are IPython shell escapes, so this
    # function can only run inside a notebook, not as a plain Python script.
    if not os.path.exists('shape_predictor_68_face_landmarks.dat'):
        !wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
        !bunzip2 shape_predictor_68_face_landmarks.dat.bz2
    if not os.path.exists('dlib_face_recognition_resnet_model_v1.dat'):
        !wget http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2
        !bunzip2 dlib_face_recognition_resnet_model_v1.dat.bz2

    # The six returned objects are forwarded unchanged, in the same order.
    knn, normalizer, label_map, detector, shape_predictor, face_recognizer = prepare_and_train(dataset_path)
    run_recognition(knn, normalizer, label_map, detector, shape_predictor, face_recognizer)

# Start the pipeline only when the cell is executed as the main module.
if __name__ == "__main__":
    main()

GEMINI

In [None]:
import os
os.environ['DLIB_USE_CUDA'] = '0'  # Force CPU usage
import face_recognition
import os, sys
import cv2
import numpy as np
import math
from google.colab import files
from google.colab.patches import cv2_imshow

def face_confidence(face_distance, face_match_threshold=0.6):
    """Convert a face distance into a percentage string such as ``'87.3%'``.

    A distance equal to ``face_match_threshold`` maps to 50%. Larger distances
    (non-matches) fall off linearly towards 0% at a distance of 1.0; smaller
    distances (matches) are pushed up by a non-linear correction term.

    Args:
        face_distance: Distance between two face encodings (smaller = closer).
        face_match_threshold: Distance at which a pair stops being a match.

    Returns:
        The confidence rounded to two decimals, followed by ``'%'``.
    """
    distance_range = 1.0 - face_match_threshold
    # The factor 2.0 puts the threshold itself at 0.5. Without it a distance at
    # the threshold came out as 100% and closer matches scored lower.
    linear_val = (1.0 - face_distance) / (distance_range * 2.0)

    if face_distance > face_match_threshold:
        return str(round(linear_val * 100, 2)) + '%'

    # linear_val >= 0.5 here, so the base passed to pow() is never negative.
    value = (linear_val + ((1.0 - linear_val) * math.pow((linear_val - 0.5) * 2, 0.2))) * 100
    return str(round(value, 2)) + '%'

class FaceRecognition:
    """Webcam face recogniser built on the ``face_recognition`` library.

    On construction every file in ``faces_dir`` is encoded and stored as a
    known face (named after the file, extension included).
    ``run_recognition`` then reads frames from camera 0, labels the faces it
    finds and displays each annotated frame.
    """

    # Immutable default only. The lists live on the instance (see __init__);
    # as class attributes they were shared, so every new instance appended to
    # the same gallery.
    process_current_frame = True

    def __init__(self, faces_dir='faces'):
        self.faces_dir = faces_dir
        self.face_locations = []
        self.face_encodings = []
        self.face_names = []
        self.known_face_encodings = []
        self.known_face_names = []
        self.process_current_frame = True
        self.encode_faces()

    def encode_faces(self):
        """Encode every image file in ``self.faces_dir`` into the gallery."""
        for image in os.listdir(self.faces_dir):
            image_path = os.path.join(self.faces_dir, image)
            # Skip sub-directories (e.g. notebook checkpoint folders).
            if not os.path.isfile(image_path):
                continue
            face_image = face_recognition.load_image_file(image_path)
            face_encoding = face_recognition.face_encodings(face_image)
            if len(face_encoding) > 0:
                self.known_face_encodings.append(face_encoding[0])
                self.known_face_names.append(image)
            else:
                print(f"Warning: No face found in {image}")

        print(self.known_face_names)

    def _label_for(self, face_encoding):
        """Return the ``'name (confidence)'`` label for one face encoding."""
        name = 'Unknown'
        confidence = 'Unknown'

        # With an empty gallery there is nothing to compare against, and
        # np.argmin would raise on the empty distance array.
        if len(self.known_face_encodings) > 0:
            matches = face_recognition.compare_faces(self.known_face_encodings, face_encoding)
            face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = self.known_face_names[best_match_index]
                confidence = face_confidence(face_distances[best_match_index])

        return f'{name} ({confidence})'

    def run_recognition(self):
        """Capture frames from camera 0 and display them with face labels.

        Exits the interpreter via ``sys.exit`` when the camera cannot be
        opened. The loop ends when a frame cannot be read or ``q`` is pressed.
        """
        video_capture = cv2.VideoCapture(0)

        if not video_capture.isOpened():
            sys.exit('Video source not found...')

        try:
            while True:
                ret, frame = video_capture.read()
                if not ret:
                    # frame is unusable on a failed read; resizing it would raise.
                    print("Failed to capture frame")
                    break

                # Detection runs on every other frame, on a quarter-size copy.
                if self.process_current_frame:
                    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
                    # cvtColor returns a contiguous RGB array; the [:, :, ::-1]
                    # view has negative strides, which newer dlib builds reject.
                    rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

                    self.face_locations = face_recognition.face_locations(rgb_small_frame)
                    self.face_encodings = face_recognition.face_encodings(rgb_small_frame, self.face_locations)
                    self.face_names = [self._label_for(enc) for enc in self.face_encodings]

                self.process_current_frame = not self.process_current_frame

                for (top, right, bottom, left), name in zip(self.face_locations, self.face_names):
                    # Scale the boxes back up to the full-size frame.
                    top *= 4
                    right *= 4
                    bottom *= 4
                    left *= 4

                    cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
                    cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), -1)
                    cv2.putText(frame, name, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)

                cv2_imshow(frame)

                if cv2.waitKey(1) == ord('q'):
                    break
        finally:
            # Release the camera even if recognition raises mid-loop.
            video_capture.release()
            cv2.destroyAllWindows()

if __name__ == '__main__':
    # Ask for the reference images and store each uploaded payload under faces/.
    uploaded = files.upload()
    os.makedirs('faces', exist_ok=True)
    for filename, payload in uploaded.items():
        with open(f'faces/{filename}', 'wb') as f:
            f.write(payload)

    # Build the gallery from faces/ and start the webcam loop.
    fr = FaceRecognition()
    fr.run_recognition()

In [None]:
!pip install face_recognition
import cv2
import numpy as np
import math
import face_recognition
import os
import sys
from google.colab.patches import cv2_imshow

# Create faces directory if it doesn't exist
# (FaceRecognition.encode_faces below lists this directory, so it has to be
# present before the class is instantiated).
if not os.path.exists('faces'):
    os.makedirs('faces')

# Function to calculate face confidence
def face_confidence(face_distance, face_match_threshold=0.6):
    """Express a face distance as a percentage string, e.g. ``'90.92%'``.

    A distance equal to the threshold yields 50%. Distances above it fall off
    linearly; distances at or below it receive an extra non-linear boost.
    """
    span = 1.0 - face_match_threshold
    linear_val = (1.0 - face_distance) / (span * 2.0)

    if face_distance > face_match_threshold:
        score = linear_val * 100
    else:
        boost = (1.0 - linear_val) * math.pow((linear_val - 0.5) * 2, 0.2)
        score = (linear_val + boost) * 100
    return f"{round(score, 2)}%"

class FaceRecognition:
    """Webcam face recogniser built on the ``face_recognition`` library.

    On construction every file in ``faces/`` is encoded and stored as a known
    face, named after the file without its extension. ``run_recognition`` then
    reads frames from camera 0, labels the faces it finds and displays each
    annotated frame.
    """

    # Immutable default only. The lists live on the instance (see __init__);
    # as class attributes they were shared, so every new instance appended to
    # the same gallery.
    process_current_frame = True

    def __init__(self):
        self.face_locations = []
        self.face_encodings = []
        self.face_names = []
        self.known_face_encodings = []
        self.known_face_names = []
        self.process_current_frame = True
        self.encode_faces()

    def encode_faces(self):
        """Encode every image file in ``faces/`` into the gallery."""
        for image in os.listdir('faces'):
            image_path = os.path.join('faces', image)
            # Skip sub-directories (e.g. notebook checkpoint folders).
            if not os.path.isfile(image_path):
                continue
            face_image = face_recognition.load_image_file(image_path)
            face_encoding = face_recognition.face_encodings(face_image)
            if face_encoding:
                self.known_face_encodings.append(face_encoding[0])
                # splitext drops only the extension, so "john.smith.jpg" stays
                # "john.smith" instead of being cut at the first dot.
                self.known_face_names.append(os.path.splitext(image)[0])
            else:
                print(f"Warning: No face found in {image}")

        print("Known faces:", self.known_face_names)

    def _label_for(self, face_encoding):
        """Return the ``'name (confidence)'`` label for one face encoding."""
        name = 'Unknown'
        confidence = 'Unknown'

        # With an empty gallery there is nothing to compare against, and
        # np.argmin would raise on the empty distance array.
        if len(self.known_face_encodings) > 0:
            matches = face_recognition.compare_faces(self.known_face_encodings, face_encoding)
            face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = self.known_face_names[best_match_index]
                confidence = face_confidence(face_distances[best_match_index])

        return f'{name} ({confidence})'

    def run_recognition(self):
        """Capture frames from camera 0 and display them with face labels.

        Returns immediately (after printing a message) when the camera cannot
        be opened. The loop ends when a frame cannot be read or ``q`` is
        pressed.
        """
        video_capture = cv2.VideoCapture(0)

        if not video_capture.isOpened():
            print("Video source not found...")
            return

        try:
            while True:
                ret, frame = video_capture.read()

                if not ret:
                    print("Failed to capture frame")
                    break

                # Detection runs on every other frame, on a quarter-size copy.
                if self.process_current_frame:
                    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
                    # cvtColor returns a contiguous RGB array; the [:, :, ::-1]
                    # view has negative strides, which newer dlib builds reject.
                    rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

                    self.face_locations = face_recognition.face_locations(rgb_small_frame)
                    self.face_encodings = face_recognition.face_encodings(rgb_small_frame, self.face_locations)
                    self.face_names = [self._label_for(enc) for enc in self.face_encodings]

                # The toggle must run on every frame. Inside the `if` above it
                # flipped to False once and recognition never ran again.
                self.process_current_frame = not self.process_current_frame

                for (top, right, bottom, left), name in zip(self.face_locations, self.face_names):
                    # Scale the boxes back up to the full-size frame.
                    top *= 4
                    right *= 4
                    bottom *= 4
                    left *= 4

                    cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
                    cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
                    cv2.putText(frame, name, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)

                cv2_imshow(frame)

                if cv2.waitKey(1) == ord('q'):
                    break
        finally:
            # Release the camera even if recognition raises mid-loop.
            video_capture.release()
            cv2.destroyAllWindows()

# Build the gallery from the faces/ directory and start the webcam loop.
if __name__ == '__main__':
    fr = FaceRecognition()
    fr.run_recognition()

In [None]:
!pip install face_recognition


In [None]:
# Install required libraries (CPU-only or GPU with CUDA)
try:
    import torch
    import torchvision
    !pip install opencv-python pandas openpyxl pillow torch torchvision --index-url https://download.pytorch.org/whl/cu118
    print("CUDA available, using GPU acceleration.")
    device = torch.device("cuda:0")
except ImportError:
    !pip install opencv-python pandas openpyxl pillow
    print("CUDA not available, using CPU.")
    device = torch.device("cpu")

import cv2
import zipfile
import os
import numpy as np
import pandas as pd
from google.colab import files
from pathlib import Path
from datetime import datetime
from IPython.display import display, Javascript, HTML
from google.colab.output import eval_js
from base64 import b64decode
import time
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim

# Step 1: Upload and extract the zipped dataset
display(HTML("<h2 style='color: #1E88E5; font-family: Arial;'>Upload Your Dataset</h2>"))
display(HTML("<p style='font-family: Arial;'>Please upload a zip file with subfolders of known faces (e.g., dataset/person1/img1.jpg).</p>"))
uploaded = files.upload()
if not uploaded:
    # Same guard as upload_and_extract_dataset(): without it an empty upload
    # surfaces as an opaque IndexError on the next line.
    raise ValueError("No file was uploaded")
extracted_folder = "dataset"
# Only the first uploaded file is treated as the dataset archive.
zip_file_name = next(iter(uploaded))
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder)

display(HTML(f"<p style='color: #388E3C; font-family: Arial;'>Extracted contents: {os.listdir(extracted_folder)}</p>"))

# Step 2: Load Haar Cascades for frontal and profile faces (improved detection)
cascade_frontal = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
cascade_profile = cv2.data.haarcascades + "haarcascade_profileface.xml"
face_cascade_frontal = cv2.CascadeClassifier(cascade_frontal)
face_cascade_profile = cv2.CascadeClassifier(cascade_profile)
# CascadeClassifier does not raise on a bad path; empty() is how a failed load shows up.
if face_cascade_frontal.empty() or face_cascade_profile.empty():
    raise ValueError("Error: Failed to load Haar cascade files.")

# Step 3: Prepare a CNN Model and train with provided dataset (Enhanced Accuracy)
class FaceDataset(Dataset):
    """Map-style dataset that pairs image file paths with their labels.

    Images are opened lazily in ``__getitem__`` and converted to RGB. The
    optional ``transform`` is applied to the PIL image before it is returned.
    """

    def __init__(self, img_paths, labels, transform=None):
        self.img_paths, self.labels, self.transform = img_paths, labels, transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        sample = Image.open(self.img_paths[idx]).convert('RGB')
        sample = self.transform(sample) if self.transform else sample
        return sample, self.labels[idx]

class FaceRecognitionModel(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and two linear layers.

    ``fc1`` expects 64 x 25 x 25 features, i.e. a 100 x 100 input after the two
    2x poolings.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Stage 1: 3 -> 32 channels, spatial size halved by the pool.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 channels, spatial size halved again.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head.
        self.fc1 = nn.Linear(64 * 25 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        stage1 = self.pool1(self.relu1(self.conv1(x)))
        stage2 = self.pool2(self.relu2(self.conv2(stage1)))
        # Collapse everything except the batch dimension.
        features = stage2.flatten(start_dim=1)
        hidden = self.relu3(self.fc1(features))
        return self.fc2(hidden)

# Walk the extracted dataset. The directory that directly contains an image
# names the person, and each distinct person gets one integer class id.
img_paths = []
labels = []
name_to_label = {}  # person name -> class id

for root, dirs, file_list in os.walk(extracted_folder):
    dirs.sort()  # makes the class ids reproducible between runs
    person_name = Path(root).name
    if not file_list or person_name == extracted_folder:
        continue
    for filename in sorted(file_list):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            # The old check `person_name not in label_dict` tested the integer
            # keys, so it was always true and added a new class per image.
            label_id = name_to_label.setdefault(person_name, len(name_to_label))
            img_paths.append(os.path.join(root, filename))
            labels.append(label_id)

# Class id -> person name, the direction used when reporting predictions.
label_dict = {label_id: name for name, label_id in name_to_label.items()}
current_label = len(label_dict)

# Preprocessing applied to every image: resize to 100x100 (the size fc1 of
# FaceRecognitionModel is dimensioned for), convert to a tensor, then normalise
# each channel with the mean/std values below.
transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# All collected images are used for training; no validation split is made here.
dataset = FaceDataset(img_paths, labels, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# One output unit per entry in label_dict.
model = FaceRecognitionModel(len(label_dict)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    for images, label_ids in train_loader:
        images, label_ids = images.to(device), label_ids.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, label_ids)
        loss.backward()
        optimizer.step()
    # `loss` is the loss of the last batch of the epoch, not an epoch average.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

display(HTML(f"<p style='color: #388E3C; font-family: Arial;'>Trained with {len(img_paths)} faces from {len(label_dict)} persons: {label_dict}</p>"))

# Step 4: Webcam function with automatic capture (same as before)
def capture_frame(filename='photo.jpg', quality=0.8):
    """Run the browser-side ``captureFrame`` helper and save its image to disk.

    Returns ``filename`` once the decoded data URL has been written, or
    ``None`` when the JavaScript call yields no data.

    NOTE(review): the JavaScript body in this cell is only a placeholder
    comment, so as written the helper does not capture anything.
    """
    js = Javascript('''
    async function captureFrame(quality) {
        // ... (same as before)
    }
    ''')
    display(js)
    data_url = eval_js(f'captureFrame({quality})')
    if data_url is None:
        return None
    # A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
    image_bytes = b64decode(data_url.split(',')[1])
    with open(filename, 'wb') as out_file:
        out_file.write(image_bytes)
    return filename

# Step 5: Process frames and log timestamps for multiple faces (Enhanced Accuracy)
def process_frame(img_path, capture_num, log_data):
    """Detect faces in the image at ``img_path`` with both Haar cascades.

    NOTE(review): in this cell the function stops after setting up its
    accumulators. Nothing is recognised, ``log_data`` is never touched and the
    function returns ``None`` on every path.
    """
    frame = cv2.imread(img_path)
    # cv2.imread signals an unreadable file by returning None instead of raising.
    if frame is None:
        display(HTML("<p style='color: #F44336; font-family: Arial;'>Error: Failed to load image.</p>"))
        return None
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frontal_faces = face_cascade_frontal.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=5, minSize=(30, 30))
    profile_faces = face_cascade_profile.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=5, minSize=(30, 30))
    # Stack both result sets when both are non-empty; otherwise use whichever
    # one has detections (or the empty profile result when neither does).
    faces_detected = np.vstack((frontal_faces, profile_faces)) if len(frontal_faces) > 0 and len(profile_faces) > 0 else frontal_faces if len(frontal_faces) > 0 else profile_faces
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entries = []
    print(f"Capture {capture_num + 1}: Detected {len(faces_detected)} faces")
    detected_names = set()

In [None]:
!pip uninstall -y torch torchvision

In [None]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118

In [None]:
!pip uninstall -y torch torchvision
!pip install torch torchvision

In [None]:
import torch
import torchvision

# Report the installed PyTorch stack and whether a CUDA device is usable.
for label, value in (
    ("PyTorch version", torch.__version__),
    ("torchvision version", torchvision.__version__),
    ("CUDA available", torch.cuda.is_available()),
    ("CUDA version", torch.version.cuda),
):
    print(f"{label}: {value}")

In [None]:
# Install required libraries (CPU-only or GPU with CUDA)
try:
    import torch
    import torchvision
    !pip install opencv-python pandas openpyxl pillow torch torchvision --index-url https://download.pytorch.org/whl/cu118
    print("CUDA available, using GPU acceleration.")
    device = torch.device("cuda:0")
except ImportError:
    !pip install opencv-python pandas openpyxl pillow
    print("CUDA not available, using CPU.")
    device = torch.device("cpu")

import cv2
import zipfile
import os
import numpy as np
import pandas as pd
from google.colab import files
from pathlib import Path
from datetime import datetime
from IPython.display import display, Javascript, HTML
from google.colab.output import eval_js
from base64 import b64decode
import time
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim

# Step 1: Upload and extract the zipped dataset
display(HTML("<h2 style='color: #1E88E5; font-family: Arial;'>Upload Your Dataset</h2>"))
display(HTML("<p style='font-family: Arial;'>Please upload a zip file with subfolders of known faces (e.g., dataset/person1/img1.jpg).</p>"))
uploaded = files.upload()
if not uploaded:
    # Same guard as upload_and_extract_dataset(): without it an empty upload
    # surfaces as an opaque IndexError on the next line.
    raise ValueError("No file was uploaded")
extracted_folder = "dataset"
# Only the first uploaded file is treated as the dataset archive.
zip_file_name = next(iter(uploaded))
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder)

display(HTML(f"<p style='color: #388E3C; font-family: Arial;'>Extracted contents: {os.listdir(extracted_folder)}</p>"))

# Step 2: Load Haar Cascades for frontal and profile faces (improved detection)
cascade_frontal = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
cascade_profile = cv2.data.haarcascades + "haarcascade_profileface.xml"
face_cascade_frontal = cv2.CascadeClassifier(cascade_frontal)
face_cascade_profile = cv2.CascadeClassifier(cascade_profile)
# CascadeClassifier does not raise on a bad path; empty() is how a failed load shows up.
if face_cascade_frontal.empty() or face_cascade_profile.empty():
    raise ValueError("Error: Failed to load Haar cascade files.")

# Step 3: Prepare a CNN Model and train with provided dataset (Enhanced Accuracy)
class FaceDataset(Dataset):
    """Map-style dataset that pairs image file paths with their labels.

    Images are opened lazily in ``__getitem__`` and converted to RGB. The
    optional ``transform`` is applied to the PIL image before it is returned.
    """

    def __init__(self, img_paths, labels, transform=None):
        self.img_paths, self.labels, self.transform = img_paths, labels, transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        sample = Image.open(self.img_paths[idx]).convert('RGB')
        sample = self.transform(sample) if self.transform else sample
        return sample, self.labels[idx]

class FaceRecognitionModel(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and two linear layers.

    ``fc1`` expects 64 x 25 x 25 features, i.e. a 100 x 100 input after the two
    2x poolings.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Stage 1: 3 -> 32 channels, spatial size halved by the pool.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 channels, spatial size halved again.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head.
        self.fc1 = nn.Linear(64 * 25 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        stage1 = self.pool1(self.relu1(self.conv1(x)))
        stage2 = self.pool2(self.relu2(self.conv2(stage1)))
        # Collapse everything except the batch dimension.
        features = stage2.flatten(start_dim=1)
        hidden = self.relu3(self.fc1(features))
        return self.fc2(hidden)

# Walk the extracted dataset. The directory that directly contains an image
# names the person, and each distinct person gets one integer class id.
img_paths = []
labels = []
name_to_label = {}  # person name -> class id

for root, dirs, file_list in os.walk(extracted_folder):
    dirs.sort()  # makes the class ids reproducible between runs
    person_name = Path(root).name
    if not file_list or person_name == extracted_folder:
        continue
    for filename in sorted(file_list):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            # The old check `person_name not in label_dict` tested the integer
            # keys, so it was always true and added a new class per image.
            label_id = name_to_label.setdefault(person_name, len(name_to_label))
            img_paths.append(os.path.join(root, filename))
            labels.append(label_id)

# Class id -> person name, the direction used when reporting predictions.
label_dict = {label_id: name for name, label_id in name_to_label.items()}
current_label = len(label_dict)

# Preprocessing applied to every image: resize to 100x100 (the size fc1 of
# FaceRecognitionModel is dimensioned for), convert to a tensor, then normalise
# each channel with the mean/std values below.
transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# All collected images are used for training; no validation split is made here.
dataset = FaceDataset(img_paths, labels, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# One output unit per entry in label_dict.
model = FaceRecognitionModel(len(label_dict)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    for images, label_ids in train_loader:
        images, label_ids = images.to(device), label_ids.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, label_ids)
        loss.backward()
        optimizer.step()
    # `loss` is the loss of the last batch of the epoch, not an epoch average.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

display(HTML(f"<p style='color: #388E3C; font-family: Arial;'>Trained with {len(img_paths)} faces from {len(label_dict)} persons: {label_dict}</p>"))

# Step 4: Webcam function with automatic capture (same as before)
def capture_frame(filename='photo.jpg', quality=0.8):
    """Run the browser-side ``captureFrame`` helper and save its image to disk.

    Returns ``filename`` once the decoded data URL has been written, or
    ``None`` when the JavaScript call yields no data.

    NOTE(review): the JavaScript body in this cell is only a placeholder
    comment, so as written the helper does not capture anything.
    """
    js = Javascript('''
    async function captureFrame(quality) {
        // ... (same as before)
    }
    ''')
    display(js)
    data_url = eval_js(f'captureFrame({quality})')
    if data_url is None:
        return None
    # A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
    image_bytes = b64decode(data_url.split(',')[1])
    with open(filename, 'wb') as out_file:
        out_file.write(image_bytes)
    return filename

# Step 5: Process frames and log timestamps for multiple faces (Enhanced Accuracy)
def process_frame(img_path, capture_num, log_data):
    """Detect faces in the image at ``img_path`` with both Haar cascades.

    NOTE(review): in this cell the function stops after setting up its
    accumulators. Nothing is recognised, ``log_data`` is never touched and the
    function returns ``None`` on every path.
    """
    frame = cv2.imread(img_path)
    # cv2.imread signals an unreadable file by returning None instead of raising.
    if frame is None:
        display(HTML("<p style='color: #F44336; font-family: Arial;'>Error: Failed to load image.</p>"))
        return None
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frontal_faces = face_cascade_frontal.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=5, minSize=(30, 30))
    profile_faces = face_cascade_profile.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=5, minSize=(30, 30))
    # Stack both result sets when both are non-empty; otherwise use whichever
    # one has detections (or the empty profile result when neither does).
    faces_detected = np.vstack((frontal_faces, profile_faces)) if len(frontal_faces) > 0 and len(profile_faces) > 0 else frontal_faces if len(frontal_faces) > 0 else profile_faces
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entries = []
    print(f"Capture {capture_num + 1}: Detected {len(faces_detected)} faces")
    detected_names = set()


In [None]:
# Step 4: Webcam function with automatic capture
def capture_frame(filename='photo.jpg', quality=0.8):
    """Grab one webcam frame through the browser and save it as a JPEG file.

    The injected JavaScript opens the camera, shows the live video, waits a
    fixed 30 seconds, then returns the current frame as a JPEG data URL encoded
    at ``quality``. Returns ``filename`` after writing the decoded image, or
    ``None`` when the JavaScript call yields no data.
    """
    js = Javascript('''
    async function captureFrame(quality) {
        const div = document.createElement('div');
        const video = document.createElement('video');
        video.style.display = 'block';
        const stream = await navigator.mediaDevices.getUserMedia({video: true});

        document.body.appendChild(div);
        div.appendChild(video);
        video.srcObject = stream;
        await video.play();

        // Resize the output to fit the video element.
        google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

        // Wait for Capture to be clicked.
        await new Promise((resolve) => setTimeout(resolve, 30000)); // Wait for 30 seconds

        const canvas = document.createElement('canvas');
        canvas.width = video.videoWidth;
        canvas.height = video.videoHeight;
        canvas.getContext('2d').drawImage(video, 0, 0);
        stream.getVideoTracks()[0].stop();
        div.remove();
        return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
    display(js)
    data_url = eval_js(f'captureFrame({quality})')
    if data_url is None:
        return None
    # A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
    image_bytes = b64decode(data_url.split(',')[1])
    with open(filename, 'wb') as out_file:
        out_file.write(image_bytes)
    return filename

# Step 5: Process frames and log timestamps for multiple faces (Enhanced Accuracy)
def process_frame(img_path, capture_num, log_data):
    """Detect and classify every face in one captured image.

    Faces are found with the frontal and profile Haar cascades, classified by
    the global ``model``, drawn onto the frame and logged.

    Args:
        img_path: Path of the image to analyse.
        capture_num: Zero-based capture index, used in messages and in the
            name of the annotated output file.
        log_data: List that receives one
            ``[timestamp, name, x, y, w, h]`` row per detected face.

    Returns:
        The annotated BGR frame, or ``None`` when the image cannot be read.
    """
    frame = cv2.imread(img_path)
    if frame is None:
        display(HTML("<p style='color: #F44336; font-family: Arial;'>Error: Failed to load image.</p>"))
        return None
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frontal_faces = face_cascade_frontal.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=5, minSize=(30, 30))
    profile_faces = face_cascade_profile.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=5, minSize=(30, 30))
    # Combine whichever cascades produced detections; an empty tuple otherwise.
    non_empty = [found for found in (frontal_faces, profile_faces) if len(found) > 0]
    faces_detected = np.vstack(non_empty) if non_empty else ()
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entries = []
    print(f"Capture {capture_num + 1}: Detected {len(faces_detected)} faces")
    detected_names = set()

    # Inference only: put the model in eval mode and skip autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        for (x, y, w, h) in faces_detected:
            face_img = frame[y:y+h, x:x+w]
            face_img = cv2.resize(face_img, (100, 100))
            # OpenCV frames are BGR, while the training images were loaded by
            # PIL as RGB. Convert first so the model sees the channel order it
            # was trained on.
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            face_tensor = transform(Image.fromarray(face_img)).unsqueeze(0).to(device)
            outputs = model(face_tensor)
            _, predicted = torch.max(outputs, 1)
            predicted_label = label_dict[predicted.item()]
            detected_names.add(predicted_label)
            entries.append([timestamp, predicted_label, x, y, w, h])
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            cv2.putText(frame, predicted_label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    log_data.extend(entries)
    display(HTML(f"<p style='color: #1E88E5; font-family: Arial;'>Capture {capture_num + 1}: Recognized {', '.join(detected_names)}</p>"))
    # The annotated frame is written as capture_<n>.jpg in the working directory.
    cv2.imwrite(f"capture_{capture_num + 1}.jpg", frame)
    return frame

# Step 6: Automatically capture and process frames at 30-second intervals
log_data = []
num_captures = 4
for i in range(num_captures):
    img_path = capture_frame(filename=f'capture_{i + 1}.jpg')
    if img_path:
        process_frame(img_path, i, log_data)
    # Pause only between captures. Sleeping after the last one just delayed
    # writing the log by 30 seconds.
    if i < num_captures - 1:
        time.sleep(30)  # Wait for 30 seconds before the next capture

# Step 7: Save the log data to an Excel file
log_df = pd.DataFrame(log_data, columns=["Timestamp", "Name", "X", "Y", "Width", "Height"])
log_df.to_excel("face_recognition_log.xlsx", index=False)
display(HTML("<p style='color: #388E3C; font-family: Arial;'>Face recognition log saved to 'face_recognition_log.xlsx'.</p>"))

In [None]:
!wget -O deploy.prototxt https://github.com/opencv/opencv/blob/master/samples/dnn/face_detector/deploy.prototxt
!wget -O res10_300x300_ssd_iter_140000.caffemodel https://github.com/opencv/opencv/blob/master/samples/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel


In [None]:
# Frontal-face Haar cascade loaded from the data directory bundled with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")


In [None]:
!wget -O deploy.prototxt https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
!wget -O res10_300x300_ssd_iter_140000.caffemodel https://raw.githubusercontent.com/opencv/opencv_3rdparty/master/models/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel


In [None]:
!wget -O res10_300x300_ssd_iter_140000.caffemodel https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx


In [None]:
# Both files are expected in the current working directory.
prototxt_path = "deploy.prototxt"
caffemodel_path = "res10_300x300_ssd_iter_140000.caffemodel"

# Build the detector network from the Caffe definition/weights pair.
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)


In [None]:
!wget -O deploy.prototxt https://github.com/opencv/opencv/raw/master/samples/dnn/face_detector/deploy.prototxt
!wget -O res10_300x300_ssd_iter_140000.caffemodel https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx


In [None]:
!ls -lh res10_300x300_ssd_iter_140000.caffemodel


In [None]:
import cv2

# Both files are expected in the current working directory.
prototxt_path = "deploy.prototxt"
caffemodel_path = "res10_300x300_ssd_iter_140000.caffemodel"

# Build the detector network from the Caffe definition/weights pair.
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# Reached only if readNetFromCaffe did not raise.
print("Model loaded successfully!")


In [None]:
!wget -O yunet.onnx https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx


In [None]:
# Load the YuNet ONNX file as a generic DNN network.
# NOTE(review): this rebinds `face_net`. Code written for the Caffe SSD detector
# indexes the forward() output as detections[0, 0, i, ...]; confirm that
# YuNet's output has that layout before using this network there.
face_net = cv2.dnn.readNet("yunet.onnx")


In [None]:
import torch
import torchvision
import cv2
import zipfile
import os
import numpy as np
import pandas as pd
from google.colab import files
from pathlib import Path
from datetime import datetime
from IPython.display import display, Javascript, HTML
from google.colab.output import eval_js
from base64 import b64decode
import time
from PIL import Image
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim

# Install dependencies (CUDA support if available)
try:
    !pip install opencv-python pandas openpyxl pillow torch torchvision --index-url https://download.pytorch.org/whl/cu118
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
except ImportError:
    !pip install opencv-python pandas openpyxl pillow
    device = torch.device("cpu")
    print("Using CPU.")

# Step 1: Upload and extract the dataset
display(HTML("<h2 style='color: #1E88E5;'>Upload Your Dataset</h2>"))
uploaded = files.upload()
if not uploaded:
    # Same guard as upload_and_extract_dataset(): without it an empty upload
    # surfaces as an opaque IndexError on the next line.
    raise ValueError("No file was uploaded")
extracted_folder = "dataset"
# Only the first uploaded file is treated as the dataset archive.
zip_file_name = next(iter(uploaded))
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder)

display(HTML(f"<p style='color: #388E3C;'>Extracted: {os.listdir(extracted_folder)}</p>"))

# Load DNN-based face detection model
# The model files are downloaded into the working directory by the wget cells.
# They are not part of OpenCV's bundled Haar cascade data directory, so
# prefixing cv2.data.haarcascades pointed at files that do not exist.
prototxt_path = "deploy.prototxt"
caffemodel_path = "res10_300x300_ssd_iter_140000.caffemodel"
missing_model_files = [p for p in (prototxt_path, caffemodel_path) if not os.path.exists(p)]
if missing_model_files:
    raise FileNotFoundError(f"Face detector files not found: {missing_model_files}")
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# Step 2: Prepare Pretrained ResNet for Face Recognition
class FaceRecognitionModel(nn.Module):
    """ResNet-18 with pretrained weights and a new ``num_classes``-way head."""

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Replace the stock classifier with one sized for our label set.
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)
        self.model = backbone

    def forward(self, x):
        logits = self.model(x)
        return logits

# Step 3: Load dataset and prepare training
def load_dataset(root_dir=None):
    """Collect image paths and integer labels from a folder-per-person tree.

    The directory that directly contains an image names the person. Files that
    sit directly in the root directory are ignored.

    Args:
        root_dir: Directory to scan. Defaults to the module-level
            ``extracted_folder``.

    Returns:
        ``(img_paths, labels, label_dict)`` where ``labels[i]`` is the class id
        of ``img_paths[i]`` and ``label_dict`` maps class id -> person name.
    """
    if root_dir is None:
        root_dir = extracted_folder
    root_name = Path(root_dir).name

    img_paths, labels = [], []
    name_to_label = {}  # person name -> class id

    for root, dirs, file_list in os.walk(root_dir):
        dirs.sort()  # makes the class ids reproducible between runs
        person_name = Path(root).name
        if not file_list or person_name == root_name:
            continue
        for filename in sorted(file_list):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                # The old check `person_name not in label_dict` tested the
                # integer keys, so it was always true and added a new class
                # for every image.
                label_id = name_to_label.setdefault(person_name, len(name_to_label))
                img_paths.append(os.path.join(root, filename))
                labels.append(label_id)

    label_dict = {label_id: name for name, label_id in name_to_label.items()}
    return img_paths, labels, label_dict

# Scan the extracted dataset once; the results feed the Dataset and model below.
img_paths, labels, label_dict = load_dataset()

# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel with the mean/std values below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class FaceDataset(torch.utils.data.Dataset):
    """Map-style dataset that yields ``(transformed RGB image, label)`` pairs.

    Images are opened lazily from ``img_paths`` and ``transform`` is always
    applied to them.
    """

    def __init__(self, img_paths, labels, transform):
        self.img_paths, self.labels, self.transform = img_paths, labels, transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        rgb_image = Image.open(self.img_paths[idx]).convert('RGB')
        sample = self.transform(rgb_image)
        return sample, self.labels[idx]

# All collected images are used for training; no validation split is made here.
train_loader = torch.utils.data.DataLoader(FaceDataset(img_paths, labels, transform), batch_size=32, shuffle=True)

# Train the model
# One output unit per entry in label_dict; every parameter of the wrapped
# network is handed to the optimiser.
model = FaceRecognitionModel(len(label_dict)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    for images, label_ids in train_loader:
        images, label_ids = images.to(device), label_ids.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, label_ids)
        loss.backward()
        optimizer.step()
    # `loss` is the loss of the last batch of the epoch, not an epoch average.
    print(f"Epoch {epoch+1}/10, Loss: {loss.item():.4f}")

# Step 4: Webcam function for automatic capture
def capture_frame(filename='photo.jpg', delay_ms=10000):
    """Grab a single webcam frame through the browser and save it as a JPEG.

    A temporary ``<video>`` element is attached to the page, the camera stream
    is shown for ``delay_ms`` milliseconds so the user can get into position,
    and the current frame is then drawn to a canvas and sent back to Python as
    a base64 data URL.

    Args:
        filename: Path the captured JPEG is written to.
        delay_ms: Milliseconds to wait between starting the stream and taking
            the snapshot.

    Returns:
        ``filename`` on success, or ``None`` when the browser returned no
        image data.
    """
    js = Javascript('''
    async function captureFrame(delayMs) {
        const video = document.createElement('video');
        const stream = await navigator.mediaDevices.getUserMedia({video: true});
        try {
            document.body.appendChild(video);
            video.srcObject = stream;
            await video.play();
            await new Promise((resolve) => setTimeout(resolve, delayMs));
            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            return canvas.toDataURL('image/jpeg');
        } finally {
            // Always release the camera and take the preview off the page,
            // even when playback or drawing failed.
            stream.getTracks().forEach((track) => track.stop());
            video.remove();
        }
    }
    ''')
    display(js)
    data = eval_js(f'captureFrame({int(delay_ms)})')

    # A data URL looks like "data:image/jpeg;base64,<payload>"; anything
    # without a non-empty payload means no frame was captured.
    if not data or ',' not in data:
        return None
    payload = data.split(',', 1)[1]
    if not payload:
        return None

    with open(filename, 'wb') as f:
        f.write(b64decode(payload))
    return filename

# Step 5: Face detection and recognition
def process_frame(img_path):
    """Detect faces in an image, classify each one and draw the results.

    Relies on the module-level ``face_net`` (OpenCV DNN face detector),
    ``model`` (trained classifier), ``transform``, ``device`` and
    ``label_dict``. The annotated image is also written to ``processed.jpg``.

    Args:
        img_path: Path of the image to analyse.

    Returns:
        The BGR frame with a box and predicted name drawn on every detection
        whose confidence exceeds 0.5.

    Raises:
        ValueError: If ``img_path`` is empty or the image cannot be read.
    """
    frame = cv2.imread(img_path) if img_path else None
    if frame is None:
        raise ValueError(f"Could not read image for processing: {img_path!r}")

    frame_h, frame_w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104.0, 177.0, 123.0))
    face_net.setInput(blob)
    detections = face_net.forward()

    # Detector coordinates are fractions of the frame size.
    scale = np.array([frame_w, frame_h, frame_w, frame_h])

    # Run the classifier in inference mode (dropout / batch-norm behave
    # deterministically) and restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        for detection in detections[0, 0]:
            confidence = detection[2]
            if confidence <= 0.5:
                continue

            x, y, x2, y2 = (int(v) for v in (detection[3:7] * scale).astype('int'))
            # The detector may report boxes partly outside the image; clamp
            # them so the crop is never empty or wrapped by negative indices.
            x, y = max(x, 0), max(y, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            if x2 <= x or y2 <= y:
                continue

            # OpenCV stores pixels as BGR, while the classifier was trained
            # on RGB images loaded through PIL.
            face_rgb = cv2.cvtColor(frame[y:y2, x:x2], cv2.COLOR_BGR2RGB)
            face_tensor = transform(Image.fromarray(face_rgb)).unsqueeze(0).to(device)
            with torch.no_grad():
                _, predicted = torch.max(model(face_tensor), 1)
            predicted_label = label_dict[predicted.item()]

            cv2.rectangle(frame, (x, y), (x2, y2), (0, 255, 0), 2)
            # Keep the caption inside the image when the face touches the top.
            cv2.putText(frame, predicted_label, (x, max(y - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    finally:
        model.train(was_training)

    cv2.imwrite("processed.jpg", frame)
    return frame

# Step 6: Capture and Process
# Take a webcam snapshot and run detection + recognition on it.
# NOTE: despite its name, detected_faces holds the annotated image returned by
# process_frame, not a list of face boxes.
detected_faces = process_frame(capture_frame())
display(HTML("<p style='color: #388E3C;'>Processed Image Saved.</p>"))


In [None]:
!pip install face_recognition opencv-python pandas openpyxl


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pytesseract
from transformers import MistralForCausalLM, AutoTokenizer
import json
import re
import logging
import random
from tqdm import tqdm

# Set up logging
# Configure the root logger for INFO and above, formatting each record as
# "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used throughout DoctorHandwritingOCR.
logger = logging.getLogger(__name__)

class DoctorHandwritingOCR:
    """OCR pipeline for handwritten prescriptions.

    Images are cleaned up with OpenCV, read with Tesseract and, when the
    Mistral language model could be loaded, post-corrected by it. The class
    also contains scaffolding for a Keras "enhancement" model whose training
    loop is still a placeholder.
    """

    def __init__(self, data_dir="./data", output_dir="./output", model_save_path="./models"):
        """
        Initialize the Doctor's Handwriting OCR system.

        Args:
            data_dir: Directory containing the training data
            output_dir: Directory to store output files
            model_save_path: Directory to save trained models
        """
        self.data_dir = data_dir
        self.output_dir = output_dir
        self.model_save_path = model_save_path

        # Create directories if they don't exist
        for directory in [data_dir, output_dir, model_save_path]:
            os.makedirs(directory, exist_ok=True)

        # Models are loaded / built lazily by the corresponding methods
        self.tokenizer = None
        self.mistral_model = None
        self.ocr_model = None
        self.image_size = (224, 224)
        self.batch_size = 16

        # Point pytesseract at the default Windows install location only when
        # it actually exists. Everywhere else (Linux, Colab, macOS, custom
        # installs) keep pytesseract's default, which resolves the binary
        # through PATH.
        windows_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
        if os.path.exists(windows_tesseract):
            pytesseract.pytesseract.tesseract_cmd = windows_tesseract

    def load_mistral_model(self):
        """Load the Mistral language model for refining OCR results.

        Failures are logged and leave the instance without a language model,
        in which case refinement is skipped.
        """
        logger.info("Loading Mistral model for text refinement...")
        try:
            model_name = "mistralai/Mistral-7B-v0.1"
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.mistral_model = MistralForCausalLM.from_pretrained(model_name)
            logger.info("Mistral model loaded successfully.")
        except Exception as e:
            # Never keep a half-loaded tokenizer/model pair around.
            self.tokenizer = None
            self.mistral_model = None
            logger.error(f"Error loading Mistral model: {str(e)}")
            logger.info("Continuing without Mistral refinement.")

    def preprocess_image(self, image_path):
        """
        Preprocess an image for OCR.

        The image is converted to grayscale, binarised with an inverted Otsu
        threshold, cleaned with a 2x2 morphological opening, dilated once and
        resized to ``self.image_size``.

        Args:
            image_path: Path to the image file

        Returns:
            Preprocessed single-channel image, or None if the file could not
            be read
        """
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return None

        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Apply thresholding to handle different lighting conditions
        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Noise removal
        kernel = np.ones((2, 2), np.uint8)
        opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

        # Dilation to connect nearby text
        dilation = cv2.dilate(opening, kernel, iterations=1)

        # Resize image to standardized size
        processed = cv2.resize(dilation, self.image_size)

        return processed

    def extract_text_with_tesseract(self, image):
        """
        Extract text from an image using Tesseract OCR.

        Args:
            image: Preprocessed image

        Returns:
            Extracted text with surrounding whitespace removed
        """
        # Convert back to PIL Image for Tesseract
        pil_image = Image.fromarray(image)

        # NOTE(review): "med" is not one of Tesseract's stock language packs -
        # presumably a custom medical traineddata file; confirm it is
        # installed alongside "eng".
        custom_config = r'--oem 3 --psm 6 -l eng+med'
        text = pytesseract.image_to_string(pil_image, config=custom_config)

        return text.strip()

    def refine_text_with_mistral(self, raw_text):
        """
        Refine the OCR-extracted text using the Mistral language model.

        Args:
            raw_text: Raw text from OCR

        Returns:
            The model's continuation of the correction prompt, or ``raw_text``
            unchanged when no model is loaded, generation fails, or the model
            produces nothing
        """
        if self.mistral_model is None or self.tokenizer is None:
            return raw_text

        try:
            prompt = f"Correct this medical text that was extracted from a doctor's handwriting: '{raw_text}'"
            inputs = self.tokenizer(prompt, return_tensors="pt")
            prompt_length = inputs["input_ids"].shape[1]

            # max_new_tokens bounds only the continuation, so a long prompt
            # cannot use up the whole generation budget. Decoding stays greedy
            # (the library default): sampling knobs such as temperature/top_p
            # have no effect unless do_sample=True, and deterministic output
            # keeps evaluation runs reproducible.
            outputs = self.mistral_model.generate(
                **inputs,
                max_new_tokens=256,
                repetition_penalty=1.2,
                pad_token_id=self.tokenizer.eos_token_id
            )

            # A causal LM returns prompt + continuation; keep only the newly
            # generated tokens so the prompt (and the raw OCR text quoted in
            # it) is not echoed back as the "refined" result.
            generated_tokens = outputs[0][prompt_length:]
            refined_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

            return refined_text or raw_text
        except Exception as e:
            logger.error(f"Error refining text with Mistral: {str(e)}")
            return raw_text

    def create_dataset(self, annotations_file):
        """
        Create a dataset from images and their ground truth annotations.

        Rows whose image cannot be loaded are skipped.

        Args:
            annotations_file: CSV file containing image paths and ground truth text

        Returns:
            DataFrame with image paths, processed images, OCR text, and ground truth
        """
        logger.info("Creating dataset from annotations...")

        # Read annotations
        df = pd.read_csv(annotations_file)
        dataset = []

        for _, row in tqdm(df.iterrows(), total=len(df)):
            image_path = os.path.join(self.data_dir, row['image_path'])
            ground_truth = row['text']

            # Preprocess image
            processed_image = self.preprocess_image(image_path)
            if processed_image is None:
                continue

            # Extract text with OCR
            ocr_text = self.extract_text_with_tesseract(processed_image)

            dataset.append({
                'image_path': image_path,
                'processed_image': processed_image,
                'ocr_text': ocr_text,
                'ground_truth': ground_truth
            })

        # Explicit columns keep the schema stable even when no image loaded.
        return pd.DataFrame(
            dataset,
            columns=['image_path', 'processed_image', 'ocr_text', 'ground_truth']
        )

    def build_enhancement_model(self):
        """
        Build a deep learning model to enhance OCR results.

        A frozen ImageNet-pretrained MobileNetV2 feeds a stack of dense
        layers, two LSTM layers over a length-1 sequence and a 1000-way
        softmax output.

        Returns:
            Compiled model
        """
        logger.info("Building OCR enhancement model...")

        # Use MobileNetV2 as base model
        base_model = MobileNetV2(
            input_shape=(self.image_size[0], self.image_size[1], 3),
            include_top=False,
            weights='imagenet'
        )

        # Freeze base model layers
        for layer in base_model.layers:
            layer.trainable = False

        # Create model
        model = models.Sequential([
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.Dense(1024, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.3),
            layers.Dense(256, activation='relu'),
            layers.Dense(128, activation='relu')
        ])

        # Add LSTM layers for sequence modeling
        model.add(layers.Reshape((1, 128)))
        model.add(layers.LSTM(128, return_sequences=True))
        model.add(layers.LSTM(64))

        # Output layer
        model.add(layers.Dense(1000, activation='softmax'))  # Assuming vocabulary size of 1000

        # Compile model
        model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        return model

    def train_model(self, dataset, epochs=30):
        """
        Train the OCR enhancement model.

        Currently a placeholder: the model is built and saved but ``fit`` is
        never called, so the split, the callbacks and ``epochs`` are not used
        yet.

        Args:
            dataset: DataFrame containing training data
            epochs: Number of training epochs

        Returns:
            The (untrained) model, also stored in ``self.ocr_model``
        """
        logger.info("Training OCR enhancement model...")

        # Split dataset into training and validation sets
        train_df, val_df = train_test_split(dataset, test_size=0.2, random_state=42)

        # Build model
        model = self.build_enhancement_model()

        # Data generators
        # Note: In a real implementation, you'd need to convert text to sequences
        # and create proper data generators

        # Early stopping
        early_stopping = callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )

        # Model checkpoint
        checkpoint = callbacks.ModelCheckpoint(
            os.path.join(self.model_save_path, 'best_model.h5'),
            monitor='val_loss',
            save_best_only=True
        )

        # Train model (placeholder - actual training would use data generators)
        logger.info("Model architecture created. In a real implementation, you would train with actual data.")

        # Save model
        model.save(os.path.join(self.model_save_path, 'final_model.h5'))

        self.ocr_model = model
        return model

    def recognize_text(self, image_path):
        """
        Recognize text from a doctor's handwriting image.

        Args:
            image_path: Path to the image file

        Returns:
            Dict with keys ``'raw_ocr'`` and ``'refined_text'`` on success.
            When the image cannot be processed, the error message is returned
            as a plain string instead, so callers must check the type.
        """
        logger.info(f"Recognizing text from image: {image_path}")

        # Preprocess image
        processed_image = self.preprocess_image(image_path)
        if processed_image is None:
            return "Error: Could not process image."

        # Extract text with OCR
        ocr_text = self.extract_text_with_tesseract(processed_image)
        logger.info(f"Raw OCR text: {ocr_text}")

        # Refine text with Mistral
        refined_text = self.refine_text_with_mistral(ocr_text)
        logger.info(f"Refined text: {refined_text}")

        return {
            'raw_ocr': ocr_text,
            'refined_text': refined_text
        }

    def evaluate_model(self, test_dataset):
        """
        Evaluate the model on test data.

        Per-sample results are written to ``evaluation_results.csv`` in the
        output directory.

        Args:
            test_dataset: DataFrame containing test data

        Returns:
            Dict with the mean ``'raw_accuracy'`` and ``'refined_accuracy'``
            (NaN when ``test_dataset`` is empty) and the per-sample
            ``'detailed_results'`` DataFrame
        """
        logger.info("Evaluating model performance...")

        results = []
        for _, row in tqdm(test_dataset.iterrows(), total=len(test_dataset)):
            # Extract raw OCR text
            ocr_text = row['ocr_text']

            # Refine text with Mistral
            refined_text = self.refine_text_with_mistral(ocr_text)

            # Compare with ground truth
            ground_truth = row['ground_truth']

            # Calculate metrics
            raw_accuracy = self.calculate_text_similarity(ocr_text, ground_truth)
            refined_accuracy = self.calculate_text_similarity(refined_text, ground_truth)

            results.append({
                'image_path': row['image_path'],
                'raw_ocr': ocr_text,
                'refined_text': refined_text,
                'ground_truth': ground_truth,
                'raw_accuracy': raw_accuracy,
                'refined_accuracy': refined_accuracy
            })

        # Save results; explicit columns keep the CSV header and the column
        # lookups below valid even when there were no test rows.
        results_df = pd.DataFrame(
            results,
            columns=['image_path', 'raw_ocr', 'refined_text', 'ground_truth',
                     'raw_accuracy', 'refined_accuracy']
        )
        results_df.to_csv(os.path.join(self.output_dir, 'evaluation_results.csv'), index=False)

        # Calculate overall metrics
        if results_df.empty:
            overall_raw_accuracy = float('nan')
            overall_refined_accuracy = float('nan')
        else:
            overall_raw_accuracy = results_df['raw_accuracy'].mean()
            overall_refined_accuracy = results_df['refined_accuracy'].mean()

        logger.info(f"Overall raw OCR accuracy: {overall_raw_accuracy:.4f}")
        logger.info(f"Overall refined text accuracy: {overall_refined_accuracy:.4f}")

        return {
            'raw_accuracy': overall_raw_accuracy,
            'refined_accuracy': overall_refined_accuracy,
            'detailed_results': results_df
        }

    @staticmethod
    def _as_text(value):
        """Return ``value`` as a string, mapping None and NaN to ''."""
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value)

    def calculate_text_similarity(self, text1, text2):
        """
        Calculate similarity between two texts.

        Uses ``difflib.SequenceMatcher.ratio()`` (twice the number of matching
        characters divided by the combined length). Missing values (None or
        NaN, e.g. an empty CSV cell read by pandas) count as empty strings.

        Args:
            text1: First text
            text2: Second text

        Returns:
            Similarity score (0-1)
        """
        from difflib import SequenceMatcher
        return SequenceMatcher(None, self._as_text(text1), self._as_text(text2)).ratio()

    def create_sample_data(self, num_samples=100):
        """
        Create sample data for demonstration purposes.

        Only the annotations CSV is written; the image files it refers to are
        placeholders and are not created.

        Args:
            num_samples: Number of samples to create

        Returns:
            Path to the created annotations file
        """
        logger.info(f"Creating {num_samples} sample data entries...")

        # Create sample data directory
        sample_data_dir = os.path.join(self.data_dir, 'sample')
        os.makedirs(sample_data_dir, exist_ok=True)

        # Sample medical terms
        medical_terms = [
            "paracetamol 500mg twice daily",
            "ibuprofen 400mg three times daily",
            "amoxicillin 250mg every 8 hours",
            "metformin 500mg with meals",
            "lisinopril 10mg once daily",
            "atorvastatin 20mg at bedtime",
            "levothyroxine 50mcg in the morning",
            "albuterol inhaler 2 puffs as needed",
            "aspirin 81mg daily",
            "omeprazole 20mg before breakfast"
        ]

        # Create CSV file
        annotations = []
        for i in range(num_samples):
            # Generate random text
            text = random.choice(medical_terms)

            # For demonstration, we'd create actual images here
            # Instead, we'll just create placeholder entries
            image_path = f"sample/image_{i:03d}.png"
            annotations.append({
                'image_path': image_path,
                'text': text
            })

        # Save annotations
        annotations_df = pd.DataFrame(annotations)
        annotations_file = os.path.join(self.data_dir, 'sample_annotations.csv')
        annotations_df.to_csv(annotations_file, index=False)

        logger.info(f"Sample data created and saved to {annotations_file}")
        return annotations_file

    def run_complete_pipeline(self, annotations_file=None, num_samples=100):
        """
        Run the complete pipeline from data creation to evaluation.

        Args:
            annotations_file: Path to annotations file (optional)
            num_samples: Number of samples to create if annotations_file is None

        Returns:
            Evaluation results. When fewer than three images could be loaded
            the pipeline stops early and returns NaN accuracies with an empty
            ``'detailed_results'`` DataFrame.
        """
        # Create sample data if annotations file not provided
        if annotations_file is None:
            annotations_file = self.create_sample_data(num_samples)

        # Load Mistral model
        self.load_mistral_model()

        # Create dataset
        dataset = self.create_dataset(annotations_file)

        # Two nested 80/20 splits follow (train+val / test here, then
        # train / val inside train_model); each side of both splits needs at
        # least one row, which takes three samples. Stop with a clear message
        # instead of failing inside train_test_split - this is the normal
        # outcome for the generated sample annotations, whose images are
        # placeholders.
        min_samples = 3
        if len(dataset) < min_samples:
            logger.error(
                f"Only {len(dataset)} usable image(s) found; at least {min_samples} "
                "are required to train and evaluate. Skipping training and evaluation."
            )
            return {
                'raw_accuracy': float('nan'),
                'refined_accuracy': float('nan'),
                'detailed_results': pd.DataFrame(
                    columns=['image_path', 'raw_ocr', 'refined_text', 'ground_truth',
                             'raw_accuracy', 'refined_accuracy']
                )
            }

        # Split dataset
        train_val_df, test_df = train_test_split(dataset, test_size=0.2, random_state=42)

        # Train model
        self.train_model(train_val_df)

        # Evaluate model
        results = self.evaluate_model(test_df)

        return results

# Example usage
if __name__ == "__main__":
    # Initialize the OCR system
    ocr_system = DoctorHandwritingOCR()

    # Run the complete pipeline
    results = ocr_system.run_complete_pipeline()

    # Recognize text from a new image
    image_path = "./data/new_prescription.png"
    if os.path.exists(image_path):
        recognized_text = ocr_system.recognize_text(image_path)
        # recognize_text returns a dict on success but a plain error string
        # when the image cannot be processed; indexing the string with a key
        # would raise TypeError.
        if isinstance(recognized_text, dict):
            print("Recognized text:")
            print(f"Raw OCR: {recognized_text['raw_ocr']}")
            print(f"Refined text: {recognized_text['refined_text']}")
        else:
            print(recognized_text)