# General Object Recognition Project

Import openCV, numpy and tensorflow for deep computer vision. Make sure required dependencies are installed using the requirements.txt file.

In [2]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions


Load the MobileNetV2 model pre-trained on ImageNet and initialize webcam

In [3]:
model = tf.keras.applications.MobileNetV2(weights='imagenet')
cap = cv2.VideoCapture(0)

Check if webcam is working

In [4]:
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

## Computer vision model: MobileNetV2 pre-trained on ImageNet

The model is very limited in its capacity to recognize different objects. It analyzes the video frame by frame, to stop the program press Q

In [5]:
while True:
    # Capture frame-by-frame from webcam
    ret, frame = cap.read()
    if not ret:
        print("Failed to capture image.")
        break

    # Resize the frame to match the input size of the model (224x224 for MobileNetV2)
    resized_frame = cv2.resize(frame, (224, 224))
    
    # Preprocess the frame for MobileNetV2 (normalize the input image)
    preprocessed_frame = preprocess_input(np.expand_dims(resized_frame, axis=0))

    # Perform prediction
    predictions = model.predict(preprocessed_frame)
    
    # Decode the prediction (get the top 3 predictions)
    decoded_predictions = decode_predictions(predictions, top=3)[0]

    # Get the highest confidence prediction
    label, confidence = decoded_predictions[0][1], decoded_predictions[0][2]

    # Display the result on the frame
    text = f"{label}: {confidence:.2f}"
    cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

    # Show the frame
    cv2.imshow('Textbook Detection', frame)

    # Press 'q' to quit the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms