In [None]:
%pip3 install opencv-python matplotlib

In [None]:
# Imports for the feature-detection demo (install the packages with the cell above if needed)
import cv2
import numpy as np
import matplotlib.pyplot as plt
from urllib.request import urlopen
import os

# Function to download and save an image if it doesn't exist
def get_sample_image(url, filename="sample_image.jpg"):
    """Return the local path of a sample image, downloading it on first use.

    If ``filename`` already exists it is returned untouched and no request is
    made. Otherwise the body of ``url`` is fetched and stored at ``filename``.

    Args:
        url: Location to fetch the image bytes from when ``filename`` is absent.
        filename: Destination path; its existence is what marks the image as
            already downloaded.

    Returns:
        ``filename``, unchanged.

    Raises:
        ValueError: If the response body is empty.
        OSError: Propagated from ``urlopen`` (``URLError`` is a subclass) or
            from writing the file.
    """
    if os.path.exists(filename):
        return filename

    with urlopen(url) as response:
        image_data = response.read()

    # An empty file would satisfy the existence check above on every later
    # call, so refuse to cache it.
    if not image_data:
        raise ValueError(f"Downloaded 0 bytes from {url!r}")

    # Write to a sibling file and rename it into place so an interrupted write
    # can never leave a truncated file under the final name.
    partial_path = f"{filename}.part"
    try:
        with open(partial_path, 'wb') as f:
            f.write(image_data)
        os.replace(partial_path, filename)
    finally:
        # Only still present if the write or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return filename

# Download a sample image (skipped when the file is already on disk)
SAMPLE_IMAGE_URL = "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/fruits.jpg"
image_path = get_sample_image(SAMPLE_IMAGE_URL)

# Read the image
img = cv2.imread(image_path)
# cv2.imread reports failure by returning None instead of raising, so check
# here rather than letting cvtColor fail with an opaque assertion error.
if img is None:
    raise ValueError(f"cv2.imread could not load an image from {image_path!r}")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# 1. Harris Corner Detection
# The Harris response is computed on a float32 copy of the grayscale image,
# then dilated so each marked corner covers more than a single pixel.
gray_float = gray.astype(np.float32)
corners = cv2.cornerHarris(gray_float, blockSize=2, ksize=3, k=0.04)
corners = cv2.dilate(corners, None)
# Paint every pixel whose response exceeds 1% of the strongest one in red (BGR).
img_harris = img.copy()
harris_threshold = 0.01 * corners.max()
img_harris[corners > harris_threshold] = [0, 0, 255]

# 2. Shi-Tomasi Corner Detection
corners_st = cv2.goodFeaturesToTrack(gray, maxCorners=50, qualityLevel=0.01, minDistance=10)
img_shi_tomasi = img.copy()
# goodFeaturesToTrack may return None; only draw when corners were found.
if corners_st is not None:
    # Flatten to one (x, y) row per corner and draw a filled green dot on each.
    for x, y in corners_st.reshape(-1, 2):
        center = (int(x), int(y))
        cv2.circle(img_shi_tomasi, center, 5, (0, 255, 0), -1)

# Both keypoint detectors below are drawn with the same "rich" flag.
rich_keypoints = cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS

# 3. SIFT (Scale-Invariant Feature Transform)
sift = cv2.SIFT_create()
keypoints_sift = sift.detect(gray, None)
img_sift = cv2.drawKeypoints(img, keypoints_sift, None, flags=rich_keypoints)

# 4. ORB (Oriented FAST and Rotated BRIEF)
orb = cv2.ORB_create(nfeatures=200)
keypoints_orb = orb.detect(gray, None)
img_orb = cv2.drawKeypoints(img, keypoints_orb, None, flags=rich_keypoints)

# 5. Canny Edge Detection
canny_low, canny_high = 100, 200
edges = cv2.Canny(gray, canny_low, canny_high)

# Convert BGR to RGB for matplotlib display
# (OpenCV images are BGR; matplotlib's imshow expects RGB.)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_harris_rgb = cv2.cvtColor(img_harris, cv2.COLOR_BGR2RGB)
img_shi_tomasi_rgb = cv2.cvtColor(img_shi_tomasi, cv2.COLOR_BGR2RGB)
img_sift_rgb = cv2.cvtColor(img_sift, cv2.COLOR_BGR2RGB)
img_orb_rgb = cv2.cvtColor(img_orb, cv2.COLOR_BGR2RGB)

# Display the results
# One (title, image, extra imshow kwargs) entry per panel, in reading order.
# Only the single-channel edge map needs an explicit colormap.
panels = [
    ('Original Image', img_rgb, {}),
    ('Harris Corner Detection', img_harris_rgb, {}),
    ('Shi-Tomasi Corner Detection', img_shi_tomasi_rgb, {}),
    ('SIFT Features', img_sift_rgb, {}),
    ('ORB Features', img_orb_rgb, {}),
    ('Canny Edge Detection', edges, {'cmap': 'gray'}),
]

fig, axes = plt.subplots(2, 3, figsize=(20, 15))
for ax, (title, image, imshow_kwargs) in zip(axes.flat, panels):
    ax.imshow(image, **imshow_kwargs)
    ax.set_title(title)
    ax.axis('off')

fig.tight_layout()
plt.show()

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

# Load ResNet50 with its 'imagenet' weights. The instance is bound to the
# module-level name `model`, which predict_image_content() below reads as a
# global rather than receiving it as an argument.
model = ResNet50(weights='imagenet')

# Function to predict what's in the image
def predict_image_content(image_path):
    """Classify an image file with the global ``model`` and plot the outcome.

    The image is shown next to a horizontal bar chart of the five highest
    scoring classes.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A sentence naming the top class and its score as a percentage.

    Raises:
        ValueError: If OpenCV cannot load an image from ``image_path``.
    """
    # Read the image; cv2.imread returns None on failure instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"cv2.imread could not load an image from {image_path!r}")

    # OpenCV loads BGR; convert to RGB for both the model and matplotlib.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img, (224, 224))
    # Add a leading batch axis: (224, 224, 3) -> (1, 224, 224, 3).
    img_array = np.expand_dims(img_resized, axis=0)
    img_preprocessed = preprocess_input(img_array)

    # Make prediction; each decoded entry is indexed as (id, label, score).
    predictions = model.predict(img_preprocessed)
    decoded = decode_predictions(predictions, top=5)[0]
    labels = [pred[1] for pred in decoded]
    scores = [pred[2] for pred in decoded]

    # Display results
    fig, (ax_image, ax_scores) = plt.subplots(1, 2, figsize=(12, 6))

    # Original image (full resolution, not the resized model input)
    ax_image.imshow(img)
    ax_image.set_title('Original Image')
    ax_image.axis('off')

    # Predictions
    ax_scores.barh(labels, scores)
    # barh places the first entry at the bottom; flip the axis so the best
    # prediction is at the top of the ranking.
    ax_scores.invert_yaxis()
    ax_scores.set_xlabel('Probability')
    ax_scores.set_title('Top 5 Predictions')

    fig.tight_layout()
    plt.show()

    # Return top prediction
    return f"This is most likely a {decoded[0][1]} ({decoded[0][2]*100:.2f}% confidence)"

# Classify the same sample image used by the feature-detection cell.
# NOTE: `image_path` is assigned in that earlier cell, so it must have been
# run in this kernel before this one.
result = predict_image_content(image_path)
print(result)

In [None]:
import cv2
import numpy as np
import time
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

# Load pre-trained model
# ResNet50 is a deep learning model trained on the ImageNet dataset, which
# contains millions of images across thousands of categories. This model is
# capable of recognizing a wide variety of objects and scenes in images.
# NOTE(review): the image-classification cell above already builds the same
# ResNet50 and binds it to `model`; this rebinds the name to a new instance.
model = ResNet50(weights='imagenet')

def process_frame(frame):
    """Classify a single video frame with the global ResNet50 ``model``.

    Called by run_video_detection() each time a new prediction is due.

    Args:
        frame: Image array in OpenCV's default BGR channel order.

    Returns:
        The top 3 decoded predictions for the frame; each entry unpacks as
        ``(class_id, label, score)``.
    """
    # OpenCV delivers BGR (Blue, Green, Red); convert to RGB for the model.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Resize to the 224x224 input size used with ResNet50.
    resized = cv2.resize(frame_rgb, (224, 224))
    # Add a batch dimension: (224, 224, 3) -> (1, 224, 224, 3). The model
    # expects a batch of images, so a single frame is sent as a batch of one.
    img_array = np.expand_dims(resized, axis=0)
    # Apply the ResNet50-specific preprocessing.
    preprocessed = preprocess_input(img_array)

    # verbose=0 silences the per-call progress bar, which would otherwise be
    # printed again for every prediction made during the video loop.
    preds = model.predict(preprocessed, verbose=0)
    decoded = decode_predictions(preds, top=3)[0]

    return decoded

def _draw_outlined_text(frame, text, origin):
    """Draw green ``text`` with a black outline on ``frame`` at ``origin``."""
    # Thick black pass first, thinner green pass on top, for contrast.
    cv2.putText(frame, text, origin,
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 4)
    cv2.putText(frame, text, origin,
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)


def run_video_detection(source=0):  # 0 for webcam, or provide a video file path
    """Show a live video window annotated with ResNet50 predictions.

    Frames are read from ``source`` in a loop. About once per second one frame
    is passed to process_frame(); the latest predictions are drawn on every
    displayed frame. The loop ends when 'q' is pressed or a frame cannot be
    read.

    Args:
        source: Value handed to ``cv2.VideoCapture`` (0 for the default
            webcam, or a video file path).

    Returns:
        None. Prints an error and returns early if the source cannot be opened.
    """
    # Initialize video capture source
    cap = cv2.VideoCapture(source)
    if not cap.isOpened():
        print("Error: Could not open video source")
        return

    # try/finally guarantees the camera and window are released even when the
    # loop is left by an exception or a kernel interrupt.
    try:
        # Request a reasonable frame size for the displayed video
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        prediction_interval = 1.0  # seconds between predictions (to avoid lag)
        # Monotonic clock: the interval is unaffected by wall-clock changes.
        # Starting from "now" means the first prediction comes one interval
        # after start-up; until then "Initializing..." is shown.
        last_prediction_time = time.monotonic()
        current_predictions = None

        while True:
            ret, frame = cap.read()  # ret is False when no frame was read
            if not ret:
                break

            # Only run the model once per interval to keep the display fluid
            current_time = time.monotonic()
            if current_time - last_prediction_time > prediction_interval:
                current_predictions = process_frame(frame)
                last_prediction_time = current_time

            # Always draw the latest predictions, including on frames that
            # were not classified, so the text does not blink
            if current_predictions is not None:
                y_pos = 30
                for i, (_, label, score) in enumerate(current_predictions):
                    text = f"{label}: {score:.2f}"
                    _draw_outlined_text(frame, text, (10, y_pos + i*30))
            else:
                cv2.putText(frame, "Initializing...", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

            # Display frame
            cv2.imshow('Real-time Object Detection', frame)

            # Exit on 'q' press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Clean up
        cap.release()
        cv2.destroyAllWindows()

# Run with webcam (use 0 for default camera)
# To use a video file instead, replace 0 with the path to your video file
# Once the source is open, the call returns only after 'q' is pressed or a
# frame can no longer be read.
run_video_detection(0)