In [1]:
pip install tensorflow pycocotools gym


Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from pycocotools.coco import COCO
import random
import gym
from collections import deque


In [5]:
import tensorflow as tf
from pycocotools.coco import COCO

# Define the root directory for the COCO dataset
dataDir = r'C:\Users\dnave\Downloads\COCO Dataset\coco2017'

# Specify the data type (train or val)
dataType = 'val2017'  # Use validation set; change to 'train2017' for training set

# Define the path to the annotation file (instances for the selected data type)
annFile = f'{dataDir}\\annotations\\instances_{dataType}.json'

# Initialize COCO API for COCO annotations
coco = COCO(annFile)

# Get the image ids
imgIds = coco.getImgIds()

# Load the first image (you can change the index to load another image)
img = coco.loadImgs(imgIds[0])[0]

# Load image and annotations (example for one image)
imgFile = f"{dataDir}\\{dataType}\\{img['file_name']}"

# Debugging: Print the file path to ensure it's correct
print(f"Image file path: {imgFile}")

# Try to load the image
try:
    image = tf.keras.preprocessing.image.load_img(imgFile)
    image = tf.keras.preprocessing.image.img_to_array(image)
    print("Image loaded successfully!")
except FileNotFoundError:
    print(f"Error: File not found at {imgFile}")
except Exception as e:
    print(f"An error occurred: {e}")


loading annotations into memory...
Done (t=0.73s)
creating index...
index created!
Image file path: C:\Users\dnave\Downloads\COCO Dataset\coco2017\val2017\000000397133.jpg
Image loaded successfully!


In [7]:
# Load the ResNet50 model pre-trained on ImageNet without the top (classification) layer
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze the base model

# Create a new model that includes the base model and the output layer
model = Model(inputs=base_model.input, outputs=base_model.output)

# Resize the image to 224x224 for ResNet50
image_resized = tf.image.resize(image, (224, 224))
image_resized = np.expand_dims(image_resized, axis=0)

# Extract features from the image using the pre-trained ResNet50 model
features = model.predict(image_resized)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


In [9]:
# Define the Q-network (simplified for object detection)
class DQNModel(tf.keras.Model):
    def __init__(self, action_space):
        super(DQNModel, self).__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(64, activation='relu')
        self.dense3 = tf.keras.layers.Dense(action_space, activation='linear')  # Action space for bounding box and class

    def call(self, state):
        x = self.flatten(state)
        x = self.dense1(x)
        x = self.dense2(x)
        return self.dense3(x)

# Define the DQN agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # Experience replay memory
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = DQNModel(action_size)
        self.target_model = DQNModel(action_size)  # Target model for stability
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)  # Exploration
        q_values = self.model(state)
        return np.argmax(q_values[0])  # Exploitation

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return

        batch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.max(self.target_model(next_state))
            with tf.GradientTape() as tape:
                q_values = self.model(state)
                loss = tf.keras.losses.MSE(q_values[0][action], target)
            grads = tape.gradient(loss, self.model.trainable_variables)
            self.model.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))

        # Update epsilon for exploration-exploitation balance
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        # Update target model
        self.target_model.set_weights(self.model.get_weights())


In [11]:
def calculate_reward(action):
    # Define reward function based on the accuracy of bounding box prediction
    # Example: +1 for correct detection, -1 for false positive, and 0 for no detection
    return random.choice([1, 0, -1])  # Simplified example


In [13]:
# Define state and action sizes (simplified for object detection)
state_size = features.shape[1:]  # Feature map from CNN
action_size = 4  # Example: bounding box (x, y, width, height) and class prediction

# Initialize the agent
agent = DQNAgent(state_size, action_size)

# Train the agent
for episode in range(1000):  # 1000 episodes (example)
    state = np.expand_dims(features, axis=0)  # Initial state (image feature map)
    done = False
    time = 0
    total_reward = 0

    while not done:
        action = agent.act(state)
        # Simulate taking an action: output bounding box (simplified for illustration)
        reward = calculate_reward(action)  # Define reward function based on accuracy of prediction
        next_state = state  # Update state based on new bounding box (not implemented here)
        done = True  # End the episode (simplified)

        agent.remember(state, action, reward, next_state, done)
        agent.replay(batch_size=32)
        state = next_state
        total_reward += reward

    print(f"Episode {episode+1}/{1000}, Total Reward: {total_reward}")


Episode 1/1000, Total Reward: 1
Episode 2/1000, Total Reward: 0
Episode 3/1000, Total Reward: -1
Episode 4/1000, Total Reward: -1
Episode 5/1000, Total Reward: 1
Episode 6/1000, Total Reward: 0
Episode 7/1000, Total Reward: 0
Episode 8/1000, Total Reward: 1
Episode 9/1000, Total Reward: 1
Episode 10/1000, Total Reward: 0
Episode 11/1000, Total Reward: 0
Episode 12/1000, Total Reward: -1
Episode 13/1000, Total Reward: -1
Episode 14/1000, Total Reward: -1
Episode 15/1000, Total Reward: 1
Episode 16/1000, Total Reward: -1
Episode 17/1000, Total Reward: -1
Episode 18/1000, Total Reward: 0
Episode 19/1000, Total Reward: 0
Episode 20/1000, Total Reward: -1
Episode 21/1000, Total Reward: -1
Episode 22/1000, Total Reward: -1
Episode 23/1000, Total Reward: 1
Episode 24/1000, Total Reward: -1
Episode 25/1000, Total Reward: 0
Episode 26/1000, Total Reward: -1
Episode 27/1000, Total Reward: -1
Episode 28/1000, Total Reward: 0
Episode 29/1000, Total Reward: 1
Episode 30/1000, Total Reward: 0
Episod

InvalidArgumentError: {{function_node __wrapped__Mean_device_/job:localhost/replica:0/task:0/device:CPU:0}} Invalid reduction dimension (-1 for input with 0 dimension(s) [Op:Mean] name: 

In [None]:
import numpy as np
import tensorflow as tf
from pycocotools.coco import COCO
import matplotlib.pyplot as plt
import cv2

# Define the agent (use the DQNAgent class from previous code)
# Assume `DQNAgent` has been defined earlier as it was in your previous code

# For the sake of simplicity, we'll assume that the agent has been trained
# and we are ready to test it.

def detect_object_in_image(agent, image_path, coco, dataDir, dataType):
    """
    Given an image path, use the trained DQN agent to detect an object.
    The function assumes the agent can predict the bounding box and class.
    """

    # Load the image
    img = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # Preprocess the image to match the input expected by the CNN (state_size)
    img_resized = cv2.resize(img_rgb, (224, 224))  # Resize to the expected input size
    img_array = np.array(img_resized) / 255.0  # Normalize pixel values

    # Add batch dimension to the image
    img_array = np.expand_dims(img_array, axis=0)

    # Get the features from the CNN (This is a placeholder, you can replace it with your CNN model)
    features = img_array  # Replace with the actual CNN output features if needed

    # Set the initial state (features from the CNN)
    state = np.expand_dims(features, axis=0)  # Expand dims for batch size

    # Use the DQN agent to predict the action (bounding box + class)
    action = agent.act(state)  # This will give us the bounding box and class prediction

    # Extract the predicted bounding box and class
    predicted_bbox = action[:4]  # Assuming action contains (x, y, width, height, class)
    predicted_class = action[4]  # Predicted class label

    # Get the class label for the predicted class
    category = coco.loadCats(predicted_class)[0]['name']

    # Calculate the coordinates of the bounding box
    x, y, w, h = predicted_bbox
    x, y, w, h = int(x), int(y), int(w), int(h)

    # Draw the bounding box and label on the image
    cv2.rectangle(img_rgb, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img_rgb, f"{category}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Show the result
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.show()

    # Save the result (optional)
    cv2.imwrite('detected_object.jpg', cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR))

# Example of how to use this function:
image_path = r'C:\path_to_image\image.jpg'  # Update with your image path
dataDir = r'C:\Users\dnave\Downloads\COCO Dataset\coco2017'  # COCO dataset path
dataType = 'val2017'  # Validation data type
annFile = f'{dataDir}\\annotations\\instances_{dataType}.json'
coco = COCO(annFile)

# Assuming the agent is already trained, use the detect_object_in_image function
detect_object_in_image(agent, image_path, coco, dataDir, dataType)


In [None]:
import cv2
import os

def detect_object_in_image(agent, image_path, coco, dataDir, dataType):
    """
    Given an image path, use the trained DQN agent to detect an object.
    The function assumes the agent can predict the bounding box and class.
    """

    # Check if the image exists
    if not os.path.isfile(image_path):
        print(f"Error: Image file not found at {image_path}")
        return

    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Failed to load image from {image_path}")
        return

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # Preprocess the image to match the input expected by the CNN (state_size)
    img_resized = cv2.resize(img_rgb, (224, 224))  # Resize to the expected input size
    img_array = np.array(img_resized) / 255.0  # Normalize pixel values

    # Add batch dimension to the image
    img_array = np.expand_dims(img_array, axis=0)

    # Get the features from the CNN (This is a placeholder, you can replace it with your CNN model)
    features = img_array  # Replace with the actual CNN output features if needed

    # Set the initial state (features from the CNN)
    state = np.expand_dims(features, axis=0)  # Expand dims for batch size

    # Use the DQN agent to predict the action (bounding box + class)
    action = agent.act(state)  # This will give us the bounding box and class prediction

    # Extract the predicted bounding box and class
    predicted_bbox = action[:4]  # Assuming action contains (x, y, width, height, class)
    predicted_class = action[4]  # Predicted class label

    # Get the class label for the predicted class
    category = coco.loadCats(predicted_class)[0]['name']

    # Calculate the coordinates of the bounding box
    x, y, w, h = predicted_bbox
    x, y, w, h = int(x), int(y), int(w), int(h)

    # Draw the bounding box and label on the image
    cv2.rectangle(img_rgb, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img_rgb, f"{category}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Show the result
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.show()

    # Save the result (optional)
    cv2.imwrite('detected_object.jpg', cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR))


In [None]:
import cv2
import numpy as np
import tensorflow as tf
from pycocotools.coco import COCO
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50

# Define the DQN agent (simplified for object classification)
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []
        self.model = self.build_model()
    
    def build_model(self):
        # Simple neural network for DQN model (based on state input)
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(128, input_shape=(self.state_size,), activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(self.action_size, activation='softmax')  # Action size = number of classes
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model
    
    def act(self, state):
        # Choose action based on the policy (epsilon-greedy, for example)
        action_probs = self.model.predict(state)
        return np.argmax(action_probs)  # Return the class with highest probability
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def replay(self, batch_size):
        # Sample a batch of experiences from memory and train the model
        if len(self.memory) < batch_size:
            return
        
        batch = np.random.choice(self.memory, batch_size)
        for state, action, reward, next_state, done in batch:
            target = reward  # Simplified target (in practice, use Bellman equation)
            target_f = self.model.predict(state)
            target_f[0][action] = target  # Update target for the selected action
            
            self.model.fit(state, target_f, epochs=1, verbose=0)

# Define the function to preprocess the image
def preprocess_image(image_path, model):
    img = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img_rgb, (224, 224))  # Resize to the expected input size
    img_array = np.array(img_resized) / 255.0  # Normalize pixel values
    
    # Add batch dimension and pass through CNN (ResNet50)
    img_array = np.expand_dims(img_array, axis=0)
    features = model.predict(img_array)  # CNN feature extraction
    return features

# Initialize the ResNet50 model for feature extraction
cnn_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# COCO dataset paths
dataDir = 'C:/Users/dnave/Downloads/COCO Dataset/coco2017'
dataType = 'val2017'
annFile = f'{dataDir}/annotations/instances_{dataType}.json'
coco = COCO(annFile)

# Initialize DQN agent
state_size = cnn_model.output.shape[1]  # Feature size from CNN (e.g., 2048 for ResNet50)
action_size = 80  # COCO has 80 object categories
agent = DQNAgent(state_size, action_size)

# Define the reward function (simplified)
def calculate_reward(predicted_class, true_class):
    return 1 if predicted_class == true_class else -1

# Function to detect object in the image using the trained DQN agent
def detect_object_in_image(agent, image_path, coco, dataDir, dataType):
    # Preprocess the image
    features = preprocess_image(image_path, cnn_model)
    
    # Use the DQN agent to predict the class of the object in the image
    state = np.expand_dims(features, axis=0)  # Add batch dimension
    predicted_class = agent.act(state)  # Predicted class (index of the object)
    
    # Get the class name
    category = coco.loadCats(predicted_class)[0]['name']
    
    # Display the results
    print(f"Predicted Class: {category}")
    
    # Load and display the image with the predicted label
    img = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    cv2.putText(img_rgb, f"Predicted: {category}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.show()

# Example usage:
image_path = 'C:/path_to_image/000000579970.jpg'  # Replace with your image path
detect_object_in_image(agent, image_path, coco, dataDir, dataType)


In [None]:
import cv2
import numpy as np

def preprocess_image(image_path, model):
    print(f"Loading image from path: {image_path}")
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Error: Image not found or cannot be read from {image_path}")
    
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    img_resized = cv2.resize(img_rgb, (224, 224))  # Resize to the expected input size
    img_array = np.array(img_resized) / 255.0  # Normalize pixel values
    
    # Add batch dimension and pass through CNN (ResNet50)
    img_array = np.expand_dims(img_array, axis=0)
    features = model.predict(img_array)  # CNN feature extraction
    return features

# Example usage:
image_path = 'C:Users/dnave/Downloads/COCO Dataset/val2017/000000579970.jpg'  
try:
    features = preprocess_image(image_path, cnn_model)
    print("Image processed successfully.")
except ValueError as e:
    print(e)
