In [None]:
import torch
from torch import nn 
import numpy as np 
import pandas as pd
from pycocotools.coco import COCO
import cv2
from PIL import Image, ImageOps

import os
import matplotlib.pyplot as plt
import json
import matplotlib.patches as patches


In [None]:
#!wget http://images.cocodataset.org/zips/train2017.zip -O coco_train2017.zip

In [None]:
#!wget http://images.cocodataset.org/zips/val2017.zip -O coco_val2017.zip


In [None]:
#!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -O coco_ann2017.zip


In [None]:
from zipfile import ZipFile, BadZipFile
import os
def getDataExtracted(extract_paths=("./coco_ann2017",)):
    """Extract ``<path>.zip`` into ``<path>`` for every entry and delete the archive.

    The function is safe to re-run: when an archive is missing (for example
    because a previous run already extracted and removed it) a message is
    printed instead of raising ``FileNotFoundError``.

    Args:
        extract_paths: Target directories; each is expected to have a sibling
            ``.zip`` file.  Defaults to the annotation archive only, which is
            what this notebook has always extracted.
    """
    def extract_zip_file(extract_path):
        """Unpack one archive; keep the zip on disk if it turns out to be corrupt."""
        zip_path = f"{extract_path}.zip"
        if not os.path.isfile(zip_path):
            if os.path.isdir(extract_path):
                print(f"Skipping {zip_path}: already extracted to {extract_path}")
            else:
                print("Error: %s file not found" % zip_path)
            return
        try:
            with ZipFile(zip_path) as zfile:
                zfile.extractall(extract_path)
        except BadZipFile as e:
            print("Error:", e)
            return
        # Only remove the archive once extraction finished without error.
        os.remove(zip_path)

    for extract_path in extract_paths:
        extract_zip_file(extract_path)

In [None]:
# Unpack ./coco_ann2017.zip into ./coco_ann2017 (the archive is removed after extraction).
getDataExtracted()

In [None]:
# Load the instance annotations of the chosen split into a pycocotools index.
dataType = 'train2017'
dataDir = 'coco_ann2017'
annFile = os.path.join(dataDir, 'annotations', 'instances_' + dataType + '.json')
coco = COCO(annFile)

In [None]:

# Preview one training image (the 1001st id) with its ground-truth boxes drawn on top.
dataType = 'train2017'
dataDir = 'coco_train2017'
imgIds = coco.getImgIds()

chosen_img_id = imgIds[1000]
img_info = coco.loadImgs(chosen_img_id)[0]
img_path = os.path.join(dataDir, dataType, img_info['file_name'])
print(img_path)
if not os.path.exists(img_path):
    print("Error: Image file not found.")
else:
    image = Image.open(img_path)
    width, height = image.size
    annIds = coco.getAnnIds(imgIds=chosen_img_id)
    annotations = coco.loadAnns(annIds)
    fig, ax = plt.subplots(1)
    ax.imshow(image)
    for ann in annotations:
        # COCO boxes are [x, y, w, h] with (x, y) the top-left corner.
        x, y, w, h = ann['bbox']
        x_min, y_min = x, y
        x_max, y_max = x + w, y + h
        ax.add_patch(
            patches.Rectangle(
                (x_min, y_min),
                x_max - x_min,
                y_max - y_min,
                linewidth=1,
                edgecolor='r',
                facecolor='none',
            )
        )
    ax.axis('off')
    plt.show()

In [None]:
# Largest image height / width over the whole split.
# The sizes come from the COCO image records (each carries 'height' and 'width'),
# so no image file has to be opened; the previous loop re-opened the same
# `img_path` on every iteration and read an undefined `maxy`.
maxy = 0   # tallest image height
maxy2 = 0  # widest image width
for img_meta in coco.loadImgs(imgIds):
    maxy = max(maxy, img_meta['height'])
    maxy2 = max(maxy2, img_meta['width'])
print(maxy)
print(maxy2)
    

In [None]:
# Show the (width, height) of whatever PIL image the notebook-level name `image` currently holds.
print (image.size)

In [None]:
# Shrink the first 1000 training images to fit 640x457, pad them on the right/bottom
# to exactly that size, and store the correspondingly scaled boxes in one JSON file.
outputDir = 'resize_train'
os.makedirs(outputDir, exist_ok=True)
target_width = 640
target_height = 457
new_annotations = []
dataType = 'train2017'
dataDir = 'coco_train2017'
imgIds = coco.getImgIds()

# Slicing (instead of range(1000)) also copes with splits holding fewer than 1000 images.
for chosen_img_id in imgIds[:1000]:
    img_info = coco.loadImgs(chosen_img_id)[0]
    img_filename = img_info['file_name']
    img_path = os.path.join(dataDir, dataType, img_filename)

    with Image.open(img_path) as source_image:
        # Remember the size BEFORE any resizing: it is the reference for box scaling.
        orig_width, orig_height = source_image.size
        # Pillow 10 removed Image.ANTIALIAS; LANCZOS is the same filter.
        source_image.thumbnail((target_width, target_height), Image.LANCZOS)
        resized_width, resized_height = source_image.size
        # Pad only on the right and bottom so box origins need no offset.
        padding = (0, 0, target_width - resized_width, target_height - resized_height)
        image = ImageOps.expand(source_image, padding)

    resized_img_path = os.path.join(outputDir, img_filename)
    image.save(resized_img_path)

    # thumbnail() keeps the aspect ratio and never enlarges, so both ratios are <= 1.
    # They must be taken against the original size; after padding the image always
    # measures target_width x target_height and the ratio would degenerate to 1.
    x_ratio = resized_width / orig_width
    y_ratio = resized_height / orig_height

    annIds = coco.getAnnIds(imgIds=chosen_img_id)
    annotations = coco.loadAnns(annIds)

    new_annotation = {
        'image_id': chosen_img_id,
        'file_name': img_filename,
        'width': target_width,
        'height': target_height,
        # Boxes stay in [x, y, w, h] form, rounded to whole pixels.
        'annotations': [
            [
                round(x * x_ratio),
                round(y * y_ratio),
                round(w * x_ratio),
                round(h * y_ratio),
            ]
            for x, y, w, h in (ann['bbox'] for ann in annotations)
        ],
    }
    new_annotations.append(new_annotation)

new_annFile =  'instances_resized.json'
with open(new_annFile, 'w') as f:
    json.dump(new_annotations, f)


In [None]:
# Count the images in resize_train and report every one whose size is not 640x457.
target_width = 640
target_height = 457
folder_path = "resize_train"
counter = 0
for filename in os.listdir(folder_path):
    if not filename.endswith((".jpg", ".png")):
        continue
    image_path = os.path.join(folder_path, filename)
    with Image.open(image_path) as img:
        width, height = img.size
        counter += 1
        if (width, height) != (target_width, target_height):
            print(f"Image {filename} has size {width}x{height}.")
print(counter)

In [None]:
# Directory name used by later cells when building paths to the resized images.
outputDir = 'resize_train'


In [None]:
# Sanity check: draw the stored boxes of one resized image (entry 100) on top of it.
new_annFile = 'instances_resized.json'
with open(new_annFile, 'r') as f:
    new_annotations = json.load(f)

image_data = new_annotations[100]
annotations = image_data['annotations']
image_path = os.path.join(outputDir, image_data['file_name'])
image = Image.open(image_path)

fig, ax = plt.subplots()
ax.imshow(image)
for bbox in annotations:
    x, y, w, h = bbox
    rect = patches.Rectangle(
        (x, y), w, h, linewidth=1, edgecolor='r', facecolor='none'
    )
    ax.add_patch(rect)
plt.show()


In [None]:
# Index the resized annotations by image file name for direct lookup.
instances_resized_file = 'instances_resized.json'
with open(instances_resized_file, 'r') as f:
    instances_resized_data = json.load(f)

# file name -> list of [x, y, w, h] boxes
annotations_dict = {}
for entry in instances_resized_data:
    annotations_dict[entry['file_name']] = entry['annotations']

In [None]:
target_width = 640
target_height = 457
def getBinaryMask(annotations,target_height, target_width):
    """Rasterise ``[x, y, w, h]`` boxes into a binary mask with one extra count row.

    Args:
        annotations: Iterable of ``[x, y, w, h]`` boxes in pixel units.  Integer
            boxes behave as before; float coordinates are rounded to the nearest
            pixel instead of failing inside the slice.
        target_height: Height of the image the boxes refer to.
        target_width: Width of the image the boxes refer to.

    Returns:
        ``uint8`` array of shape ``(target_height + 1, target_width)``.  Rows
        ``0 .. target_height - 1`` are 1 inside any box and 0 elsewhere; every
        element of the last row holds the number of boxes (capped at 255, the
        largest value ``uint8`` can store).
    """
    mask = np.zeros((target_height + 1, target_width), dtype=np.uint8)
    for bbox in annotations:
        x, y, w, h = (int(round(v)) for v in bbox)
        # Clamp to the image: a negative start would otherwise be read as a
        # "from the end" index and mark the wrong pixels (or none at all).
        x0, y0 = max(x, 0), max(y, 0)
        x1 = max(min(x + w, target_width), 0)
        y1 = max(min(y + h, target_height), 0)
        mask[y0:y1, x0:x1] = 1
    # The extra bottom row carries the object count alongside the mask.
    mask[-1] = min(len(annotations), np.iinfo(np.uint8).max)

    return mask

In [None]:
#testing binary mask 
a = [[1,1,2,2]]
mask = getBinaryMask(a, 5,5)
print(mask)

In [None]:
# Build the training tensors:
#   X_train: (N, 3, target_height + 1, target_width) uint8 pixels, last row all zeros
#   Y_train: (N, target_height + 1, target_width)    uint8 masks from getBinaryMask
folder_path = "resize_train"
target_height = 457
target_width = 640

# Sorted so the sample order is reproducible; os.listdir order is arbitrary.
all_filenames = sorted(
    filename for filename in os.listdir(folder_path) if filename.endswith((".jpg", ".png"))
)
# Keep only files that have annotations so X_train, Y_train and image_filenames stay aligned.
image_filenames = [filename for filename in all_filenames if filename in annotations_dict]
if len(image_filenames) != len(all_filenames):
    print(f"Skipping {len(all_filenames) - len(image_filenames)} image(s) without annotations.")
if not image_filenames:
    raise FileNotFoundError(f"No annotated images found in {folder_path}")

image_data_list = []
annotations_list = []
for filename in image_filenames:
    image_path = os.path.join(folder_path, filename)
    with Image.open(image_path) as image:
        # convert("RGB") also covers grayscale/palette images; result is (H, W, 3) uint8.
        rgb_array = np.array(image.convert("RGB"))
    # One extra zero row so the image height matches the mask, whose last row is the
    # object count.  uint8 keeps the stack 8x smaller than the float64 default;
    # the training cell converts each batch to float32 anyway.
    np_arr = np.zeros((rgb_array.shape[0] + 1, rgb_array.shape[1], 3), dtype=np.uint8)
    np_arr[:-1] = rgb_array
    image_data_list.append(np_arr)
    annotations = annotations_dict[filename]
    binary_mask = getBinaryMask(annotations, target_height, target_width)
    annotations_list.append(binary_mask)

X_train = np.stack(image_data_list)
X_train  = np.transpose(X_train, (0, 3, 1, 2))  # NHWC -> NCHW for Conv2d
Y_train = np.stack(annotations_list)
print("Shape of Training Data array:", X_train.shape)
print("Shape of Output array:", Y_train.shape)


In [None]:
import torch.optim as optim

class Detector(nn.Module):
    """Small fully-convolutional network producing one non-negative map per image.

    Seven 5x5 ``padding='same'`` convolutions (3 -> 4 -> 8 -> 4 -> 2 -> 2 -> 1 -> 1
    channels) with BatchNorm/ReLU in between keep the spatial size, so an input
    of shape ``(N, 3, H, W)`` gives an output of shape ``(N, H, W)``.  The module
    owns its optimizer so that ``backprop`` can run a complete training step.

    Args:
        learning_rate: Step size handed to the optimizer.
        optimizer: ``"sgd"`` (case-insensitive) selects ``optim.SGD``; any other
            value keeps the previous behaviour of using ``optim.Adam``.
    """
    def __init__(self, learning_rate = 1e-5, optimizer = "adam"):
        super().__init__()
        layers = [
        nn.Conv2d(3,4,kernel_size =(5, 5),padding = 'same'),
        nn.BatchNorm2d(4, eps=1e-05, momentum=0.1),
        nn.ReLU(),
        nn.Conv2d(4,8,kernel_size =(5, 5),padding = 'same'),
        nn.BatchNorm2d(8, eps=1e-05, momentum=0.1),
        nn.ReLU(),
        nn.Conv2d(8,4,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU(),
        nn.Conv2d(4,2,kernel_size =(5,5),padding = 'same'),
        nn.ReLU(),
        nn.Conv2d(2,2,kernel_size =(5,5),padding = 'same'),
        nn.BatchNorm2d(2, eps=1e-05, momentum=0.1),
        nn.ReLU(),
        nn.Conv2d(2,1,kernel_size =(5,5),padding = 'same'),
        nn.ReLU(),
        nn.Conv2d(1,1,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU()
        ]
        self.layers = layers
        self.model = nn.Sequential(*layers)
        # The `optimizer` argument used to be ignored; Adam stays the default.
        if str(optimizer).lower() == "sgd":
            self.optimizer = optim.SGD(self.model.parameters(), lr = learning_rate, weight_decay = 1e-3)
        else:
            self.optimizer = optim.Adam(self.model.parameters(), lr = learning_rate, weight_decay = 1e-3)
    def forward(self, x):
        """Return the predicted map with shape ``(N, H, W)`` for input ``(N, 3, H, W)``."""
        out = self.model(x)
        # Drop only the channel axis; a bare squeeze() would also drop the batch
        # axis when N == 1 and change the output rank.
        return out.squeeze(1)
    def loss_func(self, x, target):
        """Mean squared error between ``forward(x)`` and ``target``."""
        res = self.forward(x)
        return torch.mean(torch.square(res - target))
    def backprop(self, x, target):
        """Run one optimisation step on ``(x, target)`` and return the loss as a float."""
        self.optimizer.zero_grad()
        loss = self.loss_func(x, target)
        loss.backward()
        self.optimizer.step()
        return loss.item()

In [None]:
import torch.optim as optim

class Detector2(nn.Module):
    """Wider variant of the fully-convolutional detector.

    Eight ``padding='same'`` convolutions (3 -> 64 -> 32 -> 16 -> 8 -> 4 -> 2 -> 1 -> 1
    channels) keep the spatial size, so an input of shape ``(N, 3, H, W)`` gives
    an output of shape ``(N, H, W)``.  The module owns its optimizer so that
    ``backprop`` can run a complete training step.

    Args:
        learning_rate: Step size handed to the optimizer.
        optimizer: ``"sgd"`` (case-insensitive) selects ``optim.SGD``; any other
            value keeps the previous behaviour of using ``optim.Adam``.
    """
    def __init__(self, learning_rate = 1e-5, optimizer = "adam"):
        super().__init__()
        layers = [
        nn.Conv2d(3,64,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU(),
        # NOTE(review): a 32x32 kernel over 64 -> 32 channels holds about 2M weights
        # and is very costly on full-size images - confirm this is intended.
        nn.Conv2d(64,32,kernel_size =(32, 32),padding = 'same'),
        nn.BatchNorm2d(32, eps=1e-05, momentum=0.1),
        nn.ReLU(),
        nn.Conv2d(32,16,kernel_size =(5, 5),padding = 'same'),
        nn.BatchNorm2d(16, eps=1e-05, momentum=0.1),
        nn.ReLU(),
        nn.Conv2d(16,8,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU(),
        nn.Conv2d(8,4,kernel_size =(5,5),padding = 'same'),
        nn.ReLU(),
        nn.Conv2d(4,2,kernel_size =(5,5),padding = 'same'),
        nn.BatchNorm2d(2, eps=1e-05, momentum=0.1),
        nn.ReLU(),
        nn.Conv2d(2,1,kernel_size =(5,5),padding = 'same'),
        nn.ReLU(),
        nn.Conv2d(1,1,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU()
        ]
        self.layers = layers
        self.model = nn.Sequential(*layers)
        # The `optimizer` argument used to be ignored; Adam stays the default.
        if str(optimizer).lower() == "sgd":
            self.optimizer = optim.SGD(self.model.parameters(), lr = learning_rate, weight_decay = 1e-3)
        else:
            self.optimizer = optim.Adam(self.model.parameters(), lr = learning_rate, weight_decay = 1e-3)
    def forward(self, x):
        """Return the predicted map with shape ``(N, H, W)`` for input ``(N, 3, H, W)``."""
        out = self.model(x)
        # Drop only the channel axis; a bare squeeze() would also drop the batch
        # axis when N == 1 and change the output rank.
        return out.squeeze(1)
    def loss_func(self, x, target):
        """Mean squared error between ``forward(x)`` and ``target``."""
        res = self.forward(x)
        return torch.mean(torch.square(res - target))
    def backprop(self, x, target):
        """Run one optimisation step on ``(x, target)`` and return the loss as a float."""
        self.optimizer.zero_grad()
        loss = self.loss_func(x, target)
        loss.backward()
        self.optimizer.step()
        return loss.item()

In [None]:
import torch.optim as optim

class Detector3(nn.Module):
    """Convolution + max-pooling encoder followed by a single linear head.

    The previous definition did not parse (dangling ``nn.``, missing comma),
    passed the invalid ``padding='none'`` to ``MaxPool2d``, had channel counts
    that did not chain (16 -> ``Conv2d(8, 8)``) and used undefined
    ``in_features`` / ``out_features``.  This version keeps the same layer
    sequence with consistent channels (3 -> 16 -> 16 -> 8 -> 4 -> 2).

    Args:
        learning_rate: Step size handed to the optimizer.
        optimizer: ``"sgd"`` (case-insensitive) selects ``optim.SGD``; any other
            value uses ``optim.Adam``.
        input_size: ``(height, width)`` of the images fed to the network; needed
            to size the linear layer.  The default matches the 458x640 arrays
            built earlier in this notebook.
        out_features: Size of the linear head's output.
            NOTE(review): the original left this undefined; 1 is a placeholder
            (e.g. an object count) - confirm the intended target.
    """
    def __init__(self, learning_rate = 1e-5, optimizer = "adam", input_size = (458, 640), out_features = 1):
        super().__init__()
        height, width = input_size
        # Each of the three 2x2/stride-2 poolings floor-halves both spatial
        # dimensions; the last convolution leaves 2 channels.
        in_features = 2 * (height // 8) * (width // 8)
        layers = [
        nn.Conv2d(3,16,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU(),
        nn.Conv2d(16,16,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = (2,2), stride = 2),
        nn.Conv2d(16,8,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = (2,2), stride = 2),
        nn.Conv2d(8,4,kernel_size =(5, 5),padding = 'same'),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = (2,2), stride = 2),
        nn.Conv2d(4,2,kernel_size =(5, 5),padding = 'same'),
        # NOTE(review): the original had an unfinished `nn.` here; ReLU is
        # assumed, in line with every other convolution in this file.
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(in_features, out_features),
        ]
        self.layers = layers
        self.model = nn.Sequential(*layers)
        if str(optimizer).lower() == "sgd":
            self.optimizer = optim.SGD(self.model.parameters(), lr = learning_rate, weight_decay = 1e-3)
        else:
            self.optimizer = optim.Adam(self.model.parameters(), lr = learning_rate, weight_decay = 1e-3)
    def forward(self, x):
        """Return ``(N, out_features)``, or ``(N,)`` when ``out_features`` is 1."""
        out = self.model(x)
        # squeeze(-1) only removes the feature axis when it has size 1 and never
        # touches the batch axis.
        return out.squeeze(-1)
    def loss_func(self, x, target):
        """Mean squared error between ``forward(x)`` and ``target``."""
        res = self.forward(x)
        return torch.mean(torch.square(res - target))
    def backprop(self, x, target):
        """Run one optimisation step on ``(x, target)`` and return the loss as a float."""
        self.optimizer.zero_grad()
        loss = self.loss_func(x, target)
        loss.backward()
        self.optimizer.step()
        return loss.item()

In [None]:
model = Detector()
model.train()
n_epochs = 10
BATCH_SIZE = 32
# Sample from however many images were actually loaded rather than a fixed 1000.
index_train = np.arange(len(X_train))
losses = []
#SUB Test for Train
# Each "epoch" here is a single optimisation step on one random mini-batch
# (indices are drawn with replacement by np.random.choice).
for epoch in range(n_epochs):
    new_index = np.random.choice(index_train, BATCH_SIZE)
    X_tensor = torch.tensor(X_train[new_index], dtype=torch.float32)
    Y_tensor = torch.tensor(Y_train[new_index], dtype=torch.float32)
    # backprop() performs zero_grad -> forward -> loss -> backward -> step with
    # one forward pass; the previous loop ran the network twice per step.
    loss_value = model.backprop(X_tensor, Y_tensor)
    losses.append(loss_value)
    print(f'Epoch {epoch+1}, Loss: {loss_value}')
plt.figure(figsize=(10, 5))
plt.plot(range(1, n_epochs+1), losses, label='Training Loss')
plt.title('Epoch vs. Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()



In [None]:

# Placeholder evaluation on random data shaped like the training arrays.
# The previous (2000, 10) / (2000, 1) arrays could not be fed to a Conv2d model.
# NOTE(review): random inputs in [0, 1) only prove the pipeline runs; replace
# them with a real held-out split before reading anything into the loss.
n_test = 16  # kept small: each sample is a full-size image
X_test = np.random.rand(n_test, *X_train.shape[1:])
Y_test = np.random.rand(n_test, *Y_train.shape[1:])

model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
    Y_pred_test = model(X_test_tensor)
    # Compare predictions with targets directly; model.loss_func(x, target)
    # expects model INPUTS as its first argument and would run the network again.
    test_loss = torch.mean(torch.square(Y_pred_test - Y_test_tensor)).item()

print(f'Test Loss: {test_loss}')


In [None]:
from sklearn.cluster import KMeans
# Turn the model output for training sample `n` into boxes via k-means.
# Changes from the previous version: the trained `model` is reused instead of
# being replaced by a fresh Detector(), inference runs under no_grad (calling
# .numpy() on a tensor that requires grad raises), the prediction itself is
# clustered instead of a random placeholder, and the matching image file is shown.
n = 5
model.eval()
with torch.no_grad():
    # Two samples are passed so the output keeps a leading batch axis.
    values = model(torch.from_numpy(X_train[n:n + 2]).float()).numpy()
numpy_array = values[0]
# The last row of the target masks stores the object count, so the mean of the
# predicted last row is used as the number of clusters (at least one).
last_row_avg = np.round(np.mean(numpy_array[-1]))
num_centers = max(1, int(last_row_avg))
data = numpy_array[:-1, :]
# NOTE(review): clustering is done on predicted pixel VALUES, not positions, so
# each "box" spans all pixels with a similar value - confirm this is intended.
data_1d = data.reshape(-1, 1)
kmeans = KMeans(n_clusters=num_centers, random_state=0)
kmeans.fit(data_1d)
labels = kmeans.labels_
labels_2d = labels.reshape(data.shape)
# image_filenames was built in the same order as X_train.
image_path = os.path.join('resize_train', image_filenames[n])
image = Image.open(image_path)
image_array = np.array(image)

bounding_boxes = []
for label in np.unique(labels):
    indices = np.argwhere(labels_2d == label)  # rows of (y, x)
    if len(indices) > 0:
        min_y, min_x = np.min(indices, axis=0)
        max_y, max_x = np.max(indices, axis=0)
        bounding_boxes.append([(min_x, min_y), (max_x, max_y)])

fig, ax = plt.subplots()
ax.imshow(image_array)
for bbox in bounding_boxes:
    min_pt, max_pt = bbox
    rect = plt.Rectangle(min_pt, max_pt[0] - min_pt[0], max_pt[1] - min_pt[1],
                         linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
plt.show()




THIS IS THE BASELINE

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import io

def load_and_convert_image(image_path):
    """Read an image file and return it as a 3-channel RGB array.

    ``skimage.io.imread`` already returns colour images in RGB order, so no
    BGR -> RGB swap is applied (the previous swap exchanged red and blue).
    Grayscale and RGBA inputs are converted to RGB because the rest of the
    pipeline reshapes pixels to ``(-1, 3)``.

    Args:
        image_path: Path (or URL) accepted by ``skimage.io.imread``.

    Returns:
        Array of shape ``(H, W, 3)`` in RGB channel order.
    """
    image = io.imread(image_path)
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    return image

def display_image(image, axis='off'):
    """Show ``image`` with matplotlib.

    Args:
        image: Array-like accepted by ``plt.imshow``.
        axis: Passed straight to ``plt.axis`` (``'off'`` hides the axes, ``'on'`` shows them).
    """
    plt.imshow(image)
    plt.axis(axis)
    plt.show()

def perform_kmeans_clustering(image, k=4):
    """Cluster the pixels of a 3-channel image into ``k`` colour groups with OpenCV.

    Args:
        image: Array whose elements can be reshaped to ``(-1, 3)``.
        k: Number of clusters.

    Returns:
        ``(labels, centers)`` exactly as returned by ``cv2.kmeans``.
    """
    samples = np.float32(image.reshape((-1, 3)))
    # Stop after 100 iterations or once the centres move by less than 0.2.
    stop_mode = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    termination = (stop_mode, 100, 0.2)
    # 10 restarts from random initial centres; the compactness score is discarded.
    _, labels, centers = cv2.kmeans(samples, k, None, termination, 10, cv2.KMEANS_RANDOM_CENTERS)
    return labels, centers

def create_segmented_image(image, labels, centers):
    """Repaint every pixel with the colour of its cluster centre.

    Args:
        image: Source image; only its shape is used.
        labels: Per-pixel cluster indices (any shape; flattened before use).
        centers: Cluster centre colours, cast to ``uint8``.

    Returns:
        ``uint8`` array with the same shape as ``image``.
    """
    palette = np.uint8(centers)
    flat_labels = labels.flatten()
    return palette[flat_labels].reshape(image.shape)

def mask_cluster(image, labels, cluster=2, color=(0,0,255)):
    """Return a copy of ``image`` with every pixel of one cluster painted ``color``.

    Args:
        image: ``(H, W, 3)`` array; it is not modified.
        labels: Per-pixel cluster indices with ``H * W`` elements.
        cluster: Index of the cluster to paint.
        color: Replacement colour for the selected pixels.
    """
    height, width = image.shape[0], image.shape[1]
    selected = labels.reshape(height, width) == cluster
    masked_image = np.array(image, copy=True)
    masked_image[selected] = [color]
    return masked_image

def find_and_draw_largest_contour(image, contour_color=(255,0,0), pad=3):
    """Draw a padded rectangle around the largest pure-blue region of an RGB image.

    Pixels are selected in HSV space with H = 120, S = 255 and V in [250, 255],
    which is what RGB ``(0, 0, 255)`` (the default ``mask_cluster`` colour) maps to.

    Args:
        image: ``(H, W, 3)`` uint8 RGB array; the rectangle is drawn on it in place.
        contour_color: Colour of the rectangle.
        pad: Number of pixels by which the rectangle is grown on every side.

    Returns:
        The same ``image`` object.  When no pixel matches, nothing is drawn
        (previously this raised ``ValueError`` from ``np.argmax`` on an empty list).
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    lower_blue = np.array([120, 255, 250], np.uint8)
    upper_blue = np.array([120, 255, 255], np.uint8)
    # inRange already yields a 0/255 mask, so no extra threshold step is needed.
    thresh = cv2.inRange(hsv_image, lower_blue, upper_blue)
    # [-2] picks the contour list under both the OpenCV 3 (3-tuple) and 4 (2-tuple) APIs.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        print("No masked region found; nothing drawn.")
        return image
    cnt = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(image, (x-pad, y-pad), (x+w+pad, y+h+pad), contour_color, 2)
    return image

# Baseline driver: run the k-means pipeline on one local image file.
image_path = 'train.jpg'
image = load_and_convert_image(image_path)
display_image(image)
# Cluster the pixel colours and show the image repainted with the cluster centres.
labels, centers = perform_kmeans_clustering(image)
segmented_image = create_segmented_image(image, labels, centers)
display_image(segmented_image)
# Paint the default cluster (index 2) in the marker colour, then box its largest region.
masked_image = mask_cluster(image, labels)
display_image(masked_image)
final_image = find_and_draw_largest_contour(masked_image)
display_image(final_image, axis='on')  


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
import torch
from torchvision import models, transforms

def load_and_convert_image(image_path):
    """Read an image with ``skimage.io.imread`` and return the array unchanged."""
    image = io.imread(image_path)
    # The BGR -> RGB swap below is left commented out, so no channel reordering happens here.
    #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image

def display_image(image, axis='off'):
    """Show ``image`` with matplotlib.

    Args:
        image: Array-like accepted by ``plt.imshow``.
        axis: Passed straight to ``plt.axis`` (``'off'`` hides the axes, ``'on'`` shows them).
    """
    plt.imshow(image)
    plt.axis(axis)
    plt.show()

def perform_kmeans_clustering(image, k=3):
    """Cluster the pixels of a 3-channel image into ``k`` colour groups with OpenCV.

    Args:
        image: Array whose elements can be reshaped to ``(-1, 3)``.
        k: Number of clusters.

    Returns:
        ``(labels, centers)`` exactly as returned by ``cv2.kmeans``.
    """
    samples = np.float32(image.reshape((-1, 3)))
    # Stop after 100 iterations or once the centres move by less than 0.2.
    stop_mode = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    termination = (stop_mode, 100, 0.2)
    # 10 restarts from random initial centres; the compactness score is discarded.
    _, labels, centers = cv2.kmeans(samples, k, None, termination, 10, cv2.KMEANS_RANDOM_CENTERS)
    return labels, centers

def create_segmented_image(image, labels, centers):
    """Repaint every pixel with the colour of its cluster centre.

    Args:
        image: Source image; only its shape is used.
        labels: Per-pixel cluster indices (any shape; flattened before use).
        centers: Cluster centre colours, cast to ``uint8``.

    Returns:
        ``uint8`` array with the same shape as ``image``.
    """
    palette = np.uint8(centers)
    flat_labels = labels.flatten()
    return palette[flat_labels].reshape(image.shape)

def load_pytorch_model():
    """Return torchvision's COCO-pretrained Faster R-CNN (ResNet-50 FPN) in eval mode.

    Newer torchvision releases deprecate ``pretrained=True`` in favour of the
    ``weights=`` argument; the weights enum is used when it exists and the
    legacy flag is kept as a fallback for older installations.
    """
    weights_enum = getattr(models.detection, "FasterRCNN_ResNet50_FPN_Weights", None)
    if weights_enum is not None:
        model = models.detection.fasterrcnn_resnet50_fpn(weights=weights_enum.DEFAULT)
    else:
        model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()
    return model

def preprocess_image_for_pytorch(image):
    """Convert an image array to a float tensor resized to 800x800.

    The steps are ``ToPILImage`` -> ``Resize((800, 800))`` -> ``ToTensor``.
    Because the resize ignores the aspect ratio, coordinates predicted on the
    returned tensor refer to the 800x800 frame, not to the input image.
    """
    steps = [
        transforms.ToPILImage(),
        transforms.Resize((800, 800)),
        transforms.ToTensor(),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(image)

def detect_objects_pytorch(model, image):
    """Run the detector on ``image`` and return boxes in the image's own pixel frame.

    Args:
        model: Detection model called as ``model([tensor])`` that returns a list
            of dicts with a ``'boxes'`` entry in ``[x1, y1, x2, y2]`` form.
        image: ``(H, W, C)`` array (or ``(C, H, W)`` tensor) to analyse.

    Returns:
        The prediction dict for the image.  ``'boxes'`` are rescaled from the
        preprocessed tensor's size back to the size of ``image`` so they can be
        drawn directly on it; all other entries are passed through.
    """
    image_tensor = preprocess_image_for_pytorch(image)
    with torch.no_grad():
        prediction = model([image_tensor])[0]

    if isinstance(image, torch.Tensor):
        orig_height, orig_width = image.shape[-2], image.shape[-1]
    else:
        orig_height, orig_width = image.shape[0], image.shape[1]
    tensor_height, tensor_width = image_tensor.shape[-2], image_tensor.shape[-1]

    # Preprocessing may have resized the image; undo that scaling on the boxes.
    # When the sizes already agree the prediction is returned untouched.
    if (tensor_height, tensor_width) != (orig_height, orig_width):
        scale_x = orig_width / tensor_width
        scale_y = orig_height / tensor_height
        boxes = prediction['boxes']
        scale = torch.tensor([scale_x, scale_y, scale_x, scale_y],
                             dtype=boxes.dtype, device=boxes.device)
        prediction = dict(prediction)  # shallow copy: leave the model's dict alone
        prediction['boxes'] = boxes * scale
    return prediction

# Names indexed by COCO category id (0..90).  torchvision's COCO detection models
# emit these ids directly; ids that COCO never uses are marked 'N/A'.  The previous
# 81-entry list combined with a `label - 1` lookup was only right for ids 13-25
# (e.g. dog, sheep) and mislabelled everything else (person -> '__background__').
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_class_name(class_id):
    """Return the COCO name for ``class_id``, or ``'N/A'`` when it is out of range."""
    if 0 <= class_id < len(COCO_INSTANCE_CATEGORY_NAMES):
        return COCO_INSTANCE_CATEGORY_NAMES[class_id]
    return 'N/A'

def draw_detection_boxes(image, prediction, threshold=0.5):
    """Draw every detection scoring above ``threshold`` onto ``image`` (in place).

    Args:
        image: Array to draw on; it is modified and also returned.
        prediction: Dict with aligned ``'boxes'`` (``[x1, y1, x2, y2]``),
            ``'scores'`` and ``'labels'`` tensors.
        threshold: Minimum score for a detection to be drawn.

    Returns:
        The same ``image`` object with rectangles and class names added.
    """
    detections = zip(prediction['boxes'], prediction['scores'], prediction['labels'])
    for box, score, label in detections:
        if score > threshold:
            x1, y1, x2, y2 = (int(v) for v in box.cpu().numpy())
            # Labels are COCO category ids and index the name list directly.
            class_name = get_class_name(label.item())
            print('Detected:', class_name)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Keep the caption inside the image when the box touches the top edge.
            text_y = max(y1 - 10, 10)
            cv2.putText(image, class_name, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 2)
    return image


# Run segmentation + detection on each sample image.
# The detector is loaded once and reused; it was previously reloaded per image.
model = load_pytorch_model()
for image_path in ('dog.jpg', 'sheep.jpeg'):
    image = load_and_convert_image(image_path)
    display_image(image)
    labels, centers = perform_kmeans_clustering(image)
    segmented_image = create_segmented_image(image, labels, centers)
    display_image(segmented_image)
    prediction = detect_objects_pytorch(model, image)
    final_image = draw_detection_boxes(image, prediction)
    display_image(final_image, axis='on')


In [None]:
from PIL import Image, ImageOps
import os
from pycocotools.coco import COCO
import json



# Produce 64x46 versions of every training image (centre crop when larger, symmetric
# padding when smaller, then resize) together with matching box annotations.
# Relies on `coco` and `dataType` defined by earlier cells.
# NOTE(review): images are written into the same 'resize_train' folder as the
# 640x457 set created earlier and overwrite it - confirm that is intended.
dataDir = 'coco_train2017'
outputDir = 'resize_train'
os.makedirs(outputDir, exist_ok=True)
target_width = 64
target_height = 46

new_annFile = os.path.join(outputDir, f'instances_{dataType}_resized.json')
new_annotations = []

for img_id in coco.getImgIds():
    img_info = coco.loadImgs(img_id)[0]
    img_path = os.path.join(dataDir, dataType, img_info['file_name'])

    with Image.open(img_path) as source_image:
        width, height = source_image.size
        # (offset_x, offset_y) is the shift of the original origin inside the
        # cropped/padded canvas.  It is defined on every branch; previously the
        # crop branch left pad_width/pad_height undefined or stale.
        offset_x, offset_y = 0, 0
        if width > target_width or height > target_height:
            left = (width - target_width) // 2
            top = (height - target_height) // 2
            right = left + target_width
            bottom = top + target_height
            image = source_image.crop((left, top, right, bottom))
            offset_x, offset_y = -left, -top
        elif width < target_width or height < target_height:
            pad_width = max(0, (target_width - width) // 2)
            pad_height = max(0, (target_height - height) // 2)
            padding = (pad_width, pad_height, pad_width, pad_height)
            image = ImageOps.expand(source_image, padding)
            offset_x, offset_y = pad_width, pad_height
        else:
            image = source_image.copy()

        # Symmetric padding can end one pixel short of the target, so the final
        # resize may still stretch slightly; the boxes follow the same scaling.
        scale_x = target_width / image.width
        scale_y = target_height / image.height
        # Pillow 10 removed Image.ANTIALIAS; LANCZOS is the same filter.
        image = image.resize((target_width, target_height), Image.LANCZOS)

    annIds = coco.getAnnIds(imgIds=img_id)
    annotations = coco.loadAnns(annIds)
    for ann in annotations:
        x, y, w, h = ann['bbox']
        # Shift into the cropped/padded canvas, scale, then clip to the image.
        x0 = max(0.0, (x + offset_x) * scale_x)
        y0 = max(0.0, (y + offset_y) * scale_y)
        x1 = min(float(target_width), (x + w + offset_x) * scale_x)
        y1 = min(float(target_height), (y + h + offset_y) * scale_y)
        if x1 <= x0 or y1 <= y0:
            continue  # box lies completely outside the kept region
        # Work on a copy: loadAnns returns the dicts owned by `coco`, and editing
        # them in place would corrupt the index for every other cell.
        # Other fields (e.g. 'segmentation', 'area') are carried over unchanged
        # and still refer to the original image geometry.
        new_ann = dict(ann)
        new_ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
        new_annotations.append(new_ann)

    resized_img_path = os.path.join(outputDir, img_info['file_name'])
    image.save(resized_img_path)

with open(new_annFile, 'w') as f:
    json.dump(new_annotations, f)


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import io

def load_and_convert_image(image_path):
    """Read an image file and return it as a 3-channel RGB array.

    ``skimage.io.imread`` already returns colour images in RGB order, so no
    BGR -> RGB swap is applied (the previous swap exchanged red and blue).
    Grayscale and RGBA inputs are converted to RGB because the rest of the
    pipeline reshapes pixels to ``(-1, 3)``.

    Args:
        image_path: Path (or URL) accepted by ``skimage.io.imread``.

    Returns:
        Array of shape ``(H, W, 3)`` in RGB channel order.
    """
    image = io.imread(image_path)
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    return image

def display_image(image, axis='off'):
    """Show ``image`` with matplotlib.

    Args:
        image: Array-like accepted by ``plt.imshow``.
        axis: Passed straight to ``plt.axis`` (``'off'`` hides the axes, ``'on'`` shows them).
    """
    plt.imshow(image)
    plt.axis(axis)
    plt.show()

def perform_kmeans_clustering(image, k=4):
    """Cluster the pixels of a 3-channel image into ``k`` colour groups with OpenCV.

    Args:
        image: Array whose elements can be reshaped to ``(-1, 3)``.
        k: Number of clusters.

    Returns:
        ``(labels, centers)`` exactly as returned by ``cv2.kmeans``.
    """
    samples = np.float32(image.reshape((-1, 3)))
    # Stop after 100 iterations or once the centres move by less than 0.2.
    stop_mode = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    termination = (stop_mode, 100, 0.2)
    # 10 restarts from random initial centres; the compactness score is discarded.
    _, labels, centers = cv2.kmeans(samples, k, None, termination, 10, cv2.KMEANS_RANDOM_CENTERS)
    return labels, centers

def create_segmented_image(image, labels, centers):
    """Repaint every pixel with the colour of its cluster centre.

    Args:
        image: Source image; only its shape is used.
        labels: Per-pixel cluster indices (any shape; flattened before use).
        centers: Cluster centre colours, cast to ``uint8``.

    Returns:
        ``uint8`` array with the same shape as ``image``.
    """
    palette = np.uint8(centers)
    flat_labels = labels.flatten()
    return palette[flat_labels].reshape(image.shape)

def mask_cluster(image, labels, cluster=2, color=(0,0,255)):
    """Return a copy of ``image`` with every pixel of one cluster painted ``color``.

    Args:
        image: ``(H, W, 3)`` array; it is not modified.
        labels: Per-pixel cluster indices with ``H * W`` elements.
        cluster: Index of the cluster to paint.
        color: Replacement colour for the selected pixels.
    """
    height, width = image.shape[0], image.shape[1]
    selected = labels.reshape(height, width) == cluster
    masked_image = np.array(image, copy=True)
    masked_image[selected] = [color]
    return masked_image

def find_and_draw_largest_contour(image, contour_color=(255,0,0), pad=3):
    """Draw a padded rectangle around the largest pure-blue region of an RGB image.

    Pixels are selected in HSV space with H = 120, S = 255 and V in [250, 255],
    which is what RGB ``(0, 0, 255)`` (the default ``mask_cluster`` colour) maps to.

    Args:
        image: ``(H, W, 3)`` uint8 RGB array; the rectangle is drawn on it in place.
        contour_color: Colour of the rectangle.
        pad: Number of pixels by which the rectangle is grown on every side.

    Returns:
        The same ``image`` object.  When no pixel matches, nothing is drawn
        (previously this raised ``ValueError`` from ``np.argmax`` on an empty list).
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    lower_blue = np.array([120, 255, 250], np.uint8)
    upper_blue = np.array([120, 255, 255], np.uint8)
    # inRange already yields a 0/255 mask, so no extra threshold step is needed.
    thresh = cv2.inRange(hsv_image, lower_blue, upper_blue)
    # [-2] picks the contour list under both the OpenCV 3 (3-tuple) and 4 (2-tuple) APIs.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        print("No masked region found; nothing drawn.")
        return image
    cnt = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(image, (x-pad, y-pad), (x+w+pad, y+h+pad), contour_color, 2)
    return image

# Baseline driver: run the k-means pipeline on one local image file.
image_path = 'train.jpg'
image = load_and_convert_image(image_path)
display_image(image)
# Cluster the pixel colours and show the image repainted with the cluster centres.
labels, centers = perform_kmeans_clustering(image)
segmented_image = create_segmented_image(image, labels, centers)
display_image(segmented_image)
# Paint the default cluster (index 2) in the marker colour, then box its largest region.
masked_image = mask_cluster(image, labels)
display_image(masked_image)
final_image = find_and_draw_largest_contour(masked_image)
display_image(final_image, axis='on')  # Show the result with the axes visible


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
import torch
from torchvision import models, transforms

def load_and_convert_image(image_path):
    """Read an image with ``skimage.io.imread`` and return the array unchanged."""
    image = io.imread(image_path)
    # The BGR -> RGB swap below is left commented out, so no channel reordering happens here.
    #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image

def display_image(image, axis='off'):
    """Show ``image`` with matplotlib.

    Args:
        image: Array-like accepted by ``plt.imshow``.
        axis: Passed straight to ``plt.axis`` (``'off'`` hides the axes, ``'on'`` shows them).
    """
    plt.imshow(image)
    plt.axis(axis)
    plt.show()

def perform_kmeans_clustering(image, k=3):
    """Cluster the pixels of a 3-channel image into ``k`` colour groups with OpenCV.

    Args:
        image: Array whose elements can be reshaped to ``(-1, 3)``.
        k: Number of clusters.

    Returns:
        ``(labels, centers)`` exactly as returned by ``cv2.kmeans``.
    """
    samples = np.float32(image.reshape((-1, 3)))
    # Stop after 100 iterations or once the centres move by less than 0.2.
    stop_mode = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    termination = (stop_mode, 100, 0.2)
    # 10 restarts from random initial centres; the compactness score is discarded.
    _, labels, centers = cv2.kmeans(samples, k, None, termination, 10, cv2.KMEANS_RANDOM_CENTERS)
    return labels, centers

def create_segmented_image(image, labels, centers):
    """Repaint every pixel with the colour of its cluster centre.

    Args:
        image: Source image; only its shape is used.
        labels: Per-pixel cluster indices (any shape; flattened before use).
        centers: Cluster centre colours, cast to ``uint8``.

    Returns:
        ``uint8`` array with the same shape as ``image``.
    """
    palette = np.uint8(centers)
    flat_labels = labels.flatten()
    return palette[flat_labels].reshape(image.shape)

def load_pytorch_model():
    """Return torchvision's COCO-pretrained Faster R-CNN (ResNet-50 FPN) in eval mode.

    Newer torchvision releases deprecate ``pretrained=True`` in favour of the
    ``weights=`` argument; the weights enum is used when it exists and the
    legacy flag is kept as a fallback for older installations.
    """
    weights_enum = getattr(models.detection, "FasterRCNN_ResNet50_FPN_Weights", None)
    if weights_enum is not None:
        model = models.detection.fasterrcnn_resnet50_fpn(weights=weights_enum.DEFAULT)
    else:
        model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()
    return model

def preprocess_image_for_pytorch(image):
    """Convert an image array to a float tensor resized to 800x800.

    The steps are ``ToPILImage`` -> ``Resize((800, 800))`` -> ``ToTensor``.
    Because the resize ignores the aspect ratio, coordinates predicted on the
    returned tensor refer to the 800x800 frame, not to the input image.
    """
    steps = [
        transforms.ToPILImage(),
        transforms.Resize((800, 800)),
        transforms.ToTensor(),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(image)

def detect_objects_pytorch(model, image):
    """Run the detector on ``image`` and return boxes in the image's own pixel frame.

    Args:
        model: Detection model called as ``model([tensor])`` that returns a list
            of dicts with a ``'boxes'`` entry in ``[x1, y1, x2, y2]`` form.
        image: ``(H, W, C)`` array (or ``(C, H, W)`` tensor) to analyse.

    Returns:
        The prediction dict for the image.  ``'boxes'`` are rescaled from the
        preprocessed tensor's size back to the size of ``image`` so they can be
        drawn directly on it; all other entries are passed through.
    """
    image_tensor = preprocess_image_for_pytorch(image)
    with torch.no_grad():
        prediction = model([image_tensor])[0]

    if isinstance(image, torch.Tensor):
        orig_height, orig_width = image.shape[-2], image.shape[-1]
    else:
        orig_height, orig_width = image.shape[0], image.shape[1]
    tensor_height, tensor_width = image_tensor.shape[-2], image_tensor.shape[-1]

    # Preprocessing may have resized the image; undo that scaling on the boxes.
    # When the sizes already agree the prediction is returned untouched.
    if (tensor_height, tensor_width) != (orig_height, orig_width):
        scale_x = orig_width / tensor_width
        scale_y = orig_height / tensor_height
        boxes = prediction['boxes']
        scale = torch.tensor([scale_x, scale_y, scale_x, scale_y],
                             dtype=boxes.dtype, device=boxes.device)
        prediction = dict(prediction)  # shallow copy: leave the model's dict alone
        prediction['boxes'] = boxes * scale
    return prediction

# Names indexed by COCO category id (0..90).  torchvision's COCO detection models
# emit these ids directly; ids that COCO never uses are marked 'N/A'.  The previous
# 81-entry list combined with a `label - 1` lookup was only right for ids 13-25
# (e.g. dog, sheep) and mislabelled everything else (person -> '__background__').
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_class_name(class_id):
    """Return the COCO name for ``class_id``, or ``'N/A'`` when it is out of range."""
    if 0 <= class_id < len(COCO_INSTANCE_CATEGORY_NAMES):
        return COCO_INSTANCE_CATEGORY_NAMES[class_id]
    return 'N/A'

def draw_detection_boxes(image, prediction, threshold=0.5):
    """Draw every detection scoring above ``threshold`` onto ``image`` (in place).

    Args:
        image: Array to draw on; it is modified and also returned.
        prediction: Dict with aligned ``'boxes'`` (``[x1, y1, x2, y2]``),
            ``'scores'`` and ``'labels'`` tensors.
        threshold: Minimum score for a detection to be drawn.

    Returns:
        The same ``image`` object with rectangles and class names added.
    """
    detections = zip(prediction['boxes'], prediction['scores'], prediction['labels'])
    for box, score, label in detections:
        if score > threshold:
            x1, y1, x2, y2 = (int(v) for v in box.cpu().numpy())
            # Labels are COCO category ids and index the name list directly.
            class_name = get_class_name(label.item())
            print('Detected:', class_name)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Keep the caption inside the image when the box touches the top edge.
            text_y = max(y1 - 10, 10)
            cv2.putText(image, class_name, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 2)
    return image


# Run segmentation + detection on each sample image.
# The detector is loaded once and reused; it was previously reloaded per image.
model = load_pytorch_model()
for image_path in ('dog.jpg', 'sheep.jpeg'):
    image = load_and_convert_image(image_path)
    display_image(image)
    labels, centers = perform_kmeans_clustering(image)
    segmented_image = create_segmented_image(image, labels, centers)
    display_image(segmented_image)
    prediction = detect_objects_pytorch(model, image)
    final_image = draw_detection_boxes(image, prediction)
    display_image(final_image, axis='on')
