<a href="https://colab.research.google.com/github/HarisudhanVL/Key-Frames-Extraction-and-Object-Detection/blob/main/Key_Frames_and_YOLOv3_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Extraction of keyframes from a video using Python.

### Required libraries for extracting keyframes from a video.

**Katna is a Python library that automates the boring, error-prone task of video keyframe extraction, video compression, and image cropping and resizing. It is a free and open-source library**

In [None]:
from Katna.video import Video
from Katna.writer import KeyFrameDiskWriter
import os

### Extraction of keyframes from a video.

**Katna uses a variety of techniques to extract keyframes, including:**

***Motion detection: This technique identifies frames that contain significant motion.***

***Face detection: This technique identifies frames that contain faces.***

***Saliency detection: This technique identifies frames that are visually salient.***

In [None]:
if __name__ == "__main__":
    # NOTE(review): the __main__ guard is kept from the original cell; presumably
    # it is there because Katna spawns worker processes — confirm against the
    # Katna documentation before removing it.
    vd = Video()

    # Number of keyframes requested from Katna.
    no_of_frames_to_returned = 12

    # Extracted keyframes are written to this local folder.
    diskwriter = KeyFrameDiskWriter(location=r"D:\Tann Mann Intership\Interview task\KeyFrames")

    # Input video. Written as one raw string: the previous
    # os.path.join(".", "tests", "data", <absolute path>) call dropped its
    # leading components on Windows anyway, and the non-raw literal relied on
    # the invalid escape sequences "\T" and "\I".
    video_file_path = r"D:\Tann Mann Intership\Interview task\video.mp4"
    print(f"Input video file path = {video_file_path}")

    # Run the extraction; the disk writer saves the selected frames.
    vd.extract_video_keyframes(
        no_of_frames=no_of_frames_to_returned,
        file_path=video_file_path,
        writer=diskwriter,
    )

Input video file path = D:\Tann Mann Intership\Interview task\video.mp4
Completed processing for :  D:\Tann Mann Intership\Interview task\video.mp4


## Detecting objects in the extracted keyframes using the YOLOv3 model.

### Required libraries for building YOLOv3 Model.

In [None]:
import numpy as np
import cv2
import os

### Change the working directory.

In [None]:
# Switch the working directory to the project folder; the model files loaded
# further down ("yolov3.weights", "yolov3.cfg", "coco.names") are opened by
# relative name and are therefore resolved against this directory.
project_dir = r"D:\Tann Mann Intership\Interview task"
os.chdir(project_dir)

### Load YOLOv3 model's weights, configuration and classes.

In [None]:
# File names are relative, so they are looked up in the current working directory.
weights_file = "yolov3.weights"
config_file = "yolov3.cfg"
names_file = "coco.names"

# Class labels: one entry per line of the names file, in file order.
classes = []

# Build the network from the weights file and its configuration file.
net = cv2.dnn.readNet(weights_file, config_file)

with open(names_file, "r") as names_handle:
    names_text = names_handle.read()
    classes = names_text.splitlines()

### Pre-trained model classes.

In [None]:
# Show the class labels read from "coco.names"; the detection cell below looks
# up each detection's label in this list by its class id.
print(classes)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [None]:
# ---- Settings for the detection pass ----
KEYFRAME_COUNT = 12  # frame files video_0.jpeg .. video_11.jpeg are attempted
KEYFRAME_PATTERN = r"D:\Tann Mann Intership\Interview task\KeyFrames\video_%d.jpeg"
INPUT_SIZE = (416, 416)  # spatial size passed to blobFromImage
CONF_THRESHOLD = 0.5     # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4      # threshold passed to non-maximum suppression


def _decode_detections(layer_outputs, width, height, conf_threshold):
    """Turn raw network output rows into pixel-space boxes.

    Each row is read as: [0] centre x, [1] centre y, [2] box width,
    [3] box height (all multiplied by the image size here), and [5:] one
    score per class. Element [4] is not used.

    Args:
        layer_outputs: iterable of 2-D arrays as returned by ``net.forward``.
        width: image width in pixels.
        height: image height in pixels.
        conf_threshold: rows whose best class score is not above this value
            are discarded.

    Returns:
        ``(boxes, confidences, class_ids)`` — three parallel lists, where each
        box is ``[x, y, w, h]`` with ``(x, y)`` the top-left corner.
    """
    boxes = []
    confidences = []
    class_ids = []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > conf_threshold:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Convert from centre coordinates to the top-left corner.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    return boxes, confidences, class_ids


# These do not change between frames, so compute them once.
output_layers_names = net.getUnconnectedOutLayersNames()
font = cv2.FONT_HERSHEY_PLAIN

for frame_idx in range(KEYFRAME_COUNT):
    # Load the keyframe. cv2.imread reports failure by returning None rather
    # than raising, so check before touching img.shape.
    frame_path = KEYFRAME_PATTERN % frame_idx
    img = cv2.imread(frame_path)
    if img is None:
        print("Skipping %s: file is missing or could not be read" % frame_path)
        continue
    height, width, _ = img.shape

    # Run the network on the frame.
    blob = cv2.dnn.blobFromImage(img, 1/255, INPUT_SIZE, (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(output_layers_names)

    # Keep confident detections, then drop overlapping boxes via NMS.
    boxes, confidences, class_ids = _decode_detections(
        layerOutputs, width, height, CONF_THRESHOLD
    )
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
    # NMSBoxes may hand back an empty tuple or an array depending on the
    # OpenCV version; normalise to a flat array of kept indices.
    kept = np.asarray(indexes).flatten()

    # One random colour per candidate box.
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))

    # Draw every kept box and its "label confidence" caption on the frame.
    for box_idx in kept:
        x, y, w, h = boxes[box_idx]
        label = str(classes[class_ids[box_idx]])
        confidence = str(round(confidences[box_idx], 2))
        color = colors[box_idx]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

    # Show the annotated frame once, after all boxes are drawn, and wait for a
    # key press. Frames without detections are not displayed.
    if len(kept) > 0:
        cv2.imshow("Image", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Report one line per loaded frame (including zero) so that the output
    # lines correspond to frames in order.
    print("Number of objects = %d" % len(kept))


Number of objects = 5
Number of objects = 5
Number of objects = 1
Number of objects = 3
Number of objects = 3
Number of objects = 2
Number of objects = 3
