<a href="https://colab.research.google.com/github/AmanPhadke/DataScience_Journey/blob/main/Path1/Week7_Python/Projects/Vision_Based_Video_Scene_Intelligence_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
!pip install ultralytics opencv-python



In [42]:
from ultralytics import YOLO

In [43]:
# Colab-only step: call files.upload() and keep whatever it returns in `uploaded`.
# NOTE(review): presumably this opens the browser file picker and stores the chosen
# file(s) in the runtime's working directory — confirm against the Colab docs.
# `uploaded` is not read by any of the visible cells; the video path is set by hand.
from google.colab import files
uploaded = files.upload()

In [44]:
import pandas as pd
import numpy as np
import plotly.express as px
import cv2

In [45]:
# Path of the video clip opened by the frame-sampling cell.
# NOTE(review): hard-coded file name — presumably it must match the uploaded file; confirm.
video = '/content/Video_5.mp4'


In [46]:
import cv2

# Sample the video: keep one frame for every SAMPLE_PERIOD_SEC seconds of footage.
# Produces `saved_frames`, a list of (frame_id, frame) tuples, where frame_id is the
# zero-based index of the frame in the video.
SAMPLE_PERIOD_SEC = 0.5

cap = cv2.VideoCapture(video)
if not cap.isOpened():
    # Fail loudly: otherwise every later cell would silently run on zero frames.
    raise IOError(f"Could not open video: {video}")

try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    # int() truncates (e.g. 59.94 fps -> every 29th frame). Clamp to at least 1 so a
    # frame rate below 2 fps, or one reported as 0, cannot cause a modulo-by-zero below.
    frame_interval = max(1, int(fps * SAMPLE_PERIOD_SEC))

    frame_id = 0
    saved_frames = []

    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break

        if frame_id % frame_interval == 0:
            saved_frames.append((frame_id, frame))

        frame_id += 1
finally:
    # Always free the capture handle, even if reading raised.
    cap.release()

print("Total sampled frames:", len(saved_frames))

Total sampled frames: 17


In [79]:
# Run the detector on every sampled frame and flatten the confident detections
# into one table `df` (one row per kept detection).
model = YOLO('yolov8n.pt')

classes = set()  # distinct class names among the kept detections
data = []        # rows: frame_id, class_name, confidence, area, x_centre, y_centre

for frame_id, frame in saved_frames:
  # A single frame is passed in, so only the first result is used.
  results = model.predict(frame, verbose=False)[0]
  boxes = results.boxes

  if boxes is None:
    continue

  # Convert the per-box values to plain Python lists once, then walk them in step.
  per_box = zip(boxes.conf.tolist(), boxes.cls.tolist(), boxes.xyxy.tolist())
  for confidence, raw_cls, (x1, y1, x2, y2) in per_box:
    # Keep only detections scoring strictly above 0.5.
    if not confidence > 0.5:
      continue

    class_id = int(raw_cls)
    class_name = model.names[class_id]
    classes.add(class_name)

    # Box geometry derived from the corner coordinates (x1, y1) and (x2, y2).
    width, height = x2 - x1, y2 - y1
    area = height * width
    x_centre = (x1 + x2) / 2
    y_centre = (y2 + y1) / 2

    data.append([frame_id, class_name, confidence, area, x_centre, y_centre])

column_names = ['frame_id', 'class_name', 'confidence', 'area', 'x_centre', 'y_centre']
df = pd.DataFrame(data, columns=column_names)

In [74]:
# Preview the first rows of the detections table.
df.head()

Unnamed: 0,frame_id,class_name,confidence,area,x_centre,y_centre
0,0,person,0.77555,121845.217514,2157.968262,1690.851135
1,0,person,0.675687,44047.975671,1636.503052,1692.974487
2,0,person,0.590629,48688.518246,1743.740845,1688.18866
3,0,person,0.587145,28196.254642,1950.718872,1648.643555
4,0,person,0.571518,53119.935479,1507.830261,1684.70105


In [69]:
# How crowded is each frame: number of rows in df for each frame_id
df['frame_id'].value_counts()

Unnamed: 0_level_0,count
frame_id,Unnamed: 1_level_1
174,11
232,10
29,9
58,9
464,8
145,8
116,8
203,8
261,8
406,8


In [78]:
# Number of objects per class within each frame
df.groupby('frame_id')['class_name'].value_counts()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
frame_id,class_name,Unnamed: 2_level_1
0,person,2
29,person,2
58,person,3
87,person,4
116,person,5
145,person,6
174,person,5
203,person,5
232,person,6
261,person,7


In [80]:
# Feature matrix: rows are frame_ids, columns are class names, and each cell is the
# number of rows in df for that (frame, class) pair — 0 where the pair never occurs.
feature_matrix = (
    df.groupby(['frame_id', 'class_name'])
      .size()
      .unstack('class_name', fill_value=0)
)

In [81]:
# Display the frame-by-class count matrix.
feature_matrix

class_name,person,suitcase,traffic light
frame_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,5,0,0
29,4,0,0
58,5,0,0
87,5,0,0
116,7,0,0
145,6,0,0
174,6,0,1
203,6,0,0
232,7,0,0
261,7,0,0


In [87]:
# Row-wise sum: total count across all class columns for each frame_id.
feature_matrix.sum(axis=1)

Unnamed: 0_level_0,0
frame_id,Unnamed: 1_level_1
0,5
29,4
58,5
87,5
116,7
145,6
174,7
203,6
232,7
261,7


In [89]:
# Sum of the `area` column for each frame_id, i.e. total box area per frame.
area_per_frame = df['area'].groupby(df['frame_id']).sum()
area_per_frame

Unnamed: 0_level_0,area
frame_id,Unnamed: 1_level_1
0,295897.9
29,266610.4
58,367216.5
87,454535.1
116,672541.0
145,723866.7
174,951751.9
203,1248376.0
232,1413403.0
261,1151426.0
