In [9]:
import os
import cv2
import csv
from pytube import YouTube
from ultralytics import YOLO
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers, models
import numpy as np


In [10]:
video_url = "https://youtu.be/eibhK1fgG48?si=LvTwX4fMxfvC9xuG"
save_path = r"D:\OneDrive\سطح المكتب\iti_task\maryam"
video_file = os.path.join(save_path, "video.mp4")

if not os.path.exists(video_file):
    print("Downloading video...")
    yt = YouTube(video_url)
    stream = yt.streams.filter(file_extension="mp4", progressive=True).first()
    stream.download(output_path=save_path, filename="video.mp4")
    print("Video downloaded:", video_file)
else:
    print("Video already exists:", video_file)


Video already exists: D:\OneDrive\سطح المكتب\iti_task\maryam\video.mp4


In [11]:
output_dir = os.path.join(save_path, "tmp", "files")
os.makedirs(output_dir, exist_ok=True)

print("Running YOLO...")
model_yolo = YOLO("yolov8n.pt")

cap = cv2.VideoCapture(video_file)
frame_count = 0
crop_count = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1

    results = model_yolo(frame)

    for r in results:
        boxes = r.boxes
        for box in boxes:
            cls = int(box.cls[0])
            if cls == 0:  # person
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                crop = frame[y1:y2, x1:x2]
                crop_path = os.path.join(output_dir, f"person_{frame_count}_{crop_count}.jpg")
                cv2.imwrite(crop_path, crop)
                crop_count += 1

cap.release()
print(f"Cropped {crop_count} persons and saved in {output_dir}")


Running YOLO...

0: 384x640 9 persons, 1 backpack, 4 handbags, 56.5ms
Speed: 1.4ms preprocess, 56.5ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 3 handbags, 1 suitcase, 60.1ms
Speed: 2.1ms preprocess, 60.1ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 1 backpack, 1 handbag, 1 suitcase, 53.3ms
Speed: 1.0ms preprocess, 53.3ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 1 backpack, 1 handbag, 51.4ms
Speed: 1.1ms preprocess, 51.4ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 51.7ms
Speed: 1.0ms preprocess, 51.7ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 1 handbag, 1 suitcase, 50.8ms
Speed: 1.1ms preprocess, 50.8ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 1 handbag, 1 suitcase, 55.2ms
Speed: 1.8ms preprocess, 55.2ms inf

In [12]:
data_dir = r"D:\OneDrive\سطح المكتب\iti_task\maryam\archive\data"
img_size = (128, 128)
batch_size = 32

train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode="binary",
    subset="training"
)

val_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode="binary",
    subset="validation"
)

model_cnn = models.Sequential([
    layers.Conv2D(32, (3,3), activation="relu", input_shape=(128,128,3)),
    layers.MaxPooling2D(2,2),

    layers.Conv2D(64, (3,3), activation="relu"),
    layers.MaxPooling2D(2,2),

    layers.Conv2D(128, (3,3), activation="relu"),
    layers.MaxPooling2D(2,2),

    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])

model_cnn.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

print("Training CNN...")
model_cnn.fit(train_generator, validation_data=val_generator, epochs=10)

# Save model
model_cnn.save("mask_classifier.h5")
print("Model saved as mask_classifier.h5")


Found 6043 images belonging to 2 classes.
Found 1510 images belonging to 2 classes.
Training CNN...
Epoch 1/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 678ms/step - accuracy: 0.8259 - loss: 0.3880 - val_accuracy: 0.9146 - val_loss: 0.2264
Epoch 2/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 209ms/step - accuracy: 0.8896 - loss: 0.2630 - val_accuracy: 0.9344 - val_loss: 0.1765
Epoch 3/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 212ms/step - accuracy: 0.9088 - loss: 0.2259 - val_accuracy: 0.9490 - val_loss: 0.1545
Epoch 4/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 218ms/step - accuracy: 0.9184 - loss: 0.1937 - val_accuracy: 0.9172 - val_loss: 0.2153
Epoch 5/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 217ms/step - accuracy: 0.9361 - loss: 0.1634 - val_accuracy: 0.9404 - val_loss: 0.1727
Epoch 6/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s



Model saved as mask_classifier.h5


In [15]:
print("Running classification on cropped images with batching...")

results_csv = os.path.join(save_path, "predictions.csv")
batch_size = 32
img_size = (128, 128)

# get all image paths
all_images = [os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".jpg")]
num_images = len(all_images)

predictions = []

# process in batches
for i in range(0, num_images, batch_size):
    batch_files = all_images[i:i+batch_size]
    batch_data = []

    for img_path in batch_files:
        img = image.load_img(img_path, target_size=img_size)
        img_array = image.img_to_array(img) / 255.0
        batch_data.append(img_array)

    batch_data = np.array(batch_data)

    # predict for the batch
    batch_preds = model_cnn.predict(batch_data, verbose=0)

    for j, pred in enumerate(batch_preds):
        label = 1 if pred[0] > 0.5 else 0
        predictions.append((os.path.basename(batch_files[j]), label))

    # print progress with percentage
    done = min(i+batch_size, num_images)
    percent = (done / num_images) * 100
    print(f"Processed {done} / {num_images} images ({percent:.2f}%)")

# save results to csv
with open(results_csv, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["filename", "prediction"])
    writer.writerows(predictions)

print(f"Predictions for {num_images} images saved in: {results_csv}")



Running classification on cropped images with batching...
Processed 32 / 13122 images (0.24%)
Processed 64 / 13122 images (0.49%)
Processed 96 / 13122 images (0.73%)
Processed 128 / 13122 images (0.98%)
Processed 160 / 13122 images (1.22%)
Processed 192 / 13122 images (1.46%)
Processed 224 / 13122 images (1.71%)
Processed 256 / 13122 images (1.95%)
Processed 288 / 13122 images (2.19%)
Processed 320 / 13122 images (2.44%)
Processed 352 / 13122 images (2.68%)
Processed 384 / 13122 images (2.93%)
Processed 416 / 13122 images (3.17%)
Processed 448 / 13122 images (3.41%)
Processed 480 / 13122 images (3.66%)
Processed 512 / 13122 images (3.90%)
Processed 544 / 13122 images (4.15%)
Processed 576 / 13122 images (4.39%)
Processed 608 / 13122 images (4.63%)
Processed 640 / 13122 images (4.88%)
Processed 672 / 13122 images (5.12%)
Processed 704 / 13122 images (5.37%)
Processed 736 / 13122 images (5.61%)
Processed 768 / 13122 images (5.85%)
Processed 800 / 13122 images (6.10%)
Processed 832 / 1312