# Testing pre-trained models

### Model # 1

In [36]:
import cv2
import numpy as np 
from PIL import Image
import time

In [24]:
from transformers import pipeline
pipe = pipeline("object-detection", model="valentinafeve/yolos-fashionpedia")

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


In [34]:
pipe("temp_frame.jpg")

[{'score': 0.9643442630767822,
  'label': 'sleeve',
  'box': {'xmin': 786, 'ymin': 181, 'xmax': 896, 'ymax': 496}},
 {'score': 0.9950535893440247,
  'label': 'sleeve',
  'box': {'xmin': 517, 'ymin': 208, 'xmax': 624, 'ymax': 537}}]

In [35]:
pipe("test.png")

[{'score': 0.9653264880180359,
  'label': 'shoe',
  'box': {'xmin': 81, 'ymin': 512, 'xmax': 111, 'ymax': 578}},
 {'score': 0.9268865585327148,
  'label': 'shoe',
  'box': {'xmin': 105, 'ymin': 525, 'xmax': 129, 'ymax': 569}},
 {'score': 0.9727214574813843,
  'label': 'sleeve',
  'box': {'xmin': 117, 'ymin': 108, 'xmax': 181, 'ymax': 266}},
 {'score': 0.984233021736145,
  'label': 'sleeve',
  'box': {'xmin': 14, 'ymin': 123, 'xmax': 86, 'ymax': 238}}]

In [46]:
def classify_image(img_array):
    predictions = pipe(img_array)
    decoded_predictions =  predictions
    return decoded_predictions   
 

def draw_boxes_on_frame(frame, predictions):
    for prediction in predictions:
        label = prediction['label']
        box = prediction['box']
 
        xmin, ymin, xmax, ymax = box['xmin'], box['ymin'], box['xmax'], box['ymax'] 
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) 
        label_text = f'{label}: {prediction["score"]:.2f}'
        cv2.putText(frame, label_text, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return frame

def classify_video(video_path):
    cap = cv2.VideoCapture(video_path)
    
    frame_interval = 1.5   
    last_prediction_time = time.time()

    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            break

        current_time = time.time()

        if current_time - last_prediction_time >= frame_interval:
            pil_image = Image.fromarray(np.uint8(frame)) 
            predicted_class = classify_image(pil_image)   

            print(predicted_class)

            frame_with_boxes = draw_boxes_on_frame(frame.copy(), predicted_class)
            cv2.imshow('Video with Bounding Boxes', frame_with_boxes)
 
            last_prediction_time = current_time
   
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
 
    cap.release()
    cv2.destroyAllWindows() 
    print("Video OK! No Inappropriate Content Detected")

video_path = 'final_test3.mp4' 
classify_video(video_path)

[{'score': 0.9696283936500549, 'label': 'sleeve', 'box': {'xmin': 166, 'ymin': 472, 'xmax': 242, 'ymax': 672}}, {'score': 0.9190516471862793, 'label': 'shoe', 'box': {'xmin': 345, 'ymin': 851, 'xmax': 411, 'ymax': 919}}]
[{'score': 0.9685527086257935, 'label': 'sleeve', 'box': {'xmin': 166, 'ymin': 473, 'xmax': 241, 'ymax': 672}}, {'score': 0.9073960781097412, 'label': 'shoe', 'box': {'xmin': 345, 'ymin': 851, 'xmax': 411, 'ymax': 918}}]
[{'score': 0.9760302305221558, 'label': 'sleeve', 'box': {'xmin': 167, 'ymin': 475, 'xmax': 240, 'ymax': 680}}, {'score': 0.9733465313911438, 'label': 'sleeve', 'box': {'xmin': 322, 'ymin': 475, 'xmax': 400, 'ymax': 672}}]
[{'score': 0.9776043891906738, 'label': 'sleeve', 'box': {'xmin': 167, 'ymin': 475, 'xmax': 242, 'ymax': 680}}, {'score': 0.9568054676055908, 'label': 'sleeve', 'box': {'xmin': 323, 'ymin': 475, 'xmax': 399, 'ymax': 670}}]
[{'score': 0.9576045274734497, 'label': 'sleeve', 'box': {'xmin': 171, 'ymin': 483, 'xmax': 246, 'ymax': 675}}, 

### Model # 2

In [1]:
from transformers import pipeline
pipe = pipeline("zero-shot-image-classification", model="patrickjohncyh/fashion-clip")

  from .autonotebook import tqdm as notebook_tqdm
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [23]:
res = pipe("temp_frame.jpg", candidate_labels=["short skirt"], multi_class=True) 
res

[{'score': 1.0, 'label': 'short skirt'}]

In [3]:
short_skirt_score = None
for prediction in res:
    if prediction['label'] == 'short skirt':
        short_skirt_score = prediction['score']
        break

In [11]:
def classify_image(img_array):
    predictions = pipe(img_array, candidate_labels=["short skirt", "short sleeve", "long sleeve", "short dress", "short pants", "short sleeve shirt", "short sleeve vest"])
    decoded_predictions =  predictions
    return decoded_predictions   
 
def classify_video(video_path):
    cap = cv2.VideoCapture(video_path)

    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            break
   
        pil_image = Image.fromarray(np.uint8(frame)) 
        predicted_class = classify_image(pil_image) 

        short_skirt_score = None
        for prediction in predicted_class:
            if prediction['label'] == 'short skirt':
                short_skirt_score = prediction['score']
                break

        if short_skirt_score > 0.7:
            print("Video Error! The person is wearing a short skirt")
            cap.release()
            cv2.destroyAllWindows() 
            return 

        cv2.putText(frame, f'Prediction: {short_skirt_score}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Video Classification', frame)
 
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
 
    cap.release()
    cv2.destroyAllWindows() 
    print("Video OK! No Inappropriate Content Detected")

video_path = 'test.mp4' 
classify_video(video_path)