In [1]:
!git clone https://github.com/GantMan/nsfw_model.git  

Cloning into 'nsfw_model'...
remote: Enumerating objects: 481, done.[K
remote: Counting objects: 100% (84/84), done.[K
remote: Compressing objects: 100% (71/71), done.[K
remote: Total 481 (delta 33), reused 23 (delta 9), pack-reused 397[K
Receiving objects: 100% (481/481), 470.33 KiB | 7.13 MiB/s, done.
Resolving deltas: 100% (239/239), done.


In [2]:
!pip install pytube

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytube
  Downloading pytube-12.1.2-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 KB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-12.1.2


In [3]:
!pip install --upgrade scenedetect[opencv]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scenedetect[opencv]
  Downloading scenedetect-0.6.1-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 KB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scenedetect
Successfully installed scenedetect-0.6.1


In [4]:
!wget https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfw.299x299.h5

--2023-03-27 14:41:29--  https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfw.299x299.h5
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.120.128, 52.217.166.160, 52.216.33.104, ...
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.120.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 158652512 (151M) [application/x-www-form-urlencoded]
Saving to: ‘nsfw.299x299.h5’


2023-03-27 14:41:35 (27.2 MB/s) - ‘nsfw.299x299.h5’ saved [158652512/158652512]



In [7]:
import numpy as np
import pandas as pd
from pytube import YouTube

In [8]:
import sys
import os

# Make the cloned `nsfw_model` checkout importable; the path is appended at most once.
PATH_TO_NSWF_MODEL = "/content/nsfw_model"
if sys.path.count(PATH_TO_NSWF_MODEL) == 0:
    sys.path.append(PATH_TO_NSWF_MODEL)

In [9]:
from tqdm import tqdm

def download_video_from_youtube(urls, dir, file_extension="mp4", resolution="360p"):
    """Download one or more YouTube videos into a directory.

    Args:
    * urls - a single URL string or an iterable of URL strings.
    * dir - output directory passed to the stream's `download` call.
    * file_extension - extension used to filter the available streams.
    * resolution - resolution label requested from the filtered streams, e.g. "360p".

    Raises:
    * ValueError - when a video has no stream for the requested
    extension/resolution combination.
    """
    if isinstance(urls, str):
        urls = [urls]
    for url in tqdm(urls):
        yt = YouTube(url)
        candidates = yt.streams.filter(file_extension=file_extension)
        stream = candidates.get_by_resolution(resolution)
        # `get_by_resolution` gives None when nothing matches; fail with a clear
        # message instead of an AttributeError on `None.download`.
        if stream is None:
            raise ValueError(
                f"No '{file_extension}' stream with resolution '{resolution}' found for {url}"
            )
        stream.download(dir)

In [171]:
# Fetch the sample clip into the Colab working directory.
urls = "https://www.youtube.com/watch?v=Fm_iyGAutqc"
download_video_from_youtube(urls, dir="/content/", resolution="360p")

100%|██████████| 1/1 [00:09<00:00,  9.25s/it]


In [135]:
import numpy as np
import cv2
from PIL import Image
from IPython.display import display
from abc import ABC, abstractmethod
from nsfw_detector import predict
from scenedetect import detect, ContentDetector, AdaptiveDetector
from pprint import pprint

In [54]:
def central_crop(image):
    """Cut the largest centered square out of an image array.

    The side of the square is the smaller of the first two dimensions of
    `image`; any further dimensions (e.g. channels) are kept whole.
    """
    height = image.shape[0]
    width = image.shape[1]
    side = min(height, width)
    # Offsets of the square's top-left corner; one of them is always zero.
    top = (height - side) // 2
    left = (width - side) // 2
    return image[top:top + side, left:left + side]


In [55]:
class VideoClassifier(ABC):
    """Abstract base for frame-level video classifiers.

    Attributes:
    * input_dim - side length of the square frames passed to the model.
    * batch_size - number of frames gathered before the model is invoked.
    * threshold - model threshold (prob_of_class > threshold => class).
    * central_crop - when True, frames are cropped to a centered square
    before being resized.

    Methods:
    * predict - returns class probabilities per frame.
    * classify_scenes - returns the timecodes of the scenes to censor.
    """

    @abstractmethod
    def __init__(self, input_dim, batch_size, threshold):
        self.input_dim = input_dim
        self.batch_size = batch_size
        self.threshold = threshold
        self.central_crop = True

    @abstractmethod
    def predict(self, video_path, start_frame=0, end_frame=None, classify_every_n_frames=1):
        """Return NSFW class probabilities for frames of a video.

        Frames are taken from `start_frame` (inclusive) up to `end_frame`
        (exclusive), stepping by `classify_every_n_frames`.

        Example: {'number_of_frame':{'class':probability, ...}, ...}
        """
        pass

    @abstractmethod
    def classify_scenes(self, video_path, scenes=None, scene_threshold=.1, classify_every_n_frames=1):
        """Return a list of (scene start, scene end) FrameTimecode tuples.

        Args:
        * scenes - list of FrameTimecode tuples to classify. When None the
        scenes are produced by a detection algorithm. Default None.
        * scene_threshold - a scene is returned when its share of NSFW
        frames is greater than this value.

        FrameTimecode. https://scenedetect.com/projects/Manual/en/latest/api/frame_timecode.html#scenedetect-frame-timecode
        """
        pass

    def _preprocess(self, frame):
        """Turn one frame into a model-ready batch of size one.

        Optionally crops to a centered square, resizes to
        `input_dim` x `input_dim`, divides the values by 255 and prepends a
        batch axis.
        """
        if self.central_crop:
            frame = central_crop(frame)
        target_size = (self.input_dim, self.input_dim)
        scaled = cv2.resize(frame, target_size) / 255
        return np.expand_dims(scaled, axis=0)

In [152]:
class IntimateVideoClassifier(VideoClassifier):
    """Video classifier backed by a model loaded through `nsfw_detector.predict`.

    A frame counts as NSFW when the probability of at least one of the
    "porn", "hentai" or "sexy" classes is greater than `threshold`.
    """

    # Classes whose probability is compared against `threshold`.
    _NSFW_CLASSES = ("porn", "hentai", "sexy")
    # Default gap (in frames) below which two flagged scenes are merged.
    _GLUE_MAX_GAP_FRAMES = 20

    def __init__(self, model_path, input_dim, batch_size, threshold=.8):
        """Note. input_dim = according to the weights of the model 224x224 or 299x299."""
        super().__init__(input_dim, batch_size, threshold)
        self.model = predict.load_model(model_path)
        self.central_crop = True

    def _classify_batch(self, batch, frames_idx):
        """Run the model on the gathered frames; return {frame index: class probabilities}."""
        minibatch = np.concatenate(batch, axis=0)
        probs = predict.classify_nd(self.model, minibatch)
        return dict(zip(frames_idx, probs))

    def predict(self, video_path, start_frame=0, end_frame=None, classify_every_n_frames=1):
        """Classify frames of `video_path` in batches of `batch_size`.

        Args:
        * start_frame - first frame to classify (inclusive).
        * end_frame - frame to stop at (exclusive); None means the frame
        count reported by the video.
        * classify_every_n_frames - step between classified frames, >= 1.

        Returns:
        Dict mapping frame index to the class probabilities of that frame.

        Raises:
        * ValueError - when `classify_every_n_frames` is smaller than 1.
        """
        # A step of 0 would never advance and loop forever.
        if classify_every_n_frames < 1:
            raise ValueError("classify_every_n_frames must be >= 1")

        frame_count = start_frame
        batch = []
        frames_idx = []
        output = {}

        cap = cv2.VideoCapture(video_path)
        try:
            # `is None` (not truthiness) so that end_frame=0 means "no frames".
            if end_frame is None:
                end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # set start frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
            while cap.isOpened() and (frame_count < end_frame):
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # collect frames to batch with indexes
                frames_idx.append(frame_count)
                batch.append(self._preprocess(frame))
                # process batch
                if len(batch) >= self.batch_size:
                    output.update(self._classify_batch(batch, frames_idx))
                    batch.clear()
                    frames_idx.clear()

                frame_count += classify_every_n_frames
                # A read already moves to the next frame, so an explicit seek
                # is only needed when frames are skipped.
                if classify_every_n_frames > 1:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
        finally:
            # Release the capture on every exit path, not only on a failed read.
            cap.release()

        # Classify the frames left over after the last full batch.
        if batch:
            output.update(self._classify_batch(batch, frames_idx))

        return output

    def classify_scenes(self, video_path, scenes=None, scene_threshold=.1, classify_every_n_frames=1):
        """Return the scenes whose share of NSFW frames exceeds `scene_threshold`.

        Args:
        * scenes - list of (start, end) FrameTimecode tuples to classify.
        When None the scenes are detected with `ContentDetector`.
        * scene_threshold - minimal share (exclusive) of classified frames
        that must be NSFW for a scene to be returned.
        * classify_every_n_frames - step between classified frames.

        Returns:
        List of (start, end) FrameTimecode tuples, with close scenes merged
        by `gluing`.
        """
        # Only auto-detect when no scenes were given; an explicit empty list stays empty.
        if scenes is None:
            scenes = detect(video_path, ContentDetector())
        nswf_scenes = []
        # TODO We create stream for every scene
        for scene_start, scene_end in scenes:
            # TODO Highlight slow movments and blurry frames
            probs = self.predict(
                video_path,
                start_frame=scene_start.get_frames(),
                end_frame=scene_end.get_frames(),
                classify_every_n_frames=classify_every_n_frames,
            )
            # Nothing was classified (empty or unreadable scene): skip it
            # rather than divide by zero.
            if not probs:
                continue
            nswf_frames_count = sum(
                any(prob[name] > self.threshold for name in self._NSFW_CLASSES)
                for prob in probs.values()
            )
            # Divide by the number of frames actually classified so the share
            # stays within [0, 1] for any step.
            if (nswf_frames_count / len(probs)) > scene_threshold:
                nswf_scenes.append((scene_start, scene_end))
        return self.gluing(nswf_scenes)

    @classmethod
    def gluing(cls, raw_nswf_scenes, max_gap=None):
        """Glue close scenes.

        Consecutive scenes are merged when the next one starts fewer than
        `max_gap` frames after the previous one ends. `max_gap` defaults to 20.
        """
        if max_gap is None:
            max_gap = cls._GLUE_MAX_GAP_FRAMES
        nswf_scenes = []
        for scene in raw_nswf_scenes:
            if nswf_scenes:
                prev_start, prev_end = nswf_scenes[-1][0], nswf_scenes[-1][1]
                if (scene[0].get_frames() - prev_end.get_frames()) < max_gap:
                    # Extend the previous scene up to the end of the current one.
                    nswf_scenes[-1] = (prev_start, scene[1])
                    continue
            nswf_scenes.append(scene)
        return nswf_scenes

    def greedy_classify(self, video_path):
        """Placeholder; not implemented yet."""
        ...


In [168]:
# Load the 299x299 weights; frames are classified in batches of 24.
classifier = IntimateVideoClassifier(
    model_path="/content/nsfw.299x299.h5",
    input_dim=299,
    batch_size=24,
)

In [172]:
# Video to analyse. Presumably the file written by the download cell
# (pytube appears to name it after the video title) — verify it exists before running.
video_path = "/content/Глюк’оZа - Мотыльки (feat KYIVSTONER).mp4"

In [None]:
# Keep the scenes in which more than 30% of the frames are flagged; every frame is classified.
out = classifier.classify_scenes(
    video_path,
    scene_threshold=.3,
    classify_every_n_frames=1,
)
out

INFO:pyscenedetect:Downscale factor set to 2, effective resolution: 320 x 180
INFO:pyscenedetect:Detecting scenes...




In [165]:
def cut_video_fragment(video_path, fragment_path, start_frame=0, end_frame=None):
    """Write frames [start_frame, end_frame) of a video to a new file.

    Args:
    * video_path - path of the source video.
    * fragment_path - path of the file to create.
    * start_frame - first frame to copy (inclusive).
    * end_frame - frame to stop at (exclusive); None means the frame count
    reported by the source video.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Reuse the source frame rate so the fragment plays at the original
        # speed; fall back to the former fixed 20 fps when none is reported.
        fps = cap.get(cv2.CAP_PROP_FPS) or 20.0

        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        out = cv2.VideoWriter(fragment_path, fourcc, fps, (width, height))
        # `is None` (not truthiness) so that end_frame=0 means "no frames".
        if end_frame is None:
            end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # set start frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        frame_count = start_frame
        while cap.isOpened() and (frame_count < end_frame):
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)
            frame_count += 1
    finally:
        # Release on every exit path: previously the writer was only closed
        # after a failed read, never when the loop reached `end_frame`.
        cap.release()
        if out is not None:
            out.release()

In [166]:
# Export every flagged scene as its own clip next to the source video.
# splitext drops the extension whatever its length (previously a fixed 4-character slice).
fragment_base = os.path.splitext(video_path)[0]
for idx, (start, end) in enumerate(out):
    # Keep the FrameTimecode loop variables intact; convert to frame numbers at the call site.
    cut_video_fragment(
        video_path,
        f"{fragment_base}-{idx}.mp4",
        start.get_frames(),
        end.get_frames(),
    )