In [1]:
# Persiapan Lingkungan dan Instalasi
!pip install -q transformers==4.41.2
!pip install -q torch==2.3.0
!pip install -q accelerate==0.30.1
!pip install -q opencv-python
!pip install -q Pillow

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m111.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m907.0 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m112.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m94.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m57.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import cv2
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image

# 1. Load the GIT image-captioning model
# The processor and the model are both loaded from the same checkpoint name.
_GIT_CHECKPOINT = "microsoft/git-base-coco"

print("Memuat model GIT, proses ini lebih cepat...")
processor = AutoProcessor.from_pretrained(_GIT_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_GIT_CHECKPOINT)
print("Model GIT berhasil dimuat!")

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)

# 2. Define the validation function
def _caption_frame(frame_bgr):
    """Return the stripped, lower-cased GIT caption generated for one BGR frame."""
    # The frame is converted from BGR to RGB before being wrapped as a PIL image.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(frame_rgb)

    # Pre-process the image and move the pixel tensor to the model's device.
    pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values.to(device)

    # Generate the description (caption) for the image.
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip().lower()


def validate_video_with_git(video_path, required_keywords, frame_interval=1,
                            min_keywords=2, min_consecutive_frames=3):
    """
    Validate a video by captioning its frames with the GIT model.

    Every ``frame_interval``-th frame is captioned and the caption is printed.
    A sampled frame counts as valid when at least ``min_keywords`` of
    ``required_keywords`` occur in its caption as substrings. The video is
    accepted as soon as ``min_consecutive_frames`` sampled frames in a row
    are valid; a single invalid sampled frame resets the streak.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        required_keywords: Iterable of keyword strings. Matching is
            case-insensitive because captions are lower-cased.
        frame_interval: Caption one frame out of every ``frame_interval``
            frames (default 1, i.e. every frame).
        min_keywords: Minimum number of keywords a caption must contain for
            the frame to be valid (default 2).
        min_consecutive_frames: Length of the streak of valid sampled frames
            required to accept the video (default 3).

    Returns:
        True if the required streak is reached, False if the video cannot be
        opened or ends before the streak is reached.

    Raises:
        ValueError: If ``frame_interval`` or ``min_consecutive_frames`` is
            smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be >= 1")
    if min_consecutive_frames < 1:
        raise ValueError("min_consecutive_frames must be >= 1")

    # Captions are lower-cased, so keywords are normalised the same way;
    # otherwise a keyword containing an upper-case letter could never match.
    keywords = [keyword.lower() for keyword in required_keywords]

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error: Gagal membuka file video.")
            return False

        valid_frames_count = 0
        frame_count = 0
        while cap.isOpened():
            ret, frame_bgr = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                caption = _caption_frame(frame_bgr)
                print(f"Frame {frame_count}: '{caption}'")

                # Check the caption for the required keywords.
                found_keywords = [keyword for keyword in keywords if keyword in caption]

                if len(found_keywords) >= min_keywords:
                    valid_frames_count += 1
                    # Accept once enough consecutive sampled frames are valid.
                    if valid_frames_count >= min_consecutive_frames:
                        return True
                else:
                    # An invalid frame breaks the streak.
                    valid_frames_count = 0

            frame_count += 1

        return False
    finally:
        # Release the capture on every exit path, including exceptions raised
        # while captioning a frame.
        cap.release()

# 3. Run the validation
if __name__ == "__main__":
    video_file_kertas = 'video_kertas_benar.mp4'
    video_file_daun = 'video_daun_benar.mp4'
    video_file_botol_plastik = 'video_botol_plastik_benar.mp4'
    video_file_kaleng = 'video_kaleng_benar.mp4'

    keywords_kertas = ['person', 'paper', 'yellow trash can']
    keywords_daun = ['person', 'leaves', 'green trash can']
    keywords_botol_plastik = ['person', 'plastic bottle', 'yellow trash can']
    keywords_kaleng = ['person', 'drink cans', 'yellow trash can']

    # One entry per waste-disposal video:
    # (label used in the result message, video file, required keywords).
    # Paper, leaves, plastic bottle, drink can - in that order.
    missions = [
        ("Kertas", video_file_kertas, keywords_kertas),
        ("Daun", video_file_daun, keywords_daun),
        ("Botol Plastik", video_file_botol_plastik, keywords_botol_plastik),
        ("Kaleng Minuman", video_file_kaleng, keywords_kaleng),
    ]

    for index, (label, video_file, keywords) in enumerate(missions):
        # Every progress line except the first is preceded by a blank line.
        separator = "\n" if index else ""
        print(f"{separator}Memproses video: {video_file}...")
        valid = validate_video_with_git(video_file, keywords)

        if valid:
            print(f"Video {label} dianggap valid. Misi berhasil!")
        else:
            print(f"Video {label} tidak memenuhi kriteria. Misi gagal.")

Memuat model GIT, proses ini lebih cepat...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/503 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/453 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/707M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

Model GIT berhasil dimuat!
Memproses video: video_kertas_benar.mp4...
Frame 0: 'a yellow trash can with a black lid'
Frame 1: 'a yellow trash can with a black lid'
Frame 2: 'a yellow trash can with a black lid'
Frame 3: 'a yellow trash can with a black lid'
Frame 4: 'a yellow bin with a lid and a black lid'
Frame 5: 'a yellow bin with a lid and a black lid'
Frame 6: 'a yellow trash can with a black lid'
Frame 7: 'a yellow trash can with a black lid'
Frame 8: 'a yellow trash can with a lid'
Frame 9: 'a yellow trash can with a black lid'
Frame 10: 'a plastic bin with a lid and a plastic bag'
Frame 11: 'a yellow trash can with a black lid'
Frame 12: 'how to make a disposable plastic bag out of plastic'
Frame 13: 'how to make a disposable plastic bag out of a plastic bin'
Frame 14: 'a hand is seen filling a plastic bin with a plastic bag.'
Frame 15: 'a hand picking up a bag of trash from a yellow trash can'
Frame 16: 'a hand is shown with a plastic bag.'
Frame 17: 'a person throwing a plas