In [2]:
# Cell 1
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical

print("Python, OpenCV, NumPy, TF versions:")
print("cv2:", cv2.__version__, "numpy:", np.__version__, "tf:", tf.__version__)


Python, OpenCV, NumPy, TF versions:
cv2: 4.8.0 numpy: 1.24.3 tf: 2.13.0


In [3]:
# Cell 2
# We're assuming this notebook is inside Tmodel and the two class folders exist here.
DATA_DIR = '.'  # Tmodel (current directory)
CLASS_NAMES = ["NOT_THROWING_WASTE", "THROWING_WASTE"]  # EXACT folder names
IMG_SIZE = 64          # image height/width
FRAME_SKIP = 5         # take every 5th frame from each video
TEST_SIZE = 0.20       # fraction for test set
RANDOM_SEED = 42
BATCH_SIZE = 32
EPOCHS = 25
OUTPUT_MODEL = "throwing_waste_model.h5"

os.makedirs('models', exist_ok=True)

np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)


In [4]:
# Cell 3
def extract_frames_from_video(video_path, frame_skip=FRAME_SKIP, img_size=IMG_SIZE):
    """
    Read video and extract frames (every frame_skip-th frame), convert BGR->RGB,
    resize to img_size x img_size and return list of frames (RGB, 0..255).
    """
    frames = []
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Failed to open:", video_path)
        return frames

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_idx += 1
        if frame_idx % frame_skip == 0:
            # convert to RGB (so visuals with matplotlib look right) and resize
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (img_size, img_size))
            frames.append(frame_resized)
    cap.release()
    return frames


In [5]:
# Cell 4
data = []
labels = []

# iterate classes and videos
for class_index, class_name in enumerate(CLASS_NAMES):
    folder = os.path.join(DATA_DIR, class_name)
    if not os.path.isdir(folder):
        print("WARNING: folder not found:", folder)
        continue
    print("Processing class:", class_name)
    for fname in sorted(os.listdir(folder)):
        if not fname.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
            continue
        fpath = os.path.join(folder, fname)
        frames = extract_frames_from_video(fpath)
        if len(frames) == 0:
            print("  no frames extracted from", fname)
            continue
        # append each frame as a sample
        for fr in frames:
            data.append(fr)
            labels.append(class_index)

# convert to numpy arrays and normalize
data = np.array(data, dtype='float32') / 255.0   # shape: (N, H, W, 3)
labels = np.array(labels, dtype='int32')         # shape: (N,)
print("Dataset prepared. Data shape:", data.shape, "Labels shape:", labels.shape)

# class counts
(unique, counts) = np.unique(labels, return_counts=True)
print("Counts:", dict(zip([CLASS_NAMES[int(u)] for u in unique], counts)))


Processing class: NOT_THROWING_WASTE
Processing class: THROWING_WASTE
Dataset prepared. Data shape: (4164, 64, 64, 3) Labels shape: (4164,)
Counts: {'NOT_THROWING_WASTE': 2657, 'THROWING_WASTE': 1507}


In [6]:
# Cell 5
# Shuffle
data, labels = shuffle(data, labels, random_state=RANDOM_SEED)

# One-hot encode
y = to_categorical(labels, num_classes=len(CLASS_NAMES))

# Train/test split (stratify to keep class ratio)
X_train, X_test, y_train, y_test = train_test_split(
    data, y, test_size=TEST_SIZE, random_state=RANDOM_SEED, stratify=labels
)

print("Train shape:", X_train.shape, y_train.shape)
print("Test shape:", X_test.shape, y_test.shape)


Train shape: (3331, 64, 64, 3) (3331, 2)
Test shape: (833, 64, 64, 3) (833, 2)


In [7]:
# Cell 6
def build_frame_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=2):
    model = Sequential()
    model.add(Conv2D(32, (3,3), activation='relu', padding='same', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2,2)))

    model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2,2)))

    model.add(Conv2D(128, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2,2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

model = build_frame_cnn(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=len(CLASS_NAMES))
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 64, 64, 32)        896       
                                                                 
 batch_normalization (Batch  (None, 64, 64, 32)        128       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 32, 32, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 64)        18496     
                                                                 
 batch_normalization_1 (Bat  (None, 32, 32, 64)        256       
 chNormalization)                                                
                                                        

In [8]:
# Cell 7
checkpoint = ModelCheckpoint(
    os.path.join('models', OUTPUT_MODEL),
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)
earlystop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True, verbose=1)

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[checkpoint, earlystop],
    verbose=1
)


Epoch 1/25
Epoch 1: val_accuracy improved from -inf to 0.36134, saving model to models\throwing_waste_model.h5


  saving_api.save_model(


Epoch 2/25
Epoch 2: val_accuracy did not improve from 0.36134
Epoch 3/25
Epoch 3: val_accuracy improved from 0.36134 to 0.40936, saving model to models\throwing_waste_model.h5
Epoch 4/25
Epoch 4: val_accuracy improved from 0.40936 to 0.76711, saving model to models\throwing_waste_model.h5
Epoch 5/25
Epoch 5: val_accuracy improved from 0.76711 to 1.00000, saving model to models\throwing_waste_model.h5
Epoch 6/25
Epoch 6: val_accuracy did not improve from 1.00000
Epoch 7/25
Epoch 7: val_accuracy did not improve from 1.00000
Epoch 8/25
Epoch 8: val_accuracy did not improve from 1.00000
Epoch 9/25
Epoch 9: val_accuracy did not improve from 1.00000
Epoch 10/25
Epoch 10: val_accuracy did not improve from 1.00000
Epoch 11/25
Epoch 11: val_accuracy did not improve from 1.00000
Epoch 12/25
Epoch 12: val_accuracy did not improve from 1.00000
Epoch 13/25
Epoch 13: val_accuracy did not improve from 1.00000
Epoch 14/25
Epoch 14: val_accuracy did not improve from 1.00000
Epoch 15/25
Epoch 15: val_ac