# Imports & Setup

In [None]:
import cv2
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt
import shutil

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive (only works inside a Google Colab runtime).
drive.mount('/content/drive')

Mounted at /content/drive


# Preparing Data

In [None]:
input_video_path = 'path' # Redacted
output_dir = 'path' # Redacted

os.makedirs(output_dir, exist_ok=True)

video = cv2.VideoCapture(input_video_path)
if not video.isOpened():
    raise IOError(f"Could not open video: {input_video_path}")

try:
    fps = video.get(cv2.CAP_PROP_FPS)  # Frames per second
    # A missing/unknown frame rate would otherwise produce a zero interval and a
    # modulo-by-zero below, so fail early with a clear message instead.
    if not fps > 0:
        raise ValueError(f"Video reports an invalid frame rate: {fps}")

    # (1 frame per second) Round instead of truncating so timestamps do not drift on
    # fractional rates such as 23.976 or 29.97 fps; never go below one frame.
    frame_interval = max(1, int(round(fps)))

    # Extracting Frames
    success, frame = video.read()
    count = 0
    while success:
        if count % frame_interval == 0:
            # The saved file is named after the elapsed whole seconds
            timestamp = count // frame_interval
            filename = os.path.join(output_dir, f'{timestamp}.jpg')
            # imwrite signals failure through its return value rather than raising
            if not cv2.imwrite(filename, frame):
                raise IOError(f"Could not write frame to {filename}")
            print(f"Count: {count}, Timestamp: {timestamp}")
        success, frame = video.read()
        count += 1
finally:
    # Always free the capture handle, even if extraction failed midway
    video.release()

print(f"Frames extracted: {output_dir}")

## Frame Dimensions

In [None]:
input_video_path = 'path' # Redacted

# Read a single frame to discover the native resolution of the video
video = cv2.VideoCapture(input_video_path)
try:
    success, frame = video.read()
finally:
    video.release()

# Without this check a failed read would leave img_width/img_height undefined
# (or silently stale from an earlier run) when they are printed below.
if not success:
    raise IOError(f"Could not read a frame from video: {input_video_path}")

# frame.shape is (rows, columns, ...), i.e. height first
img_height, img_width = frame.shape[:2]

print(f"Frame dimensions: {img_width}x{img_height}")

Frame dimensions: 1920x1080


# Training NN

## Preliminary Stuff

### Merging Training Data Directories

In [None]:
def merge_directories(source_dir, target_dir, prefix, verbose=0):
    """
    Copy the 'Yes' and 'No' class folders of source_dir into target_dir.

    Each copied file is renamed to "<prefix>_<original name>" to avoid name
    clashes between sources. Only regular files directly inside a class folder
    are copied; anything else (e.g. nested directories) is ignored. The class
    folders under target_dir are created when missing. Pass verbose=1 to print
    every copy that is made.
    """
    for label in ('Yes', 'No'):
        src_folder = os.path.join(source_dir, label)
        dst_folder = os.path.join(target_dir, label)
        os.makedirs(dst_folder, exist_ok=True)

        for name in os.listdir(src_folder):
            source_file = os.path.join(src_folder, name)
            if not os.path.isfile(source_file):
                continue

            # Prefixed name keeps files from different sources apart
            target_file = os.path.join(dst_folder, f"{prefix}_{name}")
            shutil.copy(source_file, target_file)
            if verbose == 1:
                print(f"Copied {source_file} to {target_file}")

In [None]:
# Per-episode training data locations and the folder they are merged into
source_dir_ep1 = 'path' # Redacted
source_dir_ep2 = 'path' # Redacted
source_dir_ep3 = 'path' # Redacted
target_dir = 'path' # Redacted

os.makedirs(target_dir, exist_ok=True)

# Merge training data (Note, one episode is sufficient for most purposes)
episode_sources = (
    ('EP1', source_dir_ep1),
    ('EP2', source_dir_ep2),
    ('EP3', source_dir_ep3),
)
for episode_prefix, episode_dir in episode_sources:
    merge_directories(episode_dir, target_dir, episode_prefix)

### Model Architecture

In [None]:
train_dir = 'path' # Redacted

# Parameters (adjust as you see fit)
# img_height / img_width: size every image is resized to when loaded
# batch_size: number of images per batch
img_height, img_width = 180, 320
batch_size = 32

In [None]:
# Multiply pixel values by 1/255 and reserve 15% of the images for validation
data_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.15,
)

In [None]:
# Training portion of the split: resized image batches with binary labels
train_generator = data_gen.flow_from_directory(
    train_dir,
    subset='training',
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_height, img_width),
)

In [None]:
# Validation portion of the split, read with the same settings as the training data
val_generator = data_gen.flow_from_directory(
    train_dir,
    subset='validation',
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_height, img_width),
)

In [None]:
# Model definition (Customize)
# Alternative, larger architecture (no dropout) kept for reference:
# model = tf.keras.models.Sequential([
#     tf.keras.layers.Input(shape=(img_height, img_width, 3)),
#     tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
#     tf.keras.layers.MaxPooling2D((2, 2)),
#     tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
#     tf.keras.layers.MaxPooling2D((2, 2)),
#     tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
#     tf.keras.layers.MaxPooling2D((2, 2)),
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dense(1, activation='sigmoid')
# ])

# Small CNN: two conv/pool stages with dropout, then a dense head.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` argument on the first Conv2D, which newer Keras versions discourage.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(img_height, img_width, 3)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.25),
    # Single sigmoid unit: one value in [0, 1] per image for the binary task
    tf.keras.layers.Dense(1, activation='sigmoid')
])

## Actual Training

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training
# Keep the returned History object so the per-epoch metrics (history.history) stay
# available for inspection or plotting, instead of being discarded after the cell
# prints its object repr.
history = model.fit(train_generator, epochs=10, validation_data=val_generator)

Epoch 1/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 2s/step - accuracy: 0.8216 - loss: 1.2782 - val_accuracy: 0.8831 - val_loss: 0.5204
Epoch 2/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 2s/step - accuracy: 0.8792 - loss: 0.2665 - val_accuracy: 0.7836 - val_loss: 0.4483
Epoch 3/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 2s/step - accuracy: 0.9037 - loss: 0.2072 - val_accuracy: 0.8883 - val_loss: 0.3134
Epoch 4/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 2s/step - accuracy: 0.9434 - loss: 0.1262 - val_accuracy: 0.8778 - val_loss: 0.3167
Epoch 5/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 2s/step - accuracy: 0.9577 - loss: 0.0965 - val_accuracy: 0.8813 - val_loss: 0.2577
Epoch 6/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 2s/step - accuracy: 0.9704 - loss: 0.0833 - val_accuracy: 0.8691 - val_loss: 0.2710
Epoch 7/10
[1m102/102

<keras.src.callbacks.history.History at 0x7cedb7ffb1c0>

# Testing NN

## Preliminary Stuff

In [None]:
# Define path to the directory with images to classify.
# Each file name (without its extension) is later used as that image's timestamp.
image_dir = 'path' # Redacted

In [None]:
# Image size used when loading test images; keep it identical to the size the
# model was built and trained with
img_height, img_width = 180, 320

# Filled with one (timestamp, predicted_class, confidence, img) tuple per image
results = list()

In [None]:
output_yes_dir = 'path' # Redacted
output_no_dir = 'path' # Redacted

# Make sure both destination folders exist before any image is copied
for class_output_dir in (output_yes_dir, output_no_dir):
    os.makedirs(class_output_dir, exist_ok=True)

## Actual Testing

In [None]:
# Start from an empty list so that re-running this cell does not append a second
# copy of every prediction to the previous run's results.
results = []

for filename in sorted(os.listdir(image_dir)):
    img_path = os.path.join(image_dir, filename)
    # os.listdir also returns sub-directories (e.g. .ipynb_checkpoints); trying to
    # load those as images would abort the whole run.
    if not os.path.isfile(img_path):
        continue

    img = image.load_img(img_path, target_size=(img_height, img_width))
    img_array = image.img_to_array(img)
    # Add a leading batch axis and scale by 1/255
    img_processed = np.expand_dims(img_array, axis=0) / 255.0

    # Predict
    prediction = model.predict(img_processed, verbose=0)
    confidence = float(prediction[0][0])  # plain Python float for storing/formatting
    timestamp = os.path.splitext(filename)[0]  # Filename is the timestamp

    predicted_class = "yes" if confidence > .5 else "no"
    results.append((timestamp, predicted_class, confidence, img))

    # Copy the original file into the folder matching its predicted class
    destination_dir = output_yes_dir if predicted_class == "yes" else output_no_dir
    shutil.copy(img_path, os.path.join(destination_dir, filename))

### Bulk Processing

In [None]:
def classify_frames(image_dir, output_yes_dir, output_no_dir, model, img_height=180, img_width=320):
    """
    Classify every image file in image_dir and copy it into a per-class folder.

    Parameters:
        image_dir: directory whose files are classified; the file name without
            extension is used as the timestamp.
        output_yes_dir / output_no_dir: destination folders for images predicted
            "yes" / "no"; created when missing.
        model: object whose predict(batch, verbose=0) returns one score per image
            at index [0][0].
        img_height, img_width: size each image is resized to on load.

    Returns:
        A list of (timestamp, predicted_class, confidence, img) tuples, ordered by
        sorted file name. predicted_class is "yes" when confidence > 0.5, else "no".
    """
    os.makedirs(output_yes_dir, exist_ok=True)
    os.makedirs(output_no_dir, exist_ok=True)

    results = []
    for filename in sorted(os.listdir(image_dir)):
        img_path = os.path.join(image_dir, filename)
        # Skip sub-directories (e.g. .ipynb_checkpoints) returned by os.listdir;
        # loading them as images would abort the run.
        if not os.path.isfile(img_path):
            continue

        img = image.load_img(img_path, target_size=(img_height, img_width))
        img_array = image.img_to_array(img)
        # Add a leading batch axis and scale by 1/255
        img_processed = np.expand_dims(img_array, axis=0) / 255.0
        prediction = model.predict(img_processed, verbose=0)
        confidence = float(prediction[0][0])
        timestamp = os.path.splitext(filename)[0]
        predicted_class = "yes" if confidence > .5 else "no"
        results.append((timestamp, predicted_class, confidence, img))

        destination_dir = output_yes_dir if predicted_class == "yes" else output_no_dir
        shutil.copy(img_path, os.path.join(destination_dir, filename))
    return results


# Adjust as necessary, index is ep#
img_height = 180
img_width = 320
for ep in range(4, 13):
    image_dir = f'path' # Redacted
    output_yes_dir = f'path' # Redacted
    output_no_dir = f'path' # Redacted
    # `results` is replaced on each iteration, so once the loop finishes it holds
    # the predictions of the last processed episode only.
    results = classify_frames(
        image_dir, output_yes_dir, output_no_dir, model,
        img_height=img_height, img_width=img_width,
    )

## Results

### All Results Chronological

In [None]:
# Results in chronological order (timestamps are compared as integers)
sorted_by_time = sorted(results, key=lambda entry: int(entry[0]))
print("Predictions sorted by time:")
for ts, label, score, _frame in sorted_by_time:
    print(f"Timestamp: {ts}s, Prediction: {label}, Confidence: {score:.2f}")

Predictions sorted by time:
Timestamp: 0s, Prediction: no, Confidence: 0.00
Timestamp: 1s, Prediction: no, Confidence: 0.00
Timestamp: 2s, Prediction: no, Confidence: 0.00
Timestamp: 3s, Prediction: no, Confidence: 0.05
Timestamp: 4s, Prediction: no, Confidence: 0.05
Timestamp: 5s, Prediction: no, Confidence: 0.05
Timestamp: 6s, Prediction: no, Confidence: 0.00
Timestamp: 7s, Prediction: no, Confidence: 0.00
Timestamp: 8s, Prediction: no, Confidence: 0.00
Timestamp: 9s, Prediction: no, Confidence: 0.00
Timestamp: 10s, Prediction: no, Confidence: 0.00
Timestamp: 11s, Prediction: no, Confidence: 0.00
Timestamp: 12s, Prediction: no, Confidence: 0.00
Timestamp: 13s, Prediction: no, Confidence: 0.00
Timestamp: 14s, Prediction: no, Confidence: 0.00
Timestamp: 15s, Prediction: no, Confidence: 0.00
Timestamp: 16s, Prediction: no, Confidence: 0.00
Timestamp: 17s, Prediction: no, Confidence: 0.00
Timestamp: 18s, Prediction: no, Confidence: 0.00
Timestamp: 19s, Prediction: no, Confidence: 0.00
Ti

### By Confidence

In [None]:
# Results ordered from the highest confidence to the lowest
sorted_by_confidence = sorted(results, key=lambda entry: entry[2], reverse=True)
print("\nPredictions sorted by confidence from 'yes' to 'no':")
for ts, label, score, _frame in sorted_by_confidence:
    print(f"Timestamp: {ts}s, Prediction: {label}, Confidence: {score:.2f}")


Predictions sorted by confidence from 'yes' to 'no':
Timestamp: 1248s, Prediction: yes, Confidence: 1.00
Timestamp: 1329s, Prediction: yes, Confidence: 1.00
Timestamp: 825s, Prediction: yes, Confidence: 1.00
Timestamp: 823s, Prediction: yes, Confidence: 1.00
Timestamp: 824s, Prediction: yes, Confidence: 1.00
Timestamp: 1330s, Prediction: yes, Confidence: 1.00
Timestamp: 516s, Prediction: yes, Confidence: 0.99
Timestamp: 517s, Prediction: yes, Confidence: 0.99
Timestamp: 513s, Prediction: yes, Confidence: 0.99
Timestamp: 1220s, Prediction: yes, Confidence: 0.99
Timestamp: 1221s, Prediction: yes, Confidence: 0.99
Timestamp: 1222s, Prediction: yes, Confidence: 0.99
Timestamp: 515s, Prediction: yes, Confidence: 0.99
Timestamp: 514s, Prediction: yes, Confidence: 0.99
Timestamp: 133s, Prediction: yes, Confidence: 0.99
Timestamp: 1332s, Prediction: yes, Confidence: 0.99
Timestamp: 1333s, Prediction: yes, Confidence: 0.99
Timestamp: 86s, Prediction: yes, Confidence: 0.99
Timestamp: 1331s, Pre

### Top 10s

In [None]:
# Split the predictions by predicted class
yes_results = []
no_results = []
for entry in results:
    if entry[1] == "yes":
        yes_results.append(entry)
    elif entry[1] == "no":
        no_results.append(entry)

# Most confident "yes" = highest scores; most confident "no" = lowest scores
top_yes = sorted(yes_results, key=lambda entry: entry[2], reverse=True)[:10]
top_no = sorted(no_results, key=lambda entry: entry[2])[:10]

for heading, rows in (
    ("Top 10 'yes' predictions:", top_yes),
    ("\nTop 10 'no' predictions:", top_no),
):
    print(heading)
    for ts, label, score, _frame in rows:
        print(f"Timestamp: {ts}s, Prediction: {label}, Confidence: {score:.2f}")

Top 10 'yes' predictions:
Timestamp: 1248s, Prediction: yes, Confidence: 1.00
Timestamp: 1329s, Prediction: yes, Confidence: 1.00
Timestamp: 825s, Prediction: yes, Confidence: 1.00
Timestamp: 823s, Prediction: yes, Confidence: 1.00
Timestamp: 824s, Prediction: yes, Confidence: 1.00
Timestamp: 1330s, Prediction: yes, Confidence: 1.00
Timestamp: 516s, Prediction: yes, Confidence: 0.99
Timestamp: 517s, Prediction: yes, Confidence: 0.99
Timestamp: 513s, Prediction: yes, Confidence: 0.99
Timestamp: 1220s, Prediction: yes, Confidence: 0.99

Top 10 'no' predictions:
Timestamp: 125s, Prediction: no, Confidence: 0.00
Timestamp: 234s, Prediction: no, Confidence: 0.00
Timestamp: 1316s, Prediction: no, Confidence: 0.00
Timestamp: 882s, Prediction: no, Confidence: 0.00
Timestamp: 1010s, Prediction: no, Confidence: 0.00
Timestamp: 1328s, Prediction: no, Confidence: 0.00
Timestamp: 1327s, Prediction: no, Confidence: 0.00
Timestamp: 883s, Prediction: no, Confidence: 0.00
Timestamp: 1372s, Prediction: 

In [None]:
def display_images(results):
    """
    Show the images of the given results side by side in a single row.

    Parameters:
        results: sequence of (timestamp, prediction, confidence, img) tuples; each
            img is drawn with its timestamp, prediction and confidence as the title.

    Prints a short notice and draws nothing when `results` is empty.
    """
    if len(results) == 0:
        # plt.subplots cannot create a grid with zero columns
        print("No images to display.")
        return

    # squeeze=False always returns a 2-D array of Axes; with the default, a single
    # image would yield a bare Axes object that cannot be indexed.
    fig, axes = plt.subplots(1, len(results), figsize=(20, 4), squeeze=False)
    for ax, (timestamp, prediction, confidence, img) in zip(axes[0], results):
        ax.imshow(img)
        ax.set_title(f"Timestamp: {timestamp}\nPrediction: {prediction}\nConfidence: {confidence:.2f}")
        ax.axis('off')
    plt.show()

# Show the image strips for both classes, each under its own heading
for heading, rows in (
    ("Top 10 'yes' predictions:", top_yes),
    ("Top 10 'no' predictions:", top_no),
):
    print(heading)
    display_images(rows)