In [1]:
# Mount Google Drive at /content/drive; later cells read the videos and the
# label CSV from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os

# Folder on the mounted Drive that is walked recursively for video clips.
# NOTE(review): absolute, account-specific path — adjust before running elsewhere.
data_dir = '/content/drive/MyDrive/OMSCS/Cichlids/allVideos/MC_singlenuc23_8_Tk33_031720'

In [3]:
# Index every .mp4 found anywhere below data_dir, keyed by its file name.
# Each value records the file name and its full path on Drive; a file name
# seen again in another folder replaces the earlier entry.
video_info = {
    clip_file: {'name': clip_file, 'location': os.path.join(folder, clip_file)}
    for folder, _, clip_files in os.walk(data_dir)
    for clip_file in clip_files
    if clip_file.endswith('.mp4')  # Modify extension if needed
}

# Report how many clips were indexed
print(f"Total video files found: {len(video_info)}")

# Show the first three entries as a quick sanity check
print("Sample video information:")
for sample_name, sample_info in list(video_info.items())[:3]:
  print(f"- Name: {sample_name}")
  print(f"  Location: {sample_info['location']}")

Total video files found: 200
Sample video information:
- Name: MC_singlenuc23_8_Tk33_031720__0001_vid__529__7726__2293__177__1008_ManualLabel.mp4
  Location: /content/drive/MyDrive/OMSCS/Cichlids/allVideos/MC_singlenuc23_8_Tk33_031720/MC_singlenuc23_8_Tk33_031720__0001_vid__529__7726__2293__177__1008_ManualLabel.mp4
- Name: MC_singlenuc23_8_Tk33_031720__0001_vid__529__7726__2293__177__1008.mp4
  Location: /content/drive/MyDrive/OMSCS/Cichlids/allVideos/MC_singlenuc23_8_Tk33_031720/MC_singlenuc23_8_Tk33_031720__0001_vid__529__7726__2293__177__1008.mp4
- Name: MC_singlenuc23_8_Tk33_031720__0001_vid__547__3944__2304__821__1054.mp4
  Location: /content/drive/MyDrive/OMSCS/Cichlids/allVideos/MC_singlenuc23_8_Tk33_031720/MC_singlenuc23_8_Tk33_031720__0001_vid__547__3944__2304__821__1054.mp4


In [4]:
import pandas as pd

# Path to the manual-label CSV on the mounted Google Drive
csv_path = "/content/drive/MyDrive/OMSCS/Cichlids/ManualLabels.csv"

# Load the CSV into a DataFrame and preview the 'ClipName' column, which the
# following cells compare against the indexed video file names.
df = pd.read_csv(csv_path)
print(df['ClipName'].head())

0      MC16_2__0001_vid__192__2135__797__238__1036
1    MC16_2__0001_vid__292__10672__1014__350__1109
2     MC16_2__0001_vid__384__1158__1138__545__1113
3      MC16_2__0001_vid__404__7200__1252__850__926
4      MC16_2__0001_vid__535__2728__1534__673__835
Name: ClipName, dtype: object


In [5]:
# Attach the Drive path of each labelled clip as a new 'DriveLocation' column.
#
# Clips are matched on the exact file stem (file name without its extension)
# rather than with a substring test. A substring test also accepts
# '<clip>_ManualLabel.mp4' and any file whose name merely extends the clip name
# (e.g. '...__100' inside '...__1008.mp4'), and whichever match happens to be
# visited last silently wins. The dictionary lookup is also a single pass over
# the rows instead of a rows x files nested loop, and it does not rely on the
# dataframe having a default 0..n-1 index.
location_by_clip = {
    os.path.splitext(file_name)[0]: info['location']
    for file_name, info in video_info.items()
}

# Rows without a matching file get None, so notnull() identifies matched rows.
df['DriveLocation'] = [location_by_clip.get(clip_name) for clip_name in df['ClipName']]

df.head()

Unnamed: 0,LID,ClipName,ManualLabel,MLabeler,MLabelTime,DriveLocation
0,0,MC16_2__0001_vid__192__2135__797__238__1036,m,zack-Lenovo-ideapad-Y700-15ISK,50:12.8,
1,1,MC16_2__0001_vid__292__10672__1014__350__1109,o,zack-Lenovo-ideapad-Y700-15ISK,34:58.9,
2,2,MC16_2__0001_vid__384__1158__1138__545__1113,m,zack-Lenovo-ideapad-Y700-15ISK,53:41.7,
3,3,MC16_2__0001_vid__404__7200__1252__850__926,o,zack-Lenovo-ideapad-Y700-15ISK,09:55.4,
4,4,MC16_2__0001_vid__535__2728__1534__673__835,c,zack-Lenovo-ideapad-Y700-15ISK,04:34.0,


In [6]:
# Keep only the columns used downstream: the clip name, the matched Drive path
# and the manual label.
selected_columns = ['ClipName', 'DriveLocation', 'ManualLabel']
smaller_df = df[selected_columns]

# Display up to the first 100 rows of the reduced dataframe
smaller_df.head(100)

Unnamed: 0,ClipName,DriveLocation,ManualLabel
0,MC16_2__0001_vid__192__2135__797__238__1036,,m
1,MC16_2__0001_vid__292__10672__1014__350__1109,,o
2,MC16_2__0001_vid__384__1158__1138__545__1113,,m
3,MC16_2__0001_vid__404__7200__1252__850__926,,o
4,MC16_2__0001_vid__535__2728__1534__673__835,,c
...,...,...,...
95,MC16_2__0001_vid__4968__1252__14261__464__939,,s
96,MC16_2__0001_vid__5033__973__14321__720__985,,o
97,MC16_2__0001_vid__5052__2406__14342__311__907,,o
98,MC16_2__0001_vid__5072__693__14365__561__722,,o


In [7]:
# Rows that were matched to a file on Drive
has_location = smaller_df['DriveLocation'].notna()
non_null_count = has_location.sum()

# Size of the reduced dataframe
total_rows = smaller_df.shape[0]

# Share of matched rows, expressed in percent
percentage = (non_null_count / total_rows) * 100

# Summary of the matching step
print(f"Number of instances with non-null 'DriveLocation': {non_null_count}")
print(f"Total number of rows in the smaller dataframe: {total_rows}")
print(f"Percentage of rows with non-null 'DriveLocation': {percentage:.2f}%")

Number of instances with non-null 'DriveLocation': 100
Total number of rows in the smaller dataframe: 15789
Percentage of rows with non-null 'DriveLocation': 0.63%


In [8]:
# Keep only the rows that have a Drive path, then preview them
non_null_df = smaller_df.dropna(subset=['DriveLocation'])
non_null_df.head()

Unnamed: 0,ClipName,DriveLocation,ManualLabel
14966,MC_singlenuc23_8_Tk33_031720__0001_vid__689__2...,/content/drive/MyDrive/OMSCS/Cichlids/allVideo...,f
14967,MC_singlenuc23_8_Tk33_031720__0001_vid__6379__...,/content/drive/MyDrive/OMSCS/Cichlids/allVideo...,o
14968,MC_singlenuc23_8_Tk33_031720__0001_vid__752__9...,/content/drive/MyDrive/OMSCS/Cichlids/allVideo...,f
14969,MC_singlenuc23_8_Tk33_031720__0001_vid__666__1...,/content/drive/MyDrive/OMSCS/Cichlids/allVideo...,f
14970,MC_singlenuc23_8_Tk33_031720__0001_vid__6116__...,/content/drive/MyDrive/OMSCS/Cichlids/allVideo...,t


In [9]:
import os
import cv2

# Keep only the clips whose file on Drive is larger than the size threshold.
min_file_size = 100 * 1024  # 100 KB expressed in bytes

# One {'file': path, 'label': manual label} record per clip that passes the filter
files_to_process = [
    {'file': file_path, 'label': label}
    for file_path, label in zip(non_null_df['DriveLocation'], non_null_df['ManualLabel'])
    if os.path.getsize(file_path) > min_file_size
]

# Now files_to_process contains only the files above 100 KB
print(f"Total files to process: {len(files_to_process)}")

# The denominator is the number of candidate rows in non_null_df; an empty
# frame reports 0% instead of dividing by zero.
candidate_count = len(non_null_df)
kept_percentage = (len(files_to_process) / candidate_count * 100) if candidate_count else 0.0
# '.2f' is a precision of two decimals; '2f' would be a minimum width of 2 and
# print the default six decimals.
print(f"Percentage of >100 KB files: {kept_percentage:.2f}%")

Total files to process: 100
Percentage of >100 KB files: 100.000000%


In [112]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

In [151]:
# Clip geometry shared by frame extraction and the model's input shape.
# Width each frame is resized to, in pixels
frame_width = 80
# Height each frame is resized to, in pixels
frame_height = 80
# Number of frames kept per clip
num_frames = 55

In [152]:
def custom_3d_cnn(frame_width, frame_height, num_frames, num_classes,
                  learning_rate=1e-2, dropout_rate=0.7):
    """Build and compile a small 3D CNN that classifies video clips.

    The network is three Conv3D + MaxPool3D stages (16, 32 and 64 filters),
    followed by Flatten, a 128-unit dense layer, dropout and a softmax output.
    No rescaling layer is included, so pixel values reach the first
    convolution exactly as the caller supplies them.

    Args:
        frame_width: Width of each input frame.
        frame_height: Height of each input frame.
        num_frames: Number of frames per clip.
        num_classes: Number of output classes (size of the softmax layer).
        learning_rate: Learning rate passed to the Adam optimizer.
        dropout_rate: Rate passed to the dropout layer before the output.

    Returns:
        A compiled keras.Model taking input of shape
        (num_frames, frame_height, frame_width, 3) and trained with
        categorical cross-entropy, reporting accuracy.
    """
    # Input layer for the video frames
    inputs = keras.Input(shape=(num_frames, frame_height, frame_width, 3))

    # Three identical convolution/pooling stages that differ only in filter count
    x = inputs
    for filters in (16, 32, 64):
        x = layers.Conv3D(filters=filters, kernel_size=(3, 3, 3), activation="relu")(x)
        x = layers.MaxPool3D(pool_size=(2, 2, 2))(x)

    # Classification head
    x = layers.Flatten()(x)
    x = layers.Dense(units=128, activation="relu")(x)
    x = layers.Dropout(dropout_rate)(x)  # Dropout layer to prevent overfitting
    outputs = layers.Dense(units=num_classes, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [153]:
# Extract frames from videos
def extract_frames(video_path, frame_width, frame_height, num_frames):
    """Read the first ``num_frames`` frames of a video as a fixed-size array.

    Frames are read sequentially from the start of the file and resized to
    (frame_width, frame_height). Reading stops early when the capture returns
    no frame; the remainder is then filled with all-zero (black) frames, so a
    file that yields no frames at all produces an all-black clip. No colour
    conversion is applied to what the capture returns.

    Args:
        video_path: Path of the video file to open.
        frame_width: Target width of each frame.
        frame_height: Target height of each frame.
        num_frames: Number of frames in the returned clip.

    Returns:
        np.ndarray built from exactly ``num_frames`` frames, each of shape
        (frame_height, frame_width, 3).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while len(frames) < num_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, (frame_width, frame_height)))
    finally:
        # Release the capture even if reading or resizing raises
        cap.release()

    # If the video has fewer frames, pad with black frames
    missing = num_frames - len(frames)
    if missing > 0:
        blank = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
        frames.extend(blank for _ in range(missing))
    return np.array(frames)

In [154]:
def create_dataset(files_to_process, frame_width, frame_height, num_frames):
    """Turn a list of {'file', 'label'} records into clip and label arrays.

    Each record's video is loaded with extract_frames() using the given frame
    size and frame count.

    Returns:
        A tuple (X, y): X is an array stacking the frame array of every
        record, y is an array of the corresponding labels, in input order.
    """
    samples = []
    for entry in files_to_process:
        path, tag = entry['file'], entry['label']
        clip = extract_frames(path, frame_width, frame_height, num_frames)
        samples.append((clip, tag))

    clips = [clip for clip, _ in samples]
    tags = [tag for _, tag in samples]
    return np.array(clips), np.array(tags)

In [155]:
# Build the dataset from the size-filtered clips: X holds one stack of frames
# per clip and y the matching manual labels.
X, y = create_dataset(files_to_process, frame_width, frame_height, num_frames)

In [156]:
# Map each single-letter manual label to an integer class id
label_mapping = {'m': 0, 'o': 1, 'c': 2, 'p': 3, 'd': 4, 'x': 5, 'f': 6, 't': 7, 'b': 8, 's': 9}
y = np.array([label_mapping[label] for label in y])
# Fix the one-hot width to the number of known classes. Without num_classes the
# width is inferred as max(y) + 1, which is narrower than the model's 10-unit
# output layer whenever the highest class id is absent from this subset.
y = to_categorical(y, num_classes=len(label_mapping))

In [157]:
# Hold out 20% of the clips as a test set; random_state=42 fixes the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [158]:
# Data augmentation: random geometric transforms applied to individual images
datagen = ImageDataGenerator(
    # rotate by a random angle of up to 20 degrees
    rotation_range=20,
    # shift horizontally by up to 20% of the image width
    width_shift_range=0.2,
    # shift vertically by up to 20% of the image height
    height_shift_range=0.2,
    # randomly mirror left/right
    horizontal_flip=True
)

In [159]:
# Apply data augmentation to video clips, one random transform per clip
def augment_frames(X, datagen):
    """Return an augmented copy of every clip in ``X``.

    A single set of random transform parameters is drawn per clip and applied
    to all of its frames. Drawing a new transform for every frame would
    rotate, shift and flip consecutive frames independently, which destroys
    the frame-to-frame motion a 3D convolution is meant to pick up.

    Args:
        X: Iterable of clips, each a sequence of frames of shape
            (height, width, channels).
        datagen: Object providing ``get_random_transform(img_shape)``,
            ``apply_transform(x, params)`` and ``standardize(x)``, such as a
            Keras ImageDataGenerator.

    Returns:
        np.ndarray with one augmented float32 clip per input clip, in the
        same order.
    """
    X_augmented = []
    for frames in X:
        # Work in float32 so interpolated pixel values are not truncated back to integers
        clip = np.asarray(frames, dtype=np.float32)
        if len(clip) == 0:
            # Nothing to transform; keep the clip's slot in the output
            X_augmented.append([])
            continue

        # One random transform for the whole clip keeps its frames aligned
        params = datagen.get_random_transform(clip.shape[1:])
        X_augmented.append([
            datagen.standardize(datagen.apply_transform(frame, params))
            for frame in clip
        ])
    return np.array(X_augmented)

# Augment every training clip; the transforms are random, so each run of this
# line produces different data.
X_train_augmented = augment_frames(X_train, datagen)

In [160]:
# NOTE(review): this repeats the call made at the end of the previous cell;
# running it redraws the random augmentation and overwrites X_train_augmented.
X_train_augmented = augment_frames(X_train, datagen)

In [161]:
# Build the 3D CNN; num_classes=10 matches the ten entries of label_mapping.
model = custom_3d_cnn(frame_width, frame_height, num_frames, num_classes=10)

In [162]:
# Train the model on the augmented clips only. validation_split=0.2 reserves
# part of that same augmented array for validation, so validation is also
# measured on augmented clips.
history = model.fit(X_train_augmented, y_train, epochs=40, batch_size=96, validation_split=0.2)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [163]:
# Evaluate on the held-out test clips (X_test is passed as produced by the
# split, without augmentation) and report accuracy as a percentage.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy * 100:.2f}%")

Test accuracy: 20.00%
