In [15]:
import tensorflow as tf
import os
import numpy as np

from matplotlib import pyplot as plt
%matplotlib inline

if not os.path.isdir('models'):
    os.mkdir('models')
    
print('TensorFlow version:', tf.__version__)
print('Is using GPU?', tf.test.is_gpu_available())

TensorFlow version: 2.12.0
Is using GPU? False


In [16]:
import cv2
import keras
from keras.models import Sequential
from keras.layers import Dense, InputLayer, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [17]:
train = pd.read_csv('Data/train_new.csv')
train.head()

Unnamed: 0,image,class
0,harassment_shv1.mp4_frame0.jpg,1
1,harassment_shv1.mp4_frame1.jpg,1
2,harassment_shv1.mp4_frame10.jpg,1
3,harassment_shv1.mp4_frame11.jpg,1
4,harassment_shv1.mp4_frame12.jpg,1


In [18]:
import numpy as np
from tqdm import tqdm
from PIL import Image
from tensorflow.keras.preprocessing import image

# creating an empty list
train_image = []

# for loop to read and store frames
for i in tqdm(range(train.shape[0])):
    # loading the image and keeping the target size as (224,224,3)
    img = Image.open('train/' + train['image'][i]).resize((224, 224))
    # converting it to array
    img = image.img_to_array(img)
    # normalizing the pixel value
    img = img / 255.0
    # appending the image to the train_image list
    train_image.append(img)
    
# converting the list to numpy array
X = np.array(train_image)

# shape of the array
X.shape

100%|██████████| 813/813 [00:16<00:00, 48.56it/s]


(813, 224, 224, 3)

In [19]:
y = train['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2, stratify=y)

In [20]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: class, dtype: int64

In [21]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Dropout, Flatten, Input, Dense
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.models import Model

def create_model():
    
    def add_conv_block(model, num_filters):
        
        model = Conv2D(num_filters, 3, activation='relu', padding='same')(model)
        model = BatchNormalization()(model)
        model = SeparableConv2D(num_filters, 3, activation='relu', padding='valid')(model)
        model = MaxPooling2D(pool_size=2)(model)
        model = Dropout(0.2)(model)

        return model
    
    inputs = Input(shape=(224, 224, 3))
    
    x = add_conv_block(inputs, 32)
    x = add_conv_block(x, 64)
    x = add_conv_block(x, 128)
    x = add_conv_block(x, 256)
    
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(2, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

model = create_model()
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv2d_4 (Conv2D)           (None, 224, 224, 32)      896       
                                                                 
 batch_normalization_4 (Batc  (None, 224, 224, 32)     128       
 hNormalization)                                                 
                                                                 
 separable_conv2d_4 (Separab  (None, 222, 222, 32)     1344      
 leConv2D)                                                       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 111, 111, 32)     0         
 2D)                                                             
                                                             

In [22]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the number of epochs and batch size
epochs = 10
batch_size = 128

# Set up a checkpoint to save the best model weights
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[checkpoint]
)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
# getting the test list
f = open("test.txt", "r")
temp = f.read()
videos = temp.split('\n')

# creating the dataframe
test = pd.DataFrame()
test['video_name'] = videos
test = test[:-1]
test_videos = test['video_name']
test.head()

Unnamed: 0,video_name
0,harassment/harassment_shv4.mp4
1,harassment/harassment_shv8.mp4
2,harassment/harassment_shv14.mp4
3,harassment/harassment_shv18.mp4
4,non_harassment/non4.mp4


In [24]:
# creating the tags
train = pd.read_csv('Data/train_new.csv')
y = train['class']
y = pd.get_dummies(y)

In [25]:
from glob import glob
from scipy import stats as s
import os

# creating two lists to store predicted and actual tags
predict = []
actual = []

# Enable eager execution
tf.config.run_functions_eagerly(True)

# removing all files from the temp folder
files = glob('temp/*')
for f in files:
    os.remove(f)

# for loop to extract frames from each test video
for i in tqdm(range(test_videos.shape[0])):
    count = 0
    videoFile = test_videos[i]
    videoPath = os.path.join('Data', videoFile)
    cap = cv2.VideoCapture(videoPath)  # capturing the video from the given path
    
    if not cap.isOpened():
        # Skip iteration if video capture fails
        continue
    
    frameRate = cap.get(5)  # frame rate
    
    while cap.isOpened():
        frameId = cap.get(1)  # current frame number
        ret, frame = cap.read()
        
        if not ret:
            break
        
        if frameId % 3 == 0:
            # storing the frames of this particular video in temp folder
            filename = os.path.join('temp', f'_frame{count}.jpg')
            cv2.imwrite(filename, frame)
            count += 1
    
    cap.release()
    
    # reading all the frames from temp folder
    images = glob("temp/*.jpg")
    
    if len(images) == 0:
        # Skip iteration if no valid images found
        continue
    
    prediction_images = []
    for img_path in images:
        img = image.load_img(img_path, target_size=(224, 224, 3))
        img = image.img_to_array(img)
        img = img / 255.0
        prediction_images.append(img)
        
    # converting all the frames for a test video into numpy array
    prediction_images = np.array(prediction_images)
    
    # predicting tags for each array
    prediction = model.predict_on_batch(prediction_images)
    
    # converting features to one-dimensional array
    prediction_labels = np.argmax(prediction, axis=1)
    
    # appending the mode of predictions in predict list to assign the tag to the video
    predict.append(1 if np.sum(prediction_labels) >= 5 else 0)
    
    # appending the actual tag of the video
    if videoFile.split('/')[1].split('_')[0] == 'harassment':
        actual.append(1)
    else:
        actual.append(0)

100%|██████████| 9/9 [00:15<00:00,  1.76s/it]


In [26]:
from sklearn.metrics import accuracy_score
accuracy_score(predict, actual)*100

50.0

In [27]:
print(predict)

[0, 0, 0, 0, 0, 0, 0, 0]


In [28]:
print(actual)

[1, 1, 1, 1, 0, 0, 0, 0]
