In [2]:
# downloads C3D weights from https://github.com/aslucki/C3D_Sport1M_keras
!mkdir weights
!gdown --id 1rlZ-xTkTMjgWKiQFUedRnHlDgQwx6yTm -O weights/weights.h5

mkdir: weights: File exists
Downloading...
From (original): https://drive.google.com/uc?id=1rlZ-xTkTMjgWKiQFUedRnHlDgQwx6yTm
From (redirected): https://drive.google.com/uc?id=1rlZ-xTkTMjgWKiQFUedRnHlDgQwx6yTm&confirm=t&uuid=aa733e44-07e7-4b3b-be72-ea5fe1f80cc2
To: /Users/david/Desktop/violance_detection/weights/weights.h5
100%|████████████████████████████████████████| 320M/320M [00:49<00:00, 6.41MB/s]


In [None]:

# C3D definition
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv3D, MaxPooling3D, ZeroPadding3D

def create_C3D_model(summary = False):
    """Creates the C3D model with the sequential API: https://keras.io/models/sequential/

    The network takes clips of shape (16, 112, 112, 3), stacks five groups of
    3x3x3 convolutions followed by max pooling, and ends with two 4096-unit
    fully connected layers (fc6, fc7) and a 487-way softmax (fc8). Every
    layer that callers may want to address is explicitly named (e.g. 'fc6').

    Parameters
    ----------
    summary : bool
              if True, prints the model summary (default False)

    Returns
    -------
    model : Sequential
            The instantiated model (weights are NOT loaded here)
    """

    model = Sequential()
    input_shape = (16, 112, 112, 3)

    # 1st layer group: pooling only over the spatial axes, the temporal axis keeps 16 frames
    model.add(Conv3D(64, (3, 3, 3), activation='relu',
                     padding='same', name='conv1',
                     input_shape=input_shape))
    model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2),
                           padding='valid', name='pool1'))
    # 2nd layer group
    model.add(Conv3D(128, (3, 3, 3), activation='relu',
                     padding='same', name='conv2'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool2'))
    # 3rd layer group
    model.add(Conv3D(256, (3, 3, 3), activation='relu',
                     padding='same', name='conv3a'))
    model.add(Conv3D(256, (3, 3, 3), activation='relu',
                     padding='same', name='conv3b'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool3'))
    # 4th layer group
    model.add(Conv3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv4a'))
    model.add(Conv3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv4b'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool4'))
    # 5th layer group
    model.add(Conv3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv5a'))
    model.add(Conv3D(512, (3, 3, 3), activation='relu',
                     padding='same', name='conv5b'))
    # Pads one extra row/column on the spatial axes before the last pooling
    model.add(ZeroPadding3D(padding=((0, 0), (0, 1), (0, 1)), name='zeropad5'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           padding='valid', name='pool5'))
    model.add(Flatten())
    # FC layers group
    model.add(Dense(4096, activation='relu', name='fc6'))
    model.add(Dropout(.5))
    model.add(Dense(4096, activation='relu', name='fc7'))
    model.add(Dropout(.5))
    model.add(Dense(487, activation='softmax', name='fc8'))

    if summary:
        # summary() prints the table itself and returns None, so it must not be wrapped in print()
        model.summary()

    return model

In [None]:
# Utility functions for the experiments (chunk count, video preprocessing, feature computation, feature loading)
from keras.models import Model
import os
import cv2
import numpy as np


def getFeatureExtractor(weigthsPath, layer, verbose = False):
    """Builds a feature extractor on top of the C3D network.

    The full C3D model is created, its weights are loaded from file, and a new
    model is returned that shares the C3D input but stops at the requested layer.

    Parameters
    ----------
    weigthsPath : str
                  Pathname of the weights file for the C3D model.
    layer : str
            Name of the C3D layer whose output becomes the extractor output
    verbose : bool
              if True the C3D model summary is printed (default False)

    Returns
    -------

    Model : Model class
            Feature extractor mapping the C3D input to the output of `layer`

    """

    c3d = create_C3D_model(verbose)
    c3d.load_weights(weigthsPath)
    c3d.compile(loss='mean_squared_error', optimizer='sgd')

    feature_layer = c3d.get_layer(layer)
    return Model(inputs=c3d.input, outputs=feature_layer.output)

def count_chunks(videoBasePath):
    """Counts the 16-frame chunks available in a dataset organized in violent and non-violent,
    cam1 and cam2 folders, placed at videoBasePath.

    The count relies on the frame count reported by OpenCV for each file
    (CAP_PROP_FRAME_COUNT); frames are not decoded. Trailing frames that do not
    fill a complete 16-frame chunk are ignored.

    Parameters
    ----------
    videoBasePath : str
                    Base path of the dataset

    Returns
    -------
    cnt : int
          number of 16-frame chunks in the dataset
    """

    folders = ['violent', 'non-violent']
    cams = ['cam1', 'cam2']
    cnt = 0

    for folder in folders:
        for camName in cams:
            path = os.path.join(videoBasePath, folder, camName)

            for videofile in os.listdir(path):
                video = cv2.VideoCapture(os.path.join(path, videofile))
                try:
                    numframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                finally:
                    # Release the capture so file handles do not pile up over the whole dataset
                    video.release()
                cnt += numframes // 16

    return cnt

def preprocessVideos(videoBasePath, featureBasePath, verbose=True):
    """Preprocesses all the videos.

    It extracts samples for the input of C3D from a video dataset, organised in violent and non-violent, cam1 and cam2 folders.
    Every frame is resized to 112x112 and frames are grouped in consecutive, non-overlapping chunks of 16.
    The samples and the labels are stored on two memmap numpy arrays, called samples.mmap and labels.mmap, at "featureBasePath".
    The numpy array with samples has shape (Chunk #, 16, 112, 112, 3) and dtype float32, the labels array has
    shape (Chunk #,) and dtype int8 (1 = violent, 0 = non violent).
    For the AIRTLab dataset the number of chunks is 3537.

    Parameters
    ----------
    videoBasePath : str
                    Pathname to the base of the video repository, which contains two directories,
                    violent and non-violent, which are divided into cam1 and cam2.
    featureBasePath : str
                      it is the pathname of a base where the numpy arrays have to be saved.
    verbose : bool
              if True print debug logs (default True)

    Raises
    ------
    ValueError
        If fewer frames can be decoded from a video than its metadata reports, so that
        the chunks counted up-front cannot all be filled.

    """

    folders = ['violent', 'non-violent']
    cams = ['cam1', 'cam2']
    # The memmaps are sized up-front from the frame counts reported by the video metadata
    total_chunks = count_chunks(videoBasePath)
    npSamples = np.memmap(os.path.join(featureBasePath, 'samples.mmap'), dtype=np.float32, mode='w+', shape=(total_chunks, 16, 112, 112, 3))
    npLabels = np.memmap(os.path.join(featureBasePath, 'labels.mmap'), dtype=np.int8, mode='w+', shape=(total_chunks,))
    cnt = 0

    for folder in folders:
        label = np.int8(1) if folder == 'violent' else np.int8(0)
        for camName in cams:
            path = os.path.join(videoBasePath, folder, camName)

            # sorted() makes the sample order independent of the filesystem listing order
            for videofile in sorted(os.listdir(path)):
                filePath = os.path.join(path, videofile)
                video = cv2.VideoCapture(filePath)
                try:
                    numframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                    fps = int(video.get(cv2.CAP_PROP_FPS))
                    chunks = numframes//16
                    if verbose:
                        print(filePath)
                        print("*** [Video Info] Number of frames: {} - fps: {} - chunks: {}".format(numframes, fps, chunks))
                    videoFrames = []
                    while True:
                        ret, img = video.read()
                        if not ret:
                            break
                        videoFrames.append(cv2.resize(img, (112, 112)))
                finally:
                    # Always free the decoder, even if reading/resizing fails
                    video.release()

                # A short read would otherwise fail with an obscure broadcasting error or,
                # with exactly one leftover frame, silently replicate it 16 times.
                if len(videoFrames) < chunks * 16:
                    raise ValueError(
                        "Decoded {} frames from {} but {} are needed for {} chunks".format(
                            len(videoFrames), filePath, chunks * 16, chunks))

                for i in range(chunks):
                    # Convert one chunk at a time instead of copying the whole video to float32
                    npSamples[cnt] = np.asarray(videoFrames[i*16:i*16+16], dtype=np.float32)
                    npLabels[cnt] = label
                    cnt += 1

    if verbose:
        print("** Labels **")
        print(npLabels.shape)
        print('\n****\n')
        print("** Samples **")
        print(npSamples.shape)
        print('\n****\n')

    # Make sure everything reaches the disk before the memmaps are dropped
    npSamples.flush()
    npLabels.flush()
    del npSamples
    del npLabels

def extractFeatures(weigthsPath, videoBasePath, featureBasePath='', verbose=True):
    """Extracts features from a video dataset, using fc6 of the C3D network.

    It extracts features from a video dataset, organized in violent and non-violent, cam1 and cam2 folders.
    Every frame is resized to 112x112, frames are grouped in consecutive, non-overlapping chunks of 16,
    and each chunk is fed to the feature extractor on its own.

    Parameters
    ----------
    weigthsPath : str
                  Pathname of the weights file for the C3D model.
    videoBasePath : str
                    Pathname to the base of the video repository, which contains two directories,
                    violent and non-violent, which are divided into cam1 and cam2.
    featureBasePath : str
                      if non-empty, it is the pathname of a base where the features are saved, one CSV
                      file per video (one row per chunk), organized in violent, non-violent, cam1 and
                      cam2 exactly as the video repository (default ''). Missing folders are created
                      and an existing CSV for the same video is overwritten.
    verbose : bool
              if True print debug logs (default True)

    Returns
    -------
    X : numpy.ndarray
        Features array of shape (Num of video chunks, 4096) representing the 4096-dim feature vector for each video
        chunk in the dataset
    y : numpy.ndarray
        Labels array of shape (Num of video chunks) representing the labels for all the video chunks in the dataset
        (1 = violent, 0 = non violent)

    """

    featureExtractor = getFeatureExtractor(weigthsPath, 'fc6', verbose)

    folders = ['violent', 'non-violent']
    cams = ['cam1', 'cam2']
    labels = []
    features = []

    for folder in folders:
        label = 1 if folder == 'violent' else 0
        for camName in cams:
            path = os.path.join(videoBasePath, folder, camName)
            featurepath = os.path.join(featureBasePath, folder, camName)
            if featureBasePath:
                os.makedirs(featurepath, exist_ok=True)

            # sorted() makes the row order independent of the filesystem listing order
            for videofile in sorted(os.listdir(path)):
                filePath = os.path.join(path, videofile)
                video = cv2.VideoCapture(filePath)
                try:
                    numframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                    fps = int(video.get(cv2.CAP_PROP_FPS))
                    chunks = numframes//16
                    if verbose:
                        print(filePath)
                        print("*** [Video Info] Number of frames: {} - fps: {} - chunks: {}".format(numframes, fps, chunks))
                    videoFrames = []
                    while True:
                        ret, img = video.read()
                        if not ret:
                            break
                        videoFrames.append(cv2.resize(img, (112, 112)))
                finally:
                    # Always free the decoder, even if reading/resizing fails
                    video.release()

                # The metadata frame count can exceed what is actually decodable:
                # only keep chunks for which 16 real frames are available.
                chunks = min(chunks, len(videoFrames)//16)

                videoOutputs = []
                for i in range(chunks):
                    chunk = np.array(videoFrames[i*16:i*16+16], dtype=np.float32)
                    out = featureExtractor.predict(np.array([chunk]))
                    videoOutputs.append(out)
                    features.append(out.reshape(4096))
                    labels.append(label)

                if featureBasePath:
                    filename = os.path.splitext(videofile)[0]
                    featureFilePath = os.path.join(featurepath, filename + '.csv')
                    # 'wb' (not 'ab'): re-running the extraction must not append duplicate rows
                    with open(featureFilePath, 'wb') as f:
                        for out in videoOutputs:
                            np.savetxt(f, out)

                    if verbose:
                        print('*** Saved file: ' + featureFilePath)
                        print('\n')

    y = np.array(labels)
    X = np.array(features)

    if verbose:
        print("** Labels **")
        print(y.shape)
        print('\n****\n')
        print("** Features **")
        print(X.shape)
        print('\n****\n')

    return X, y

def get_labels_and_features_from_files(basePath, verbose=True):
    """Generates the feature array and the labels from saved feature files.

    It generates features and labels from saved features files, organised in violent and
    non-violent, cam1 and cam2 folders. Each file is a whitespace-separated text file with
    one feature vector per row; the label of every row comes from the top-level folder.
    Hidden entries (names starting with '.') and sub-directories are skipped.

    Parameters
    ----------
    basePath : str
               Pathname to the base of the feature files repository, which contains two directories,
               violent and non-violent, which are divided into cam1 and cam2.
    verbose : bool
              if True print debug logs (default True)

    Returns
    -------
    X : numpy.ndarray
        Features array of shape (Num of video chunks, feature size) with one feature vector for each video
        chunk in the dataset
    y : numpy.ndarray
        Labels array of shape (Num of video chunks) representing the labels for all the video chunks in the dataset
        (1 = violent, 0 = non violent)

    """

    folders = ['violent', 'non-violent']
    cams = ['cam1', 'cam2']
    labels = []
    features = []

    for folder in folders:
        label = 1 if folder == 'violent' else 0
        for camName in cams:
            path = os.path.join(basePath, folder, camName)

            # sorted() makes the row order independent of the filesystem listing order
            for textfile in sorted(os.listdir(path)):
                filePath = os.path.join(path, textfile)
                if textfile.startswith('.') or not os.path.isfile(filePath):
                    continue
                # ndmin=2 keeps a file holding a single chunk as one row; without it
                # loadtxt returns a 1-D array and every scalar would become a "feature".
                chunks = np.loadtxt(filePath, ndmin=2)
                if chunks.size == 0:
                    # file saved for a video shorter than one chunk
                    continue
                features.extend(chunks)
                labels.extend([label] * len(chunks))

    y = np.array(labels)
    X = np.array(features)

    if verbose:
        print("** Labels **")
        print(y.shape)
        print('\n****\n')
        print("** Features **")
        print(X.shape)
        print('\n****\n')

    return X, y


# Feature Extraction with C3D

In [None]:
# folders to store samples and features during the experiments

!rm -rf airtlabDataset
!mkdir airtlabDataset
!mkdir airtlabDataset/violent
!mkdir airtlabDataset/violent/cam1
!mkdir airtlabDataset/violent/cam2
!mkdir airtlabDataset/non-violent
!mkdir airtlabDataset/non-violent/cam1
!mkdir airtlabDataset/non-violent/cam2
!mkdir airtlabDataset/results

In [None]:
# Run the fc6 feature extraction over the video dataset and save one CSV per video under airtlabDataset
extractFeatures(
    weigthsPath='weights/weights.h5',
    videoBasePath='datarepo/violence-detection-dataset',
    featureBasePath='airtlabDataset',
    verbose=True,
)

# Testing C3D + SVM

In [None]:
# Experiment 1: C3D + SVM
import pandas as pd
import numpy as np
import sklearn
from sklearn import svm
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.metrics import roc_curve, auc, accuracy_score, confusion_matrix, classification_report
import matplotlib.pylab as plt

# Load the features saved on disk (one row per video chunk) together with their labels
X, y = get_labels_and_features_from_files('airtlabDataset', False)


# Cross Validation
clf = svm.SVC(kernel='linear', C = 1, probability=True)

nsplits = 5
cv = StratifiedShuffleSplit(n_splits=nsplits, train_size=0.8, random_state=42)


tprs = []
aucs = []
scores = []
sens = np.zeros(shape=(nsplits))
specs = np.zeros(shape=(nsplits))
f1Scores = np.zeros(shape=(nsplits))
# Common FPR grid on which the per-split ROC curves are interpolated before averaging
mean_fpr = np.linspace(0, 1, 100)
plt.figure(num=1, figsize=(10,10))
for i, (train, test) in enumerate(cv.split(X, y), start=1):
    # The same estimator is refit from scratch on every split
    clf.fit(X[train], y[train])
    pred_acc = clf.predict(X[test])
    split_acc = accuracy_score(y[test], pred_acc)
    scores.append(split_acc)
    # Probabilities are only used for the ROC curve
    prediction = clf.predict_proba(X[test])
    # Compute ROC curve and area under the curve
    fpr, tpr, thresholds = roc_curve(y[test], prediction[:, 1], pos_label=1)
    tprs.append(np.interp(mean_fpr, fpr, tpr))
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    plt.plot(fpr, tpr, lw=2, alpha=0.3, label='ROC split %d (AUC = %0.4f)' % (i, roc_auc))
    print('confusion matrix split ' + str(i))
    print(confusion_matrix(y[test], pred_acc))

    report = classification_report(y[test], pred_acc, target_names=['non-violent', 'violent'], output_dict=True)
    sens[i - 1] = report['violent']['recall']
    specs[i - 1] = report['non-violent']['recall']
    f1Scores[i - 1] = report['violent']['f1-score']
    # Print the report from the same predictions used for the stored metrics:
    # for SVC(probability=True) predict_proba().argmax() may disagree with predict().
    print(classification_report(y[test], pred_acc, target_names=['non-violent', 'violent']))
    print('Accuracy: ' + str(split_acc))
    print('\n')

plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8)

mean_tpr = np.mean(tprs, axis=0)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
# The two numbers in the legend are the mean AUC and its standard deviation across splits
plt.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.4f $\pm$ %0.4f)' % (mean_auc, std_auc), lw=2, alpha=.8)

# Shaded band: mean TPR +/- one standard deviation, clipped to [0, 1]
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2, label=r'1 std. dev.')

plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])
plt.xlabel('False Positive Rate',fontsize=18)
plt.ylabel('True Positive Rate',fontsize=18)
plt.title('Cross-Validation ROC of C3D + SVM model',fontsize=18)
plt.legend(loc="lower right", prop={'size': 15})

plt.savefig('C3D_SVM' + '.pdf')
plt.show()

print('Accuracies')
print(scores)
print('Sensitivities')
print(sens)
print('specificities')
print(specs)
print('F1-scores')
print(f1Scores)
print("Avg accuracy: {0} +/- {1}".format(np.mean(scores, axis=0), np.std(scores, axis=0)))
print("Avg sensitivity: {0} +/- {1}".format(np.mean(sens), np.std(sens)))
print("Avg specificity: {0} +/- {1}".format(np.mean(specs), np.std(specs)))
print("Avg f1-score: {0} +/- {1}".format(np.mean(f1Scores), np.std(f1Scores)))


In [None]:
import pickle

# Persist the trained SVM to disk.
# NOTE(review): `clf` is saved as the previous cell left it, i.e. fitted on the
# training part of the LAST cross-validation split only, not on the full dataset.
filename = 'finalized_model.sav'
# The context manager guarantees the file is flushed and closed after dumping
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)