In [2]:
import sys,os
sys.path.append('./violence_recognizer')

from violence_recognizer import trainer, model as modeling, predictor, data_generator
from violence_recognizer.data_processing import Dataset, LabelingPattern
import utility
import config as cfg


In [4]:
# File-name suffixes used to locate the pre-extracted feature files of each
# modality. The video suffix embeds the lower-cased backbone name from config.
# NOTE(review): 16 and 'v0' look like a clip length and a feature version
# respectively -- confirm against the feature-extraction step.
file_pattern = {
    'video': '@{}.{}.{}.video.npy'.format(cfg.BACKBONE, 16, 'v0').lower(),
    'audio': '@yamnet.1.v0.audio.npy',
}

file_pattern

{'video': '@resnet50.16.v0.video.npy', 'audio': '@yamnet.1.v0.audio.npy'}

In [None]:
# Describe the two feature collections; both share the same file-name patterns
# and differ only in their feature roots and label mapper.
my_dataset = Dataset(
    video_features_path=cfg.MY_VIDEO_FEATURES_PATH,
    audio_features_path=cfg.MY_AUDIO_FEATURES_PATH,
    label_mapper=LabelingPattern.my_dataset,
    pattern=file_pattern,
)

rlvs = Dataset(
    video_features_path=cfg.RLVS_VIDEO_FEATURES_PATH,
    audio_features_path=cfg.RLVS_AUDIO_FEATURES_PATH,
    label_mapper=LabelingPattern.rlvs,
    pattern=file_pattern,
)

# Populate each dataset (RLVS first, then my dataset), then merge them.
rlvs.create_dataset()
my_dataset.create_dataset()

dataset = Dataset.merge_dataset([rlvs, my_dataset])

# Display the sizes of the merged dataset's collections as a tuple so that
# mismatched lengths are easy to spot.
tuple(
    len(getattr(dataset, attribute))
    for attribute in (
        'zipped_features_path',
        'labels',
        'silent_video_list',
        'video_features_path',
        'audio_features_path',
    )
)


2000 videos and 2000 audio files found in this dataset
1208 videos and 1208 audio files found in this dataset
After merging: 3208 files in this dataset


(3208, 3208, 1062, 3208, 3208)

In [7]:
# Show the run configuration produced by the trainer (backbone, epochs,
# learning rate, seed, partitions and output paths appear in the cell output).
# NOTE(review): whether this call also mutates trainer/config state is not
# visible from here -- confirm in trainer.define_model_configuration.
trainer.define_model_configuration()

{'Backbone': 'Resnet50',
 'Cross_Dataset': '',
 'Debug': True,
 'EPOCHS': 50,
 'History_Path': './data/models_history/resnet50_my_dataset_rlvs_dataset_v0',
 'If_Mxnet_Model': False,
 'Input_Dim': (16, 2048),
 'LEARNING_RATE': 0.003,
 'Log_Path': './data/tensorboard_log/resnet50_my_dataset_rlvs_dataset_v0',
 'MODEL NAME': 'resnet50_my_dataset_rlvs_dataset_v0',
 'Model_Path': './data/saved_models/resnet50_my_dataset_rlvs_dataset_v0',
 'Result_Path': './data/generated_result/resnet50_my_dataset_rlvs_dataset_v0',
 'SEED': 0,
 'Test_Partition': 0.1,
 'Use_My_Dataset': True,
 'Use_Rlvs_Dataset': True,
 'Val_Partition': 0.1}


In [8]:
# Presumably creates the output directories (models, history, logs, results)
# used by later cells -- TODO confirm in utility.make_dirs.
utility.make_dirs()

In [9]:
# Build the network from the configured model dimension and input shape.
# summary=True is requested so the layer table is shown in the cell output.
model = modeling.get_model(
    dimension=cfg.MODEL_DIMENSION,
    summary=True,
    input_shape=cfg.INPUT_DIM,
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 video (InputLayer)             [(None, 16, 2048)]   0           []                               
                                                                                                  
 tf.math.reduce_max (TFOpLambda  (None, 2048)        0           ['video[0][0]']                  
 )                                                                                                
                                                                                                  
 tf.math.reduce_mean (TFOpLambd  (None, 2048)        0           ['video[0][0]']                  
 a)                                                                                               
                                                                                              

In [10]:
# Obtain the train and test datasets from the trainer, using the feature-file
# patterns defined earlier in the notebook.
# NOTE(review): the printed log suggests this re-scans and merges both
# datasets itself, independently of any Dataset objects built in other cells
# -- confirm in trainer.prepare_dataset.
train_dataset, test_dataset = trainer.prepare_dataset(file_pattern)

1208 videos and 1208 audio files found in this dataset
2000 videos and 2000 audio files found in this dataset
After merging: 3208 files in this dataset


In [12]:
# Train on the training dataset; `model` is rebound to the returned model and
# the returned training history is kept in `history`.
# NOTE(review): because `model` is both input and output, re-running this cell
# passes the already-trained model back in; rebuild the model first for a
# clean run.
# NOTE(review): save_history=True presumably persists the history (the log
# prints "Saving file as pandas dataframe.") -- confirm in trainer.train_model.
model, history = trainer.train_model(model,train_dataset,save_history=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Saving file as pandas dataframe.


In [13]:
# Load the selected checkpoint from the project-relative model directory
# instead of a machine-specific absolute path, so the notebook runs from any
# checkout as long as the working directory is the project root (the same
# assumption the other './data/...' paths in this notebook make).
# NOTE(review): the checkpoint file name is still pinned to one training run;
# update it if the model is retrained.
best_weights_path = os.path.join(
    ".",
    "data",
    "saved_models",
    "resnet50_my_dataset_rlvs_dataset_v0",
    "model_015_0.009_1.000_.h5",
)
model.load_weights(best_weights_path)

In [14]:
# Wrap the held-out test dataset in a data generator using twice the
# configured batch size.
# NOTE(review): val_partition=1 presumably routes every sample into the
# validation split so load_val() returns the whole test set -- confirm in
# data_generator.TensorflowDataGenerator.
test_datagen = data_generator.TensorflowDataGenerator(
    test_dataset,
    batch_size=cfg.BATCH_SIZE * 2,
    val_partition=1,
)

# Fetch the test inputs together with their labels.
test_data, test_label = test_datagen.load_val()

In [19]:
# Run the model (with the loaded weights) over the test inputs and keep the
# result for reporting.
# NOTE(review): the exact form of `classification` (labels vs. scores) is not
# visible here -- confirm in predictor.evaluate_model.
classification = predictor.evaluate_model(model, test_data)



In [20]:
# Report the evaluation of the predictions against the test labels; the cell
# output shows the best-model path, run settings and a classification report.
predictor.verbose_result(classification, test_label)

best model is ./data/saved_models/resnet50_my_dataset_rlvs_dataset_v0\model_015_0.009_1.000_.h5
{'BACKBONE': 'Resnet50',
 'Best model': './data/saved_models/resnet50_my_dataset_rlvs_dataset_v0\\model_015_0.009_1.000_.h5',
 'Class Mapping': {'No Physical Violence': 0, 'Physical Violence': 1},
 'DISCARD SILENT VIDEO': False,
 'INPUT_DIM': (16, 2048),
 'My dataset': True,
 'RLVS dataset': True,
 'SEED': 0,
 'Statistics': {'No of test samples': 320,
                'classification_report': {'0': {'f1-score': 0.99,
                                                'precision': 1.0,
                                                'recall': 0.9801980198019802,
                                                'support': 101},
                                          '1': {'f1-score': 0.9954545454545455,
                                                'precision': 0.9909502262443439,
                                                'recall': 1.0,
                                                'su