In [40]:
#EXTRACTING FRAMES

import csv
import glob
import os.path
from subprocess import call
import os

def extract_files():
    """Extract JPEG frames from every ``.avi`` video and index them in a CSV.

    Walks ``train/<class>/*.avi`` and ``test/<class>/*.avi`` relative to the
    current working directory. For each video that has not been extracted
    yet, ffmpeg is invoked to write ``<video>-NNNN.jpg`` frames next to the
    video. One row per video is then written to ``data_file.csv`` in the
    current working directory, with the columns::

        [train|test], class, filename (no extension), nb frames

    A non-zero ffmpeg exit status is reported but does not abort the run;
    the video is still listed with however many frames exist on disk.
    """
    data_file = []
    folders = ['train', 'test']
    for folder in folders:
        class_folders = glob.glob(os.path.join(folder, '*'))

        for vid_class in class_folders:
            class_files = glob.glob(os.path.join(vid_class, '*.avi'))

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path)
                train_or_test, classname, filename_no_ext, filename = video_parts

                # Only extract if we haven't done it yet. Otherwise, just get
                # the info.
                if not check_already_extracted(video_parts):
                    src = os.path.join(train_or_test, classname, filename)
                    dest = os.path.join(train_or_test, classname,
                                        filename_no_ext + '-%04d.jpg')
                    return_code = call(["ffmpeg", "-i", src, dest])
                    if return_code != 0:
                        # Surface the failure instead of silently recording
                        # a video with zero (or partial) frames.
                        print("ffmpeg exited with code %d for %s"
                              % (return_code, src))

                # Now get how many frames it is.
                nb_frames = get_nb_frames_for_video(video_parts)

                data_file.append(
                    [train_or_test, classname, filename_no_ext, nb_frames])

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('data_file.csv', 'w', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerows(data_file)

    print("Extracted and wrote %d video files." % (len(data_file)))

def get_nb_frames_for_video(video_parts):
    """Count the extracted frames that belong to one video.

    Args:
        video_parts: ``(train_or_test, classname, filename_no_ext, filename)``
            tuple; the last element is not used.

    Returns:
        The number of files named ``<filename_no_ext>-<digits>.jpg`` inside
        ``<train_or_test>/<classname>``.
    """
    train_or_test, classname, filename_no_ext, _ = video_parts
    # Frames are named "<video>-NNNN.jpg". Matching on the bare video name
    # would also count frames of any other video whose name merely starts
    # with the same characters (e.g. "9" vs "90").
    prefix = filename_no_ext + '-'
    frame_dir = os.path.join(train_or_test, classname)
    # Escape so that glob metacharacters in names are taken literally.
    pattern = os.path.join(glob.escape(frame_dir),
                           glob.escape(prefix) + '*.jpg')

    nb_frames = 0
    for frame_path in glob.glob(pattern):
        frame_number = os.path.basename(frame_path)[len(prefix):-len('.jpg')]
        if frame_number.isdigit():
            nb_frames += 1
    return nb_frames

def get_video_parts(video_path):
    """Split a ``<train|test>/<class>/<file>`` path into its components.

    Args:
        video_path: Path made of (at least) three components joined with
            ``os.path.sep``; the first three are used.

    Returns:
        ``(train_or_test, classname, filename_no_ext, filename)``.
    """
    parts = video_path.split(os.path.sep)
    train_or_test = parts[0]
    classname = parts[1]
    filename = parts[2]
    # splitext strips only the final extension, so a name such as
    # "clip.v2.avi" keeps its full stem instead of being cut at the first dot.
    filename_no_ext = os.path.splitext(filename)[0]

    return train_or_test, classname, filename_no_ext, filename

def check_already_extracted(video_parts):
    """Tell whether the first frame (``-0001.jpg``) of a video is on disk.

    ``video_parts`` is the ``(train_or_test, classname, filename_no_ext,
    filename)`` tuple; the last element is ignored.
    """
    split_name, label, stem, _ = video_parts
    first_frame = os.path.join(split_name, label, stem + '-0001.jpg')
    return os.path.exists(first_frame)

# Run the extraction: calls ffmpeg for videos without a '-0001.jpg' frame and
# writes data_file.csv in the current working directory.
extract_files()

Extracted and wrote 76 video files.


In [85]:
import csv
import numpy as np
import random
import glob
import os.path
import sys
import operator
import threading

from keras.utils import to_categorical
class DataSet():
    """Index of the videos listed in ``data/data_file.csv``.

    Each CSV row is ``[train|test, class, filename (no extension), nb frames]``.
    On construction the rows are loaded, the class list is derived from them
    (optionally truncated to ``class_limit`` entries), and rows are dropped
    when their frame count is outside ``[seq_length, max_frames]`` or their
    class is not in the retained class list.
    """

    def __init__(self, seq_length=40, class_limit=None, image_shape=(224, 224, 3)):
        """
        Args:
            seq_length: Number of frames per sequence; also the minimum frame
                count a video needs to be kept.
            class_limit: Keep only the first ``class_limit`` classes (in
                sorted order); ``None`` keeps them all.
            image_shape: Stored on the instance as ``image_shape``.
        """
        self.seq_length = seq_length
        self.class_limit = class_limit
        self.sequence_path = os.path.join('data', 'sequences')
        self.max_frames = 300  # max number of frames a video can have for us to use it

        # Order matters: classes are derived from the raw rows, and the
        # cleaning step needs the class list.
        self.data = self.get_data()
        self.classes = self.get_classes()
        self.data = self.clean_data()
        self.image_shape = image_shape

    @staticmethod
    def get_data():
        """Load all non-empty rows of ``data/data_file.csv`` as lists of strings."""
        with open(os.path.join('data', 'data_file.csv'), 'r', newline='') as fin:
            reader = csv.reader(fin)
            # Blank lines (e.g. from a CSV written without newline='' on
            # Windows) come back as empty lists; skip them so later
            # item[1] / item[3] lookups cannot fail on them.
            data = [row for row in reader if row]
        return data

    def clean_data(self):
        """Return the rows usable for training.

        A row is kept when its frame count lies in
        ``[self.seq_length, self.max_frames]`` and its class is in
        ``self.classes``.
        """
        data_clean = []
        for item in self.data:
            if self.seq_length <= int(item[3]) <= self.max_frames \
                    and item[1] in self.classes:
                data_clean.append(item)

        return data_clean

    def get_classes(self):
        """Return the sorted unique class names, truncated to ``class_limit``."""
        classes = sorted({item[1] for item in self.data})
        if self.class_limit is not None:
            return classes[:self.class_limit]
        else:
            return classes

    def get_class_one_hot(self, class_str):
        """Return the one-hot vector for ``class_str`` over ``self.classes``.

        Raises:
            ValueError: if ``class_str`` is not in ``self.classes``.
        """
        # Encode it first.
        label_encoded = self.classes.index(class_str)
        # Now one-hot it.
        label_hot = to_categorical(label_encoded, len(self.classes))
        assert len(label_hot) == len(self.classes)
        return label_hot

    def split_train_test(self):
        """Split ``self.data`` into ``(train, test)`` lists by the first column.

        Rows whose first column is not ``'train'`` go to the test list.
        """
        train = []
        test = []
        for item in self.data:
            if item[0] == 'train':
                train.append(item)
            else:
                test.append(item)
        return train, test

    def get_all_sequences_in_memory(self, train_test, data_type):
        """Load every saved sequence of one split.

        Args:
            train_test: ``'train'`` selects the training rows; any other
                value selects the test rows.
            data_type: Suffix of the saved sequence files (e.g. ``'features'``).

        Returns:
            ``(X, y)`` numpy arrays: the stacked sequences and their one-hot
            labels.

        Raises:
            RuntimeError: if the sequence file of any row is missing.
        """
        train, test = self.split_train_test()
        data = train if train_test == 'train' else test

        print("Loading %d samples into memory for %sing." % (len(data), train_test))

        X, y = [], []
        for row in data:
            sequence = self.get_extracted_sequence(data_type, row)
            if sequence is None:
                # A bare `raise` here has no active exception and would
                # surface as an unrelated "No active exception to re-raise"
                # RuntimeError; keep the type but carry the real message.
                raise RuntimeError("Can't find sequence. Did you generate them?")
            X.append(sequence)
            y.append(self.get_class_one_hot(row[1]))
        return np.array(X), np.array(y)

    def get_extracted_sequence(self, data_type, sample):
        """Load ``<sequence_path>/<name>-<seq_length>-<data_type>.npy``.

        Returns the loaded array, or ``None`` when the file does not exist.
        """
        filename = sample[2]
        path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) + \
            '-' + data_type + '.npy')
        if os.path.isfile(path):
            return np.load(path)
        else:
            return None

    def get_frames_by_filename(self, filename, data_type):
        """Return the saved sequence of the row whose name equals ``filename``.

        Raises:
            ValueError: if no row has that name or its sequence file is missing.
        """
        sample = None
        for row in self.data:
            if row[2] == filename:
                sample = row
                break
        if sample is None:
            raise ValueError("Couldn't find sample: %s" % filename)
        sequence = self.get_extracted_sequence(data_type, sample)
        if sequence is None:
            raise ValueError("Can't find sequence. Did you generate them?")
        return sequence

    @staticmethod
    def get_frames_for_sample(sample):
        """Given a sample row from the data file, get all the corresponding frame
        filenames, sorted.

        Only files named ``<name>-<digits>.jpg`` in
        ``data/<train|test>/<class>`` are returned.
        """
        path = os.path.join('data', sample[0], sample[1])
        # Match "<name>-NNNN.jpg" exactly: matching on the bare name would
        # also pick up frames of other videos sharing the prefix
        # (e.g. "9" vs "90").
        prefix = sample[2] + '-'
        pattern = os.path.join(glob.escape(path), glob.escape(prefix) + '*.jpg')
        images = [
            frame for frame in glob.glob(pattern)
            if os.path.basename(frame)[len(prefix):-len('.jpg')].isdigit()
        ]
        return sorted(images)

    @staticmethod
    def rescale_list(input_list, size):
        """Pick ``size`` evenly spaced elements from ``input_list``.

        Every ``len(input_list) // size``-th element is taken, starting at
        index 0, and the result is cut to ``size`` items. ``input_list`` must
        hold at least ``size`` elements.
        """
        assert len(input_list) >= size
        skip = len(input_list) // size
        output = [input_list[i] for i in range(0, len(input_list), skip)]
        return output[:size]

In [86]:
import numpy as np
import os.path
from keras.preprocessing import image as Img
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model
from keras.layers import Input
from tqdm import tqdm

# Get the dataset.
# seq_length must be defined here: it is used both to name the saved files
# and to pick how many frames are sampled per video.
seq_length = 40
data = DataSet(seq_length=seq_length, class_limit=2)

base_model = InceptionV3(
    weights='imagenet',
    include_top=True
)
# We'll extract features at the final pool layer.
model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('avg_pool').output
)

# np.save does not create missing directories.
os.makedirs(data.sequence_path, exist_ok=True)

# Loop through data; tqdm advances once per video, including skipped ones.
for video in tqdm(data.data):
    # Get the path to the sequence for this video.
    path = os.path.join(data.sequence_path, video[2] + '-' + str(seq_length) + \
        '-features')  # numpy will auto-append .npy

    # Check if we already have it.
    if os.path.isfile(path + '.npy'):
        continue

    # Get the frames for this video.
    frames = data.get_frames_for_sample(video)

    # Now downsample to just the ones we need.
    frames = data.rescale_list(frames, seq_length)

    # Load the sampled frames as one batch and run the network once per
    # video instead of once per frame.
    batch = np.stack([
        Img.img_to_array(Img.load_img(frame_path, target_size=(299, 299)))
        for frame_path in frames
    ])
    sequence = model.predict(preprocess_input(batch))

    # Save the sequence: one feature vector per sampled frame.
    np.save(path, sequence)























  0%|          | 0/34 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















  3%|▎         | 1/34 [00:09<04:58,  9.05s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















  6%|▌         | 2/34 [00:16<04:30,  8.44s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















  9%|▉         | 3/34 [00:22<04:05,  7.93s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















 12%|█▏        | 4/34 [00:30<03:57,  7.92s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















 15%|█▍        | 5/34 [00:37<03:41,  7.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















 18%|█▊        | 6/34 [00:44<03:31,  7.54s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















 21%|██        | 7/34 [00:51<03:13,  7.18s/it][

In [87]:
from keras.layers import Dense, Flatten, Dropout, ZeroPadding3D
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from collections import deque
import sys
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, CSVLogger
import time
import os.path
# Keras callbacks write into these directories; create them up front so the
# first checkpoint / log write cannot fail on a missing path.
checkpoint_dir = os.path.join('data', 'checkpoints')
log_dir = os.path.join('data', 'logs')
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

# Helper: Save the model whenever the monitored validation loss improves.
checkpointer = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'lstm-features' + '.{epoch:03d}-{val_loss:.3f}.hdf5'),
    verbose=1,
    save_best_only=True)

# Helper: TensorBoard
tb = TensorBoard(log_dir=os.path.join(log_dir, 'lstm'))

# Helper: Stop when we stop learning.
early_stopper = EarlyStopping(patience=10)

# Helper: Save results.
timestamp = time.time()
csv_logger = CSVLogger(os.path.join(log_dir, 'lstm' + '-' + 'training-' + \
    str(timestamp) + '.log'))

# Get the data and process it.
data = DataSet(
    seq_length=40,
    class_limit=70
)
X, y = data.get_all_sequences_in_memory('train', 'features')
X_test, y_test = data.get_all_sequences_in_memory('test', 'features')

# The LSTM input shape is (frames per sequence, features per frame); take it
# from the loaded data so it always matches the saved sequences.
model = Sequential()
model.add(LSTM(2048, return_sequences=False, input_shape=X.shape[1:], dropout=0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(data.classes), activation='softmax'))
optimizer = Adam(lr=1e-5, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=optimizer,
              metrics=['accuracy', 'top_k_categorical_accuracy'])
# summary() prints the table itself and returns None, so it must not be
# wrapped in print().
model.summary()

# NOTE(review): the 'test' split is used as validation data, so checkpoint
# selection and early stopping are driven by it; confirm that no separate
# held-out set is expected for the final evaluation.
history = model.fit(
    X,
    y,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
    callbacks=[tb, early_stopper, csv_logger, checkpointer],
    epochs=100)
    

Loading 17 samples into memory for training.
Loading 17 samples into memory for testing.
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_10 (LSTM)               (None, 2048)              33562624  
_________________________________________________________________
dense_19 (Dense)             (None, 512)               1049088   
_________________________________________________________________
dropout_10 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 2)                 1026      
Total params: 34,612,738
Trainable params: 34,612,738
Non-trainable params: 0
_________________________________________________________________
None
Train on 17 samples, validate on 17 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.74458, saving model to data/checkpoints/lst


Epoch 00022: val_loss improved from 0.44247 to 0.42978, saving model to data/checkpoints/lstm-features.022-0.430.hdf5
Epoch 23/100

Epoch 00023: val_loss improved from 0.42978 to 0.41807, saving model to data/checkpoints/lstm-features.023-0.418.hdf5
Epoch 24/100

Epoch 00024: val_loss improved from 0.41807 to 0.40730, saving model to data/checkpoints/lstm-features.024-0.407.hdf5
Epoch 25/100

Epoch 00025: val_loss improved from 0.40730 to 0.39696, saving model to data/checkpoints/lstm-features.025-0.397.hdf5
Epoch 26/100

Epoch 00026: val_loss improved from 0.39696 to 0.38675, saving model to data/checkpoints/lstm-features.026-0.387.hdf5
Epoch 27/100

Epoch 00027: val_loss improved from 0.38675 to 0.37707, saving model to data/checkpoints/lstm-features.027-0.377.hdf5
Epoch 28/100

Epoch 00028: val_loss improved from 0.37707 to 0.36764, saving model to data/checkpoints/lstm-features.028-0.368.hdf5
Epoch 29/100

Epoch 00029: val_loss improved from 0.36764 to 0.35836, saving model to dat

Epoch 47/100

Epoch 00047: val_loss improved from 0.23667 to 0.23080, saving model to data/checkpoints/lstm-features.047-0.231.hdf5
Epoch 48/100

Epoch 00048: val_loss improved from 0.23080 to 0.22509, saving model to data/checkpoints/lstm-features.048-0.225.hdf5
Epoch 49/100

Epoch 00049: val_loss improved from 0.22509 to 0.21947, saving model to data/checkpoints/lstm-features.049-0.219.hdf5
Epoch 50/100

Epoch 00050: val_loss improved from 0.21947 to 0.21391, saving model to data/checkpoints/lstm-features.050-0.214.hdf5
Epoch 51/100

Epoch 00051: val_loss improved from 0.21391 to 0.20848, saving model to data/checkpoints/lstm-features.051-0.208.hdf5
Epoch 52/100

Epoch 00052: val_loss improved from 0.20848 to 0.20302, saving model to data/checkpoints/lstm-features.052-0.203.hdf5
Epoch 53/100

Epoch 00053: val_loss improved from 0.20302 to 0.19779, saving model to data/checkpoints/lstm-features.053-0.198.hdf5
Epoch 54/100

Epoch 00054: val_loss improved from 0.19779 to 0.19271, saving


Epoch 00071: val_loss improved from 0.12584 to 0.12173, saving model to data/checkpoints/lstm-features.071-0.122.hdf5
Epoch 72/100

Epoch 00072: val_loss improved from 0.12173 to 0.11802, saving model to data/checkpoints/lstm-features.072-0.118.hdf5
Epoch 73/100

Epoch 00073: val_loss improved from 0.11802 to 0.11426, saving model to data/checkpoints/lstm-features.073-0.114.hdf5
Epoch 74/100

Epoch 00074: val_loss improved from 0.11426 to 0.11045, saving model to data/checkpoints/lstm-features.074-0.110.hdf5
Epoch 75/100

Epoch 00075: val_loss improved from 0.11045 to 0.10687, saving model to data/checkpoints/lstm-features.075-0.107.hdf5
Epoch 76/100

Epoch 00076: val_loss improved from 0.10687 to 0.10342, saving model to data/checkpoints/lstm-features.076-0.103.hdf5
Epoch 77/100

Epoch 00077: val_loss improved from 0.10342 to 0.10024, saving model to data/checkpoints/lstm-features.077-0.100.hdf5
Epoch 78/100

Epoch 00078: val_loss improved from 0.10024 to 0.09740, saving model to dat


Epoch 00095: val_loss improved from 0.06008 to 0.05816, saving model to data/checkpoints/lstm-features.095-0.058.hdf5
Epoch 96/100

Epoch 00096: val_loss improved from 0.05816 to 0.05632, saving model to data/checkpoints/lstm-features.096-0.056.hdf5
Epoch 97/100

Epoch 00097: val_loss improved from 0.05632 to 0.05442, saving model to data/checkpoints/lstm-features.097-0.054.hdf5
Epoch 98/100

Epoch 00098: val_loss improved from 0.05442 to 0.05258, saving model to data/checkpoints/lstm-features.098-0.053.hdf5
Epoch 99/100

Epoch 00099: val_loss improved from 0.05258 to 0.05080, saving model to data/checkpoints/lstm-features.099-0.051.hdf5
Epoch 100/100

Epoch 00100: val_loss improved from 0.05080 to 0.04903, saving model to data/checkpoints/lstm-features.100-0.049.hdf5


<keras.callbacks.callbacks.History at 0xd696cef60>

In [91]:
import numpy as np
import os.path
from keras.preprocessing import image as Img
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model
from keras.layers import Input
import glob

def rescale_list(input_list, size):
    """Return ``size`` evenly spaced items of ``input_list`` as a list.

    Items are taken at every ``len(input_list) // size``-th index starting
    from 0. ``input_list`` must contain at least ``size`` items.
    """
    total = len(input_list)
    assert total >= size
    step = total // size
    # Slicing the index range first yields the same items as building the
    # full strided list and truncating it afterwards.
    chosen = range(0, total, step)[:size]
    return [input_list[idx] for idx in chosen]
import os
import cv2

# Class names are the sub-directory names of data/train, sorted.
# NOTE(review): the index -> name mapping must match the class list the LSTM
# was trained with (DataSet.classes in the training cell) — confirm that both
# lists are identical.
classes = sorted(
    os.path.basename(class_dir)
    for class_dir in glob.glob(os.path.join('data', 'train', '*'))
    if os.path.isdir(class_dir)
)

image_name = '9.avi'
video_id = os.path.splitext(os.path.basename(image_name))[0]
nb_frames_needed = 40

# cv2.imwrite does not create directories and only returns False on failure.
frame_dir = 'testFinal'
os.makedirs(frame_dir, exist_ok=True)

# Dump every frame of the video to disk and remember the file names.
cam = cv2.VideoCapture(image_name)
currentframe = 0
frames = []
try:
    while True:
        ret, frame = cam.read()
        if not ret:
            break
        name = os.path.join(
            frame_dir,
            'frame' + video_id + "frame_no" + str(currentframe) + '.jpg')
        if not cv2.imwrite(name, frame):
            raise IOError("Could not write frame to %s" % name)
        frames.append(name)
        currentframe += 1
finally:
    cam.release()

if len(frames) < nb_frames_needed:
    raise ValueError("%s yielded %d frames; at least %d are required."
                     % (image_name, len(frames), nb_frames_needed))
rescaled_list = rescale_list(frames, nb_frames_needed)

base_model = InceptionV3(
    weights='imagenet',
    include_top=True
)
# We'll extract features at the final pool layer.
inception_model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('avg_pool').output
)

# Load the sampled frames as one batch and extract their features in a
# single predict call.
batch = np.stack([
    Img.img_to_array(Img.load_img(frame_path, target_size=(299, 299)))
    for frame_path in rescaled_list
])
sequence = inception_model.predict(preprocess_input(batch))

# `model` is the LSTM classifier built and trained in the training cell.
sequence = np.array([sequence])
prediction = model.predict(sequence)
maxid = int(np.argmax(prediction[0]))

print(image_name,' ------- ',classes[maxid])

9.avi  -------  Help
