In [None]:
# %pip install requests urllib if needed

from io import BytesIO
from urllib.parse import urlencode
from zipfile import ZipFile

import requests

base_url = "https://cloud-api.yandex.net/v1/disk/public/resources/download?"
public_key = "https://disk.yandex.ru/d/5psHmaR1GCQq4w"

final_url = base_url + urlencode(dict(public_key=public_key))
response = requests.get(final_url)
download_url = response.json()["href"]
response = requests.get(download_url)

dist_path = "D"
zipfile = ZipFile(BytesIO(response.content))
zipfile.extractall(path=dist_path)

In [162]:
import csv
import os
import pickle
import random
from sys import getsizeof

import cv2

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import datasets, layers, models

video_path = "Videos"
main_dir = "/home/jupyter/work/resources/D/Stavropol"
model_dir = "/home/jupyter/work/resources/D/Stavropol/models"
os.chdir(main_dir)


def getClasses():
    os.chdir(main_dir)
    classes = []
    with open("classes.csv") as fp:
        reader = csv.reader(fp, delimiter=",", quotechar='"')
        # next(reader, None)  # skip the headers
        data_read = [row for row in reader]
        classes = [x[1] for x in data_read[1:]]
    return classes


def createModel(classes):
    input_shape = (None, 30, 30, 3)

    model = models.Sequential()
 
    # Define the Model Architecture.
    ########################################################################################################################
    
    model.add(layers.ConvLSTM2D(filters = 4, kernel_size = (3, 3), activation = 'tanh',data_format = "channels_last",
                                recurrent_dropout=0.2, return_sequences=True, input_shape = input_shape, name="LSTM1"))
    
    model.add(layers.MaxPooling3D(pool_size=(1, 2, 2), padding='same', data_format='channels_last', name="MP1"))
    model.add(layers.TimeDistributed(layers.Dropout(0.2)))
    
    model.add(layers.ConvLSTM2D(filters = 8, kernel_size = (3, 3), activation = 'tanh', data_format = "channels_last",
                                recurrent_dropout=0.2, return_sequences=True, name="LSTM2"))
    
    model.add(layers.MaxPooling3D(pool_size=(1, 2, 2), padding='same', data_format='channels_last', name="MP2"))
    model.add(layers.TimeDistributed(layers.Dropout(0.2)))
    
    model.add(layers.ConvLSTM2D(filters = 14, kernel_size = (3, 3), activation = 'tanh', data_format = "channels_last",
                                recurrent_dropout=0.2, return_sequences=True, name="LSTM3"))
    
    model.add(layers.MaxPooling3D(pool_size=(1, 2, 2), padding='same', data_format='channels_last', name="MP3"))
    model.add(layers.TimeDistributed(layers.Dropout(0.2)))
    
    model.add(layers.ConvLSTM2D(filters = 16, kernel_size = (2, 2), activation = 'tanh', data_format = "channels_last",
                                recurrent_dropout=0.2, return_sequences=True, name="LSTM4"))
    
    model.add(layers.MaxPooling3D(pool_size=(1, 2, 2), padding='same', data_format='channels_last', name="MP4"))
    #model.add(TimeDistributed(Dropout(0.2)))
    
    model.add(layers.GlobalAveragePooling3D()) 
    
    model.add(layers.Dense(len(classes), activation = "softmax"))
    
    ########################################################################################################################
     
    # Display the models summary.
    print(model.summary())
    
    # Return the constructed convlstm model.
    return model


def getLabeledVideo(directory):
    os.chdir(f"{main_dir}/{directory}")
    categories = os.listdir()
    x_data = []
    y_data = []
    for category in range(len(categories)):
        # print(f"Категория: {categories[category]}")
        path = f"{main_dir}/{directory}/{categories[category]}"
        os.chdir(path)
        videos = os.listdir()
        for video in videos:
            video_path = os.getcwd() + f"/{video}"
            x_data.append(video_path)
            y_data.append(category)
    return x_data, np.array(y_data)


def getVideo(path_video):
    vid_capture = cv2.VideoCapture(path_video)
    video_numerized = []
    if vid_capture.isOpened() == False:
        print("Ошибка открытия видеофайла")
    else:
        file_count = 0
        while vid_capture.isOpened():
            ret, frame = vid_capture.read()
            if ret == True:
                file_count += 1
                frame = frame / 255.0
                video_numerized.append(frame)
            else:
                break
    return np.array(video_numerized)


def batch_generator(x_data, y_data, batch_size):
    for i in range(0, len(x_data), batch_size):
        yield np.array(
            list(map(getVideo, x_data[i : i + batch_size])), dtype="object"
        ), y_data[i : i + batch_size]


def format_frames(frame, output_size):
    frame = tf.image.convert_image_dtype(frame, tf.float32)
    frame = tf.image.resize_with_pad(frame, *output_size)
    return frame


def frames_from_video_file(video_path, n_frames, output_size=(30, 30), frame_step=15):
    # Read each video frame by frame
    result = []
    src = cv2.VideoCapture(str(video_path))

    video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)

    need_length = 1 + (n_frames - 1) * frame_step

    if need_length > video_length:
        start = 0
    else:
        max_start = video_length - need_length
        start = random.randint(0, max_start + 1)

    src.set(cv2.CAP_PROP_POS_FRAMES, start)
    # ret is a boolean indicating whether read was successful, frame is the image itself
    ret, frame = src.read()
    result.append(format_frames(frame, output_size))

    for _ in range(n_frames - 1):
        for _ in range(frame_step):
            ret, frame = src.read()
            if ret:
                frame = format_frames(frame, output_size)
                result.append(frame)
            else:
                result.append(np.zeros_like(result[0]))
    src.release()
    result = np.array(result)[..., [2, 1, 0]]

    return result


class FrameGenerator:
    def __init__(self, video_paths, classes, n_frames, training=False):
        """Returns a set of frames with their associated label.

        Args:
          path: Video file paths.
          n_frames: Number of frames.
          training: Boolean to determine if training dataset is being created.
        """
        self.video_paths = video_paths
        self.classes = classes
        self.n_frames = n_frames
        self.training = training
        # self.class_names = sorted(set(p.name for p in self.path.iterdir() if p.is_dir()))
        # self.class_ids_for_name = dict((name, idx) for idx, name in enumerate(self.class_names))

    def get_files_and_class_names(self):
        video_paths = list(self.path.glob("*/*.avi"))
        classes = [p.parent.name for p in video_paths]
        return video_paths, classes

    def __call__(self):
        # video_paths, classes = self.get_files_and_class_names()

        pairs = list(zip(self.video_paths, self.classes))

        # if self.training:
        # random.shuffle(pairs)

        for path, name in pairs:
            video_frames = frames_from_video_file(path, self.n_frames)
            label = name  # self.class_ids_for_name[name] # Encode labels
            yield video_frames, label

In [163]:
classes = getClasses()
model = createModel(classes)
#model.build((100, 30, 30, 3))
# model.summary()

x_data, y_data = getLabeledVideo(video_path)

x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.33, random_state=42
)
# print(getVideo(x_train[0]).shape, y_train[0])


model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

fg = FrameGenerator(x_train, y_train, 10, training=True)
# next(fg()).shape

output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int16),
)
train_dataset = tf.data.Dataset.from_generator(
    FrameGenerator(x_train, y_train, 10, training=True),
    output_signature=output_signature,
)
val_dataset = tf.data.Dataset.from_generator(
    FrameGenerator(x_test, y_test, 10), output_signature=output_signature
)

# ПОВЫШЕНИЕ ПРОИЗВОДИТЕЛЬНОСТИ
AUTOTUNE = tf.data.AUTOTUNE

# train_dataset = train_dataset.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)
# val_dataset = val_dataset.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)

train_dataset_batched = train_dataset.batch(32)
val_dataset_batched = val_dataset.batch(32)

#print(f"Shape of training set of frames: {train_frames.shape}")

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 LSTM1 (ConvLSTM2D)          (None, None, 28, 28, 4)   1024      
                                                                 
 MP1 (MaxPooling3D)          (None, None, 14, 14, 4)   0         
                                                                 
 time_distributed_39 (TimeDi  (None, None, 14, 14, 4)  0         
 stributed)                                                      
                                                                 
 LSTM2 (ConvLSTM2D)          (None, None, 12, 12, 8)   3488      
                                                                 
 MP2 (MaxPooling3D)          (None, None, 6, 6, 8)     0         
                                                                 
 time_distributed_40 (TimeDi  (None, None, 6, 6, 8)    0         
 stributed)                                          

In [165]:

model.fit(
    t_b,
    epochs=5,
    validation_data=val_dataset,
    callbacks=tf.keras.callbacks.EarlyStopping(patience=2, monitor="val_loss"),
)

os.chdir(model_dir)
model.save("Model_CNN_LSTM_ver1.h5")

Epoch 1/5


  output, from_logits = _get_logits(


     46/Unknown - 82s 2s/step - loss: 3.1504 - accuracy: 0.0522

KeyboardInterrupt: 