In [11]:
import cv2
import os
import numpy as np

directory_path = 'MCI/video_sequence'
frame_list = []

width = height = 128

for filename in os.listdir(directory_path):
    if filename.endswith(".avi"):
        video_path = os.path.join(directory_path, filename)
        cap = cv2.VideoCapture(video_path)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Convert frame to grayscale
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Resize the frame
            resized_frame = cv2.resize(gray_frame, (width, height))

            # Add the frame to the list
            frame_list.append(resized_frame)

        cap.release()

cv2.destroyAllWindows()

# Checking the shape of the first frame in the list
print(len(frame_list))
print(frame_list[0].shape)  # Should output: (128, 128)

25200
(128, 128)


In [12]:
frame_list = frame_list[330:660]

In [13]:
# Convert list to NumPy array and add an extra dimension
X_train = np.array(frame_list, dtype=np.float32)[..., np.newaxis]

# Normalize the data to the range [0, 1]
X_train = X_train / 255.0

print(X_train.shape)  # Should output: (num_frames, 32, 32, 1)


(330, 128, 128, 1)


In [14]:
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Input, LeakyReLU, BatchNormalization, Reshape, Flatten, Dense
from tensorflow.keras.models import Model

def build_autoencoder(input_shape):
    # Encoder
    input_layer = Input(shape=input_shape)
    x = Conv2D(32, (3, 3), strides=2, padding='same')(input_layer)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization()(x)

    x = Conv2D(64, (3, 3), strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization()(x)

    # Flatten and bottleneck
    volume_size = x.shape
    x = Flatten()(x)
    latent = Dense(100)(x)  # Latent vector

    # Decoder
    x = Dense(np.prod(volume_size[1:]))(latent)
    x = Reshape((volume_size[1], volume_size[2], volume_size[3]))(x)

    x = Conv2DTranspose(64, (3, 3), strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization()(x)

    x = Conv2DTranspose(32, (3, 3), strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization()(x)

    output_layer = Conv2DTranspose(3, (3, 3), activation='sigmoid', padding='same')(x)

    autoencoder = Model(input_layer, output_layer)
    return autoencoder

In [15]:
autoencoder = build_autoencoder(X_train.shape[1:])
autoencoder.compile(optimizer='adam', loss='mse')

# Train the autoencoder
autoencoder.fit(X_train, X_train, epochs=50, batch_size=32)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x30068b050>

In [18]:
decoded_frames = autoencoder.predict(X_train)
decoded_frames.shape



(330, 128, 128, 3)

In [27]:
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

def show_frame(frame_index):
    frame = decoded_frames[frame_index]

    # Rescale if the frames are normalized
    if frame.max() <= 1.0:
        frame = frame * 255.0

    # Use 'gray' color map if the images are grayscale
    plt.imshow(frame.astype('uint8'), cmap='gray')
    plt.axis('off')
    plt.show()


# Create a slider to browse through the frames
frame_slider = widgets.IntSlider(value=0, min=0, max=len(decoded_frames)-1, step=1, description='Frame:')

# Display the widget
widgets.interactive(show_frame, frame_index=frame_slider)

interactive(children=(IntSlider(value=0, description='Frame:', max=329), Output()), _dom_classes=('widget-inte…

In [20]:
print(decoded_frames[0])  # Check the first frame
print(decoded_frames[0].max(), decoded_frames[0].min())  # Check the range of pixel values


[[[0.24179183 0.24582212 0.2546863 ]
  [0.20297872 0.2197698  0.21230456]
  [0.22108825 0.21824053 0.22062992]
  ...
  [0.22867098 0.24133006 0.21081477]
  [0.04079739 0.02991896 0.03690413]
  [0.03434381 0.02082683 0.03732532]]

 [[0.22667268 0.22095487 0.21701808]
  [0.20176773 0.20186368 0.19682921]
  [0.21521531 0.20943025 0.20439565]
  ...
  [0.22960635 0.2485736  0.23756473]
  [0.03718255 0.01561518 0.00592444]
  [0.01995436 0.00916524 0.01255686]]

 [[0.23113686 0.22385307 0.23353732]
  [0.19081733 0.22018343 0.21761881]
  [0.20311216 0.20996693 0.2144844 ]
  ...
  [0.21158509 0.22223361 0.20542642]
  [0.00987412 0.01625542 0.01461765]
  [0.01029321 0.00924378 0.00752059]]

 ...

 [[0.21102895 0.21085185 0.20779979]
  [0.19104898 0.20000213 0.1969489 ]
  [0.19528724 0.2043239  0.19911592]
  ...
  [0.20853238 0.22046512 0.21413812]
  [0.00288159 0.0020274  0.00095103]
  [0.0017644  0.00170999 0.00083506]]

 [[0.21537079 0.21337019 0.21005884]
  [0.19165999 0.19269626 0.19490357]
