# بسم الله الرحمن الرحيم

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import tensorflow as tf
import zipfile

# load data

In [None]:
# Unpack the dataset archive into the local runtime filesystem.
archive_path = '/content/drive/MyDrive/Data set/Shop DataSet.zip'
extract_dir = '/content/Data set'
with zipfile.ZipFile(archive_path, 'r') as file:
    file.extractall(extract_dir)

In [None]:
# Map each class folder name to the list of video file paths it contains.
dataset = {}
datapath = '/content/Data set/Shop DataSet'

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore every seeded sample/split made from it) reproducible.
for filename in sorted(os.listdir(datapath)):
    class_dir = os.path.join(datapath, filename)
    # Skip stray files in the dataset root (e.g. archive metadata); calling
    # os.listdir on them would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue
    dataset[filename] = [
        os.path.join(class_dir, video_name)
        for video_name in sorted(os.listdir(class_dir))
        if os.path.isfile(os.path.join(class_dir, video_name))
    ]


In [None]:
# Bar chart: number of videos found per class folder.
plt.figure(figsize=(10, 10))
plt.bar(list(dataset), [len(paths) for paths in dataset.values()])
plt.show()

In [None]:
# Flatten {class name -> [video paths]} into one row per video.
data_list = [
    {'labels': category, 'Video Path': video_path}
    for category, videos in dataset.items()
    for video_path in videos
]

df_videos = pd.DataFrame(data_list)
df_videos

In [None]:
# Balance the classes by random oversampling: every class is brought up to
# the size of the largest one.

# Identify the minority and majority classes (used for reporting and as the
# target size).
class_counts = df_videos['labels'].value_counts()
minority_class = class_counts.idxmin()
majority_class = class_counts.idxmax()
minority_count = class_counts[minority_class]
majority_count = class_counts[majority_class]

print(f"Minority class: {minority_class} with {minority_count} samples")
print(f"Majority class: {majority_class} with {majority_count} samples")

# For each class keep ALL original rows and add randomly drawn duplicates
# only for the shortfall. This way no original video is lost to sampling
# with replacement, and classes other than the two extremes are kept too.
balanced_parts = []
for class_name, class_count in class_counts.items():
    df_class = df_videos[df_videos['labels'] == class_name]
    shortfall = int(majority_count - class_count)
    if shortfall > 0:
        df_extra = df_class.sample(shortfall, replace=True, random_state=42)
        df_class = pd.concat([df_class, df_extra], axis=0)
    balanced_parts.append(df_class)

df_oversampled = pd.concat(balanced_parts, axis=0)

# Shuffle the oversampled dataset
df_oversampled = df_oversampled.sample(frac=1, random_state=42).reset_index(drop=True)

print("\nClass distribution after oversampling:")
print(df_oversampled['labels'].value_counts())

oversampled_class_counts = df_oversampled['labels'].value_counts()
plt.figure(figsize=(10,10))
plt.bar(oversampled_class_counts.index, oversampled_class_counts.values)
plt.title('Class Distribution After Oversampling')
plt.xlabel('Class')
plt.ylabel('Number of Samples')
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the class-name strings in 'labels' with integer ids; the fitted
# encoder is kept in `le`.
le = LabelEncoder()
le.fit(df_oversampled['labels'])
df_oversampled['labels'] = le.transform(df_oversampled['labels'])
df_oversampled

In [None]:

def video_generator(df, frames_per_video=32, img_size=(224,224)):
    """Yield one fixed-length ``(clip, label)`` pair per video listed in *df*.

    The first ``frames_per_video`` frames of each video are read, resized,
    converted from BGR to RGB and scaled to [0, 1]. Videos with fewer frames
    are padded at the end with all-zero frames.

    Args:
        df: DataFrame with a 'Video Path' column (path passed to
            ``cv2.VideoCapture``) and a 'labels' column (value convertible
            to ``np.int32``).
        frames_per_video: Number of frames in every yielded clip.
        img_size: Target frame size as ``(height, width)``, i.e. the same
            order as the spatial dimensions of the yielded array.

    Yields:
        tuple: ``(frames, label)`` where ``frames`` is a float32 array of
        shape ``(frames_per_video, height, width, 3)`` and ``label`` is an
        ``np.int32`` scalar. Videos from which no frame can be read are
        skipped with a warning.
    """
    import warnings  # local import: only needed for the unreadable-video path

    height, width = img_size
    for _, row in df.iterrows():
        path = row['Video Path']
        label = row['labels']
        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while len(frames) < frames_per_video:
                ret, frame = cap.read()
                if not ret:
                    break
                # cv2.resize expects dsize as (width, height), the reverse of
                # the (height, width) order of the resulting array shape.
                frame = cv2.resize(frame, (width, height))
                frame = frame[:, :, ::-1]            # BGR -> RGB
                frames.append(frame.astype(np.float32) / 255.0)
        finally:
            # Release the capture handle even if decoding/resizing raised.
            cap.release()

        # Nothing decodable: there is no frame to derive the padding shape
        # from, and an all-black clip would only add label noise.
        if frames_per_video > 0 and not frames:
            warnings.warn(f"Skipping unreadable or empty video: {path}")
            continue

        # padding if short
        missing = frames_per_video - len(frames)
        if missing > 0:
            frames.extend(np.zeros_like(frames[0]) for _ in range(missing))
        yield np.array(frames, dtype=np.float32), np.int32(label)

# Clip geometry used by the generator calls and the model input layer.
# output signature (channels_last): (frames, H, W, C)
frames_per_video = 16
img_size = (256, 256)
output_signature = (
    tf.TensorSpec(shape=(frames_per_video, *img_size, 3), dtype=tf.float32),  # clip
    tf.TensorSpec(shape=(), dtype=tf.int32),                                  # scalar label
)

In [None]:
# split data into train and test
from sklearn.model_selection import train_test_split

# df_oversampled contains repeated rows for the videos that were duplicated
# during oversampling. Splitting those rows directly can put copies of the
# same video on both sides of the split (train/test leakage), so the split
# is made over unique videos instead.
unique_videos = df_oversampled.drop_duplicates(subset='Video Path')
train_videos, test_df = train_test_split(unique_videos, test_size=0.2, random_state=42)

# Training keeps every (possibly repeated) row of its videos so the class
# balance from oversampling is preserved; the test set holds each held-out
# video exactly once.
train_df = df_oversampled[df_oversampled['Video Path'].isin(train_videos['Video Path'])]

# Create TensorFlow Datasets from the split DataFrames
train_ds = tf.data.Dataset.from_generator(
    # Reshuffle the rows on every pass: shuffling file paths costs nothing,
    # whereas a tf.data shuffle buffer would have to hold decoded clips.
    lambda: video_generator(train_df.sample(frac=1), frames_per_video, img_size),
    output_signature=output_signature
)

test_ds = tf.data.Dataset.from_generator(
    lambda: video_generator(test_df, frames_per_video, img_size),
    output_signature=output_signature
)


# optional augmentation function (tf ops)
def augment(frames, label):
    """Randomly mirror a whole clip horizontally.

    Args:
        frames: Clip tensor of shape (T, H, W, C).
        label: Label belonging to the clip; returned unchanged.

    Returns:
        tuple: ``(frames, label)`` with ``frames`` either untouched or with
        every frame flipped left-right.
    """
    # One coin toss per clip. tf.image.random_flip_left_right would treat a
    # 4-D input as a batch and decide per frame, mirroring some frames of a
    # clip but not others.
    do_flip = tf.random.uniform(shape=[]) < 0.5
    frames = tf.cond(
        do_flip,
        lambda: tf.image.flip_left_right(frames),
        lambda: frames,
    )
    # you can add random crop, brightness, etc.
    return frames, label

# Training pipeline: augment each clip, then group into batches of 4 and
# prefetch. The test pipeline is batched and prefetched without augmentation.
train_ds = (
    train_ds
    .map(augment, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(4)
    .prefetch(tf.data.AUTOTUNE)
)
test_ds = test_ds.batch(4).prefetch(tf.data.AUTOTUNE)

# Show the element specs of both pipelines.
print("Training dataset:", train_ds, sep="\n")
print("\nTesting dataset:", test_ds, sep="\n")

In [None]:
# build 3D CNN model
# Three Conv3D -> BatchNorm -> MaxPool -> Dropout stages, global average
# pooling, one hidden dense layer, and a single sigmoid unit for binary
# classification.

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(frames_per_video, img_size[0], img_size[1], 3)),
    tf.keras.layers.Conv3D(64, (3, 3, 3), strides=1, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling3D((1, 2, 2)),   # first pooling leaves the frame axis untouched
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv3D(128, (3, 3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling3D((2, 2, 2)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv3D(256, (3, 3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling3D((2, 2, 2)),
    tf.keras.layers.Dropout(0.2),
    # Global pooling already yields a flat (batch, channels) tensor, so no
    # Flatten layer is needed before the dense head.
    tf.keras.layers.GlobalAveragePooling3D(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid'),  # one output unit for binary classification
])

In [None]:
# configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
adam = tf.keras.optimizers.Adam(learning_rate=0.01)
bce = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer=adam, loss=bce, metrics=['accuracy'])

In [None]:
# Train for 15 epochs. NOTE(review): test_ds doubles as the validation set
# here, so the per-epoch validation metrics are computed on the test data.
history = model.fit(train_ds, validation_data=test_ds, epochs=15)

In [None]:
# Final evaluation on the test pipeline; with the single 'accuracy' metric
# passed to compile, evaluate() is expected to return [loss, accuracy].
loss, acc = model.evaluate(test_ds)

In [None]:
# Report the loss and accuracy values unpacked from model.evaluate(test_ds).
print(f"Loss : {loss}")
print(f"Accuracy : {acc}")