In [3]:
import pandas as pd
import numpy as np


In [1]:
TRAIN_CSV = '/kaggle/input/ucf101-action-recognition/train.csv'
VAL_CSV = '/kaggle/input/ucf101-action-recognition/val.csv'
TEST_CSV = '/kaggle/input/ucf101-action-recognition/test.csv'

train_df = pd.read_csv(TRAIN_CSV)
val_df = pd.read_csv(VAL_CSV)
test_df = pd.read_csv(TEST_CSV)

In [2]:
import cv2
import numpy as np

def extract_video_features(video_path, num_frames=5):
    # Initialize video capture object
    cap = cv2.VideoCapture(video_path)

    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize variables to store features
    color_histograms = []
    edge_histograms = []

    # Extract frames from the video
    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    for i in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            # Convert frame to grayscale
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Compute color histogram for each channel (RGB)
            color_hist = []
            for j in range(3):
                hist = cv2.calcHist([frame], [j], None, [256], [0, 256])
                color_hist.append(hist.flatten())
            color_histograms.append(np.concatenate(color_hist))

            # Compute edge histogram
            edges = cv2.Canny(gray_frame, 100, 200)
            edge_hist = cv2.calcHist([edges], [0], None, [256], [0, 256])
            edge_histograms.append(edge_hist.flatten())

    # Release video capture object
    cap.release()

    # Create feature dictionary
    features = {
        'color_histograms': np.array(color_histograms).mean(axis=0),
        'edge_histograms': np.array(edge_histograms).mean(axis=0),
    }

    return features

# Extract features from the first video file as an example
video_path = os.path.join(apply_eye_makeup_folder_path, apply_eye_makeup_files[0])
features = extract_video_features(video_path)
features


NameError: ignored

In [None]:
# Initialize variables to store features for all videos
all_color_histograms = []
all_edge_histograms = []

# Extract features from all video files
for video_file in apply_eye_makeup_files:
    video_path = os.path.join(apply_eye_makeup_folder_path, video_file)
    features = extract_video_features(video_path)
    all_color_histograms.append(features['color_histograms'])
    all_edge_histograms.append(features['edge_histograms'])

# Convert features to NumPy arrays
all_color_histograms = np.array(all_color_histograms)
all_edge_histograms = np.array(all_edge_histograms)

# Check shapes of extracted features
all_color_histograms.shape, all_edge_histograms.shape


In [None]:
import matplotlib.pyplot as plt

# Function to plot histogram
def plot_histogram(data, title, xlabel, ylabel, bins=50):
    plt.figure(figsize=(10, 5))
    plt.hist(data.flatten(), bins=bins, color='skyblue', edgecolor='black')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(axis='y', alpha=0.75)
    plt.show()

# Plot color histogram distribution
plot_histogram(all_color_histograms, 'Color Histogram Distribution', 'Pixel Intensity', 'Frequency')

# Plot edge histogram distribution
plot_histogram(all_edge_histograms, 'Edge Histogram Distribution', 'Edge Intensity', 'Frequency')


In [None]:
from sklearn.preprocessing import StandardScaler

# Initialize standard scaler
scaler = StandardScaler()

# Normalize color histogram features
all_color_histograms_normalized = scaler.fit_transform(all_color_histograms)

# Normalize edge histogram features
all_edge_histograms_normalized = scaler.fit_transform(all_edge_histograms)

# Check shapes of normalized features
all_color_histograms_normalized.shape, all_edge_histograms_normalized.shape


In [None]:
from sklearn.model_selection import train_test_split

# Combine color and edge histogram features
all_features = np.concatenate((all_color_histograms_normalized, all_edge_histograms_normalized), axis=1)

# Split data into training and testing sets
X_train, X_test = train_test_split(all_features, test_size=0.2, random_state=42)

# Check shapes of training and testing sets
X_train.shape, X_test.shape


# **K-Means CLustering**

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Determine the optimal number of clusters using the silhouette score
silhouette_scores = []
n_clusters_range = range(2, 11)
for n_clusters in n_clusters_range:
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(all_features)
    silhouette_avg = silhouette_score(all_features, labels)
    silhouette_scores.append(silhouette_avg)

# Plot silhouette scores
plt.figure(figsize=(10, 5))
plt.plot(n_clusters_range, silhouette_scores, marker='o', linestyle='dashed')
plt.xlabel('Number of Clusters')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Score vs Number of Clusters')
plt.grid(axis='both', alpha=0.75)
plt.show()

# Apply K-Means clustering with the optimal number of clusters
optimal_n_clusters = n_clusters_range[np.argmax(silhouette_scores)]
kmeans = KMeans(n_clusters=optimal_n_clusters, random_state=42)
labels = kmeans.fit_predict(all_features)



# **Anomaly Detection**


In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt

# Normalize the features to be in the range [0, 1]
scaler = MinMaxScaler()
features_normalized = scaler.fit_transform(all_features)

# Build the autoencoder model
input_layer = Input(shape=(features_normalized.shape[1],))
encoded = Dense(64, activation='relu')(input_layer)
decoded = Dense(features_normalized.shape[1], activation='sigmoid')(encoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error')

# Train the autoencoder
autoencoder.fit(features_normalized, features_normalized, epochs=50, batch_size=16, shuffle=True, validation_split=0.2)

# Calculate the reconstruction error for each data point
reconstructed_features = autoencoder.predict(features_normalized)
reconstruction_error = np.mean(np.square(features_normalized - reconstructed_features), axis=1)

# Plot the reconstruction error
plt.figure(figsize=(10, 5))
plt.hist(reconstruction_error, bins=50, color='skyblue', edgecolor='black')
plt.title('Reconstruction Error Distribution')
plt.xlabel('Reconstruction Error')
plt.ylabel('Frequency')
plt.grid(axis='y', alpha=0.75)
plt.show()

# Identify anomalies based on the reconstruction error
threshold = np.percentile(reconstruction_error, 95)  # Set the threshold as the 95th percentile of the reconstruction error
anomalies = np.where(reconstruction_error > threshold)[0]

# Print the indices of the anomalies
print("Anomalies detected at indices:", anomalies)
