# **STEP 0** : Setup

In [None]:
# Mount Google Drive at /content/drive (only works inside the Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import random
from sklearn.model_selection import train_test_split


In [None]:
# Report the TensorFlow build and the GPUs TensorFlow can see (an empty list means CPU only).
print("TensorFlow Version:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))

TensorFlow Version: 2.19.0
GPU Available: []


In [None]:
# Path to the dataset (change according to your Colab setup)
# NOTE: DATASET_DIR is reassigned in the "STEP 1: Dataset Setup" cell further down,
# so this initial value is only a placeholder.
DATASET_DIR = '/content/dataset/'

# Define frame size for CNN input
# NOTE(review): the CNN+LSTM section below builds its model with its own 64x64 input
# and 16 frames; confirm whether these three constants are still needed.
IMG_HEIGHT = 224
IMG_WIDTH = 224
IMG_CHANNELS = 3

# Number of frames to sample per video
FRAMES_PER_VIDEO = 20

# Random seed for reproducibility
# Seeds NumPy, TensorFlow and Python's `random` module with the same value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)

# Echo the configuration so it is recorded in the notebook output.
print("Environment setup complete.")
print(f"Dataset path: {DATASET_DIR}")
print(f"Image size: {IMG_HEIGHT}x{IMG_WIDTH}x{IMG_CHANNELS}")
print(f"Frames per video: {FRAMES_PER_VIDEO}")

Environment setup complete.
Dataset path: /content/dataset/
Image size: 224x224x3
Frames per video: 20


# STEP 1 : Dataset Download and Inspection

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import zipfile

In [None]:
import kagglehub

# Download latest version
# `path` is the local root folder of the dataset; later cells resolve clip paths against it.
path = kagglehub.dataset_download("matthewjansen/ucf101-action-recognition")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'ucf101-action-recognition' dataset.
Path to dataset files: /kaggle/input/ucf101-action-recognition


In [None]:
import os

# STEP 1: Dataset Setup
# Use the folder returned by kagglehub.dataset_download instead of a hard-coded
# cache location: the download location differs between runtimes (the run above
# printed /kaggle/input/...), so a fixed ~/.cache path breaks on a fresh kernel.
DATASET_DIR = path

# Inspect the contents (sorted so the listing is stable across runs)
all_items = sorted(os.listdir(DATASET_DIR))
print("All items in dataset directory:", all_items)

All items in dataset directory: ['val', 'val.csv', 'test.csv', 'train.csv', 'train', 'test']


# Part A : Classical Machine Learning

Step A1: Frame sampling and hand-crafted feature extraction

In [None]:
# Sampling settings for the classical-ML pipeline; build_feature_dataset passes both to sample_frames.
NUM_FRAMES = 8          # frames sampled per clip
FRAME_SIZE = (112, 112)  # size handed to cv2.resize for every sampled frame


In [None]:
#STEP A1.1 — Imports
import cv2
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

In [None]:
#STEP A1.2 — Frame Sampling Function
def sample_frames(video_path, num_frames=8, size=(112, 112)):
    """Sample evenly spaced grayscale frames from a video file.

    Args:
        video_path: Path of the video passed to ``cv2.VideoCapture``.
        num_frames: Number of frame positions to sample between the first and
            the last frame (inclusive).
        size: Target size handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` stacking ``num_frames`` resized grayscale frames, or
        ``None`` when the reported frame count is 0 or when any sampled
        position could not be read.
    """
    capture = cv2.VideoCapture(video_path)
    n_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

    collected = []
    if n_total != 0:
        # Evenly spaced positions, truncated to integer frame indices.
        positions = np.linspace(0, n_total - 1, num_frames).astype(int)
        for pos in positions:
            capture.set(cv2.CAP_PROP_POS_FRAMES, pos)
            ok, image = capture.read()
            if ok:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                collected.append(cv2.resize(gray, size))

    capture.release()

    # An unreadable video or any failed read invalidates the whole clip.
    if n_total == 0 or len(collected) < num_frames:
        return None

    return np.array(collected)

In [None]:
#STEP A1.3 — Feature Extraction from Frames
def extract_motion_features(frames):
    """Summarise a clip as four scalar appearance/motion statistics.

    Args:
        frames: Array of frames stacked along axis 0 (e.g. the uint8 grayscale
            stack returned by ``sample_frames``).

    Returns:
        ``np.ndarray`` of shape ``(4,)``:
        ``[mean intensity, std intensity, mean |frame diff|, std |frame diff|]``.
        The two motion entries are 0.0 when fewer than two frames are given.
    """
    # Work in float: differencing uint8 frames wraps around modulo 256
    # (e.g. 5 - 20 -> 241), which silently corrupts the motion statistics.
    frames = np.asarray(frames, dtype=np.float64)

    # Appearance features
    features = [frames.mean(), frames.std()]

    # Motion features (absolute differences between consecutive frames)
    if frames.shape[0] >= 2:
        diffs = np.abs(np.diff(frames, axis=0))
        features.extend([diffs.mean(), diffs.std()])
    else:
        # No consecutive pair exists; avoid the NaN that mean() of an empty array gives.
        features.extend([0.0, 0.0])

    return np.array(features)

In [None]:
#STEP A1.4 — Build Dataset Features (TRAIN / VAL / TEST)
def build_feature_dataset(df, root_path):
    """Turn every clip listed in ``df`` into a feature vector.

    Args:
        df: DataFrame with a ``clip_path`` column (path relative to
            ``root_path``, a leading ``/`` is stripped) and a ``label`` column.
        root_path: Folder against which ``clip_path`` is resolved.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the feature vectors and the matching
        labels. Clips for which ``sample_frames`` returns ``None`` are dropped
        from both.
    """
    feature_rows, labels = [], []

    progress = tqdm(df.iterrows(), total=len(df))
    for _, record in progress:
        clip_file = os.path.join(root_path, record['clip_path'].lstrip('/'))
        clip_frames = sample_frames(clip_file, NUM_FRAMES, FRAME_SIZE)

        # Keep features and labels aligned: only append both when sampling succeeded.
        if clip_frames is not None:
            feature_rows.append(extract_motion_features(clip_frames))
            labels.append(record['label'])

    return np.array(feature_rows), np.array(labels)

In [None]:
#STEP A1.5 — Run Feature Extraction

# Load the split manifests that ship with the dataset (train.csv / val.csv / test.csv
# sit next to the train/val/test folders). Without this the notebook fails on a fresh
# kernel because train_df / val_df / test_df were never defined.
# Each manifest must provide the `clip_path` and `label` columns used below.
train_df = pd.read_csv(os.path.join(path, 'train.csv'))
val_df   = pd.read_csv(os.path.join(path, 'val.csv'))
test_df  = pd.read_csv(os.path.join(path, 'test.csv'))

for split_name, split_df in (('train', train_df), ('val', val_df), ('test', test_df)):
    missing = {'clip_path', 'label'} - set(split_df.columns)
    assert not missing, f"{split_name}.csv is missing columns: {missing}"

# Slow step (roughly 25 minutes in the recorded run): one video decode per clip.
X_train, y_train = build_feature_dataset(train_df, path)
X_val, y_val     = build_feature_dataset(val_df, path)
X_test, y_test   = build_feature_dataset(test_df, path)

print("Train features shape:", X_train.shape)
print("Val features shape:", X_val.shape)
print("Test features shape:", X_test.shape)

100%|██████████| 10055/10055 [19:12<00:00,  8.73it/s]
100%|██████████| 1673/1673 [03:14<00:00,  8.61it/s]
100%|██████████| 1723/1723 [03:16<00:00,  8.75it/s]

Train features shape: (10012, 4)
Val features shape: (1669, 4)
Test features shape: (1715, 4)





Step A2: Model training and hyperparameter search

In [None]:
#STEP A2.1 — Encode Labels (IMPORTANT)

from sklearn.preprocessing import LabelEncoder

# Learn the class -> integer mapping from the training labels only, then apply
# that same mapping to every split.
le = LabelEncoder()
le.fit(y_train)

y_train_enc, y_val_enc, y_test_enc = (
    le.transform(split_labels) for split_labels in (y_train, y_val, y_test)
)

print("Class mapping:")
for class_id, class_name in enumerate(le.classes_):
    print(class_id, "->", class_name)

Class mapping:
0 -> ApplyEyeMakeup
1 -> ApplyLipstick
2 -> Archery
3 -> BabyCrawling
4 -> BalanceBeam
5 -> BandMarching
6 -> BaseballPitch
7 -> Basketball
8 -> BasketballDunk
9 -> BenchPress
10 -> Biking
11 -> Billiards
12 -> BlowDryHair
13 -> BlowingCandles
14 -> BodyWeightSquats
15 -> Bowling
16 -> BoxingPunchingBag
17 -> BoxingSpeedBag
18 -> BreastStroke
19 -> BrushingTeeth
20 -> CleanAndJerk
21 -> CliffDiving
22 -> CricketBowling
23 -> CricketShot
24 -> CuttingInKitchen
25 -> Diving
26 -> Drumming
27 -> Fencing
28 -> FieldHockeyPenalty
29 -> FloorGymnastics
30 -> FrisbeeCatch
31 -> FrontCrawl
32 -> GolfSwing
33 -> Haircut
34 -> HammerThrow
35 -> Hammering
36 -> HandstandPushups
37 -> HandstandWalking
38 -> HeadMassage
39 -> HighJump
40 -> HorseRace
41 -> HorseRiding
42 -> HulaHoop
43 -> IceDancing
44 -> JavelinThrow
45 -> JugglingBalls
46 -> JumpRope
47 -> JumpingJack
48 -> Kayaking
49 -> Knitting
50 -> LongJump
51 -> Lunges
52 -> MilitaryParade
53 -> Mixing
54 -> MoppingFloor
55 -

In [None]:
#Step A2.2 - SVM + GridSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

svm = SVC()

# One grid per kernel: `gamma` is ignored by the linear kernel, so crossing it with
# kernel='linear' (as a single dict would) fits every linear candidate twice for
# identical scores.
# NOTE(review): the features are not standardized before the SVM; consider wrapping
# the estimator in a pipeline with a scaler — this would change the tuned results.
svm_params = [
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']},
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
]

# 5-fold cross-validated search, model selection by macro-averaged F1.
svm_grid = GridSearchCV(
    svm,
    svm_params,
    cv=5,
    scoring='f1_macro',
    n_jobs=-1
)

svm_grid.fit(X_train, y_train_enc)

print("Best SVM params:", svm_grid.best_params_)

In [None]:
#Evaluate SVM on Test Set
from sklearn.metrics import classification_report, confusion_matrix

# Predict the held-out test split with the best estimator found by the grid search.
svm_best = svm_grid.best_estimator_
svm_preds = svm_best.predict(X_test)

print("SVM Classification Report:")
svm_report = classification_report(y_test_enc, svm_preds, target_names=le.classes_)
print(svm_report)

In [None]:
#STEP A2.3 — Model 2: Random Forest

from sklearn.ensemble import RandomForestClassifier

# Fixed random_state so the forest is reproducible between runs.
rf = RandomForestClassifier(random_state=42)

# Candidate forest sizes and tree depths (None = unlimited depth).
rf_params = {'n_estimators': [100, 200], 'max_depth': [None, 5, 10]}

# Same search protocol as the SVM: 5-fold CV, selection by macro-averaged F1.
rf_grid = GridSearchCV(estimator=rf, param_grid=rf_params, cv=5, scoring='f1_macro', n_jobs=-1)
rf_grid.fit(X_train, y_train_enc)

print("Best RF params:", rf_grid.best_params_)

In [None]:
#Evaluate RF on Test Set
# Predict the held-out test split with the best forest found by the grid search.
rf_best = rf_grid.best_estimator_
rf_preds = rf_best.predict(X_test)

print("Random Forest Classification Report:")
rf_report = classification_report(y_test_enc, rf_preds, target_names=le.classes_)
print(rf_report)

In [None]:
#STEP A2.4 — Feature Importance (VERY IMPORTANT)
import matplotlib.pyplot as plt

# Names follow the order in which extract_motion_features appends its four statistics.
feature_names = ['mean_intensity', 'std_intensity', 'mean_motion', 'std_motion']

importances = rf_best.feature_importances_

fig, ax = plt.subplots()
ax.bar(feature_names, importances)
ax.set_title("Feature Importance (Random Forest)")
ax.set_ylabel("Importance")
plt.show()

Step A3: Evaluation of the classical models

In [None]:
#step A3.1
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def evaluate_model(y_true, y_pred, model_name):
    """Print accuracy and macro-averaged F1 / precision / recall for one model.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        model_name: Name shown in the report header.
    """
    print(f"--- {model_name} Evaluation ---")
    print("Accuracy:", accuracy_score(y_true, y_pred))

    # The remaining metrics share the same call shape, so drive them from a table.
    macro_metrics = (
        ("F1 Macro:", f1_score),
        ("Precision Macro:", precision_score),
        ("Recall Macro:", recall_score),
    )
    for caption, scorer in macro_metrics:
        print(caption, scorer(y_true, y_pred, average='macro'))

    print("\n")


In [None]:
# Report both classical models against the same encoded test labels.
for model_label, model_preds in (("SVM", svm_preds), ("Random Forest", rf_preds)):
    evaluate_model(y_test_enc, model_preds, model_label)


In [None]:
#STEP A3.2 — Confusion Matrix

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# This cell belongs to Part A, so plot the two classical models. The previous
# version referenced `dl_preds`, which is never defined in the notebook, and
# titled the figure "CNN+LSTM".
fig, axes = plt.subplots(1, 2, figsize=(24, 10))
for ax, (model_label, model_preds) in zip(
    axes, (("SVM", svm_preds), ("Random Forest", rf_preds))
):
    cm = confusion_matrix(y_test_enc, model_preds)
    sns.heatmap(cm, cmap="Blues", annot=False, ax=ax)
    ax.set_title(f"{model_label} Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
plt.show()

In [None]:
#STEP A3.3 — Statistical Significance Testing (Optional, Advanced)

from mlxtend.evaluate import paired_ttest_5x2cv
from sklearn.base import clone

# Compare the two tuned classical models. The previous version passed `dl_model`,
# which is never defined (and the CNN+LSTM consumes video tensors, not X_test).
# Clones are used so the tuned estimators behind svm_preds / rf_preds are not refit
# by the 5x2cv procedure (it fits the estimators it is given — TODO confirm it does
# not clone internally). random_seed fixes the CV splits.
t, p = paired_ttest_5x2cv(
    estimator1=clone(rf_best),
    estimator2=clone(svm_best),
    X=X_test,
    y=y_test_enc,
    random_seed=42,
)
print("Paired t-test result: t =", t, ", p =", p)

# Part B : Deep Learning (CNN + LSTM)

In [None]:
#STEP B1 — Select Subset of Classes

# Class names kept for the deep-learning experiment. The position of a name in this
# list becomes its integer `movement_type` target (see label_map in STEP B4).
selected_classes = [
    "Basketball", "TennisSwing", "SoccerJuggling", "JumpRope",
    "Swing", "WalkingWithDog", "Kayaking", "HighJump", "PullUps"
]

In [None]:
#STEP B2 — Prepare DataFrame Subset

def _keep_selected(split_df):
    """Return the rows of ``split_df`` whose label is in ``selected_classes``, re-indexed from 0."""
    keep_mask = split_df['label'].isin(selected_classes)
    return split_df[keep_mask].reset_index(drop=True)

train_df_subset = _keep_selected(train_df)
val_df_subset = _keep_selected(val_df)
test_df_subset = _keep_selected(test_df)

In [None]:
#STEP B3 — Frame Extraction

import cv2
import numpy as np

def extract_frames(video_path, max_frames=16, size=(64, 64)):
    """Read up to ``max_frames`` RGB frames from a video, scaled to [0, 1].

    Args:
        video_path: Path of the video passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of dtype float32 and shape
        ``(max_frames, height, width, 3)``. If the video yields fewer readable
        frames (including an unreadable file), the tail is zero-padded.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Stride between sampled frames; at least 1 so short clips read consecutive frames.
        step = max(1, total_frames // max_frames)
        for i in range(max_frames):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i * step)
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes to BGR; the ImageNet-pretrained CNN downstream expects RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # float32 halves the batch memory compared with the float64 that `/ 255.0` gives.
            frame = cv2.resize(frame, size).astype(np.float32) / 255.0  # normalize
            frames.append(frame)
    finally:
        # Release the capture even if decoding/resizing raises.
        cap.release()

    # Pad frames if less than max_frames
    padding = np.zeros((size[1], size[0], 3), dtype=np.float32)
    frames.extend([padding] * (max_frames - len(frames)))
    return np.array(frames, dtype=np.float32)

In [None]:
#STEP B4 — Encode Labels

#Classification label: Movement exists or not

# Regression label: Movement type index (for LSTM to predict)
label_map = {cls: i for i, cls in enumerate(selected_classes)}

# Add both targets to EVERY split. Previously only the training subset received them,
# so the validation/test generators raised KeyError on 'movement' / 'movement_type'.
for subset_df in (train_df_subset, val_df_subset, test_df_subset):
    # Classification label: movement exists or not.
    # NOTE(review): every selected class is labelled 1, so this target is constant and
    # the 'classification' head has nothing to learn — confirm this is intended.
    subset_df['movement'] = 1  # All selected classes → movement
    subset_df['movement_type'] = subset_df['label'].map(label_map)

In [None]:
#STEP B5 — Create Data Generator

import tensorflow as tf

class VideoDataGenerator(tf.keras.utils.Sequence):
    """Batch generator that decodes video clips on the fly for the two-headed model.

    Each batch is ``(X, targets)`` where ``X`` stacks the arrays returned by
    ``extract_frames`` and ``targets`` is a dict with the keys
    ``"classification"`` (from the ``movement`` column) and ``"regression"``
    (from the ``movement_type`` column).

    Args:
        df: DataFrame with ``clip_path``, ``movement`` and ``movement_type`` columns.
        batch_size: Number of clips per batch.
        max_frames: Number of frames requested from ``extract_frames`` per clip.
        shuffle: Whether to reshuffle the clip order at construction and after every epoch.
        root_dir: Folder against which ``clip_path`` is resolved. Defaults to the
            notebook-level ``path`` returned by ``kagglehub.dataset_download``.
        **kwargs: Forwarded to the Keras base class.
    """

    def __init__(self, df, batch_size=8, max_frames=16, shuffle=True, root_dir=None, **kwargs):
        # The Keras base class expects its constructor to be called by subclasses.
        super().__init__(**kwargs)
        self.df = df
        self.batch_size = batch_size
        self.max_frames = max_frames
        self.shuffle = shuffle
        # Read clips from the downloaded dataset. The previous hard-coded
        # "/content/dataset" prefix is never populated by this notebook, so every
        # clip failed to open and came back as all-zero padding.
        self.root_dir = path if root_dir is None else root_dir
        self.indexes = np.arange(len(df))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return int(np.ceil(len(self.df)/self.batch_size))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(X, {"classification": ..., "regression": ...})``."""
        # Fail loudly past the last batch so plain iteration terminates instead of
        # producing empty batches forever.
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range for {len(self)} batches")
        batch_idx = self.indexes[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_df = self.df.iloc[batch_idx]
        X = np.array([
            extract_frames(
                os.path.join(self.root_dir, row['clip_path'].lstrip('/')),
                self.max_frames,
            )
            for _, row in batch_df.iterrows()
        ])
        y_class = np.array(batch_df['movement'])
        y_reg = np.array(batch_df['movement_type'])
        return X, {"classification": y_class, "regression": y_reg}

    def on_epoch_end(self):
        """Reshuffle the clip order between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [None]:
#STEP B6 — Build CNN+LSTM Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, TimeDistributed, LSTM, Dense, Dropout, Rescaling

max_frames, H, W, C = 16, 64, 64, 3

# Base CNN (pretrained); pooling='avg' makes it emit one feature vector per frame.
cnn_base = MobileNetV2(weights='imagenet', include_top=False, input_shape=(H,W,C), pooling='avg')

# Freeze the CNN so only the LSTM and the two heads are trained.
cnn_base.trainable = False

# Video Input
video_input = Input(shape=(max_frames, H, W, C))
# extract_frames delivers pixels in [0, 1], but the ImageNet MobileNetV2 weights were
# trained on inputs scaled to [-1, 1]; map the range here so the frozen features are
# computed on in-distribution inputs.
x = Rescaling(scale=2.0, offset=-1.0)(video_input)
x = TimeDistributed(cnn_base)(x)   # per-frame CNN features
x = LSTM(128)(x)                   # temporal aggregation over the frame sequence
x = Dropout(0.5)(x)

# Dual Output: classification + regression
# NOTE: the head named 'regression' is a softmax multi-class classifier over
# selected_classes; the name is kept because the history keys and target dicts use it.
class_output = Dense(1, activation='sigmoid', name='classification')(x)  # movement yes/no
reg_output = Dense(len(selected_classes), activation='softmax', name='regression')(x)  # movement type

model = Model(inputs=video_input, outputs=[class_output, reg_output])
model.compile(optimizer='adam',
              loss={'classification':'binary_crossentropy', 'regression':'sparse_categorical_crossentropy'},
              metrics={'classification':'accuracy', 'regression':'accuracy'})
model.summary()

In [None]:
#STEP B7 — Train the Model
# One generator per split; both keep the generator's default shuffling.
train_gen = VideoDataGenerator(df=train_df_subset, batch_size=8)
val_gen = VideoDataGenerator(df=val_df_subset, batch_size=8)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=3,
)

In [None]:
#STEP B8 — Evaluate on Test Set
# Unshuffled generator so the test batches follow the row order of test_df_subset.
test_gen = VideoDataGenerator(df=test_df_subset, batch_size=8, shuffle=False)
results = model.evaluate(x=test_gen)
print("Test Loss & Accuracy:", results)

Evaluation

In [None]:
#STEP 1 — Plot Training Curves
import matplotlib.pyplot as plt

def plot_training(history, metric='accuracy'):
    """Plot train vs. validation accuracy for both model heads, side by side.

    Args:
        history: Object whose ``.history`` dict holds the
            ``classification_accuracy`` / ``regression_accuracy`` series and
            their ``val_``-prefixed counterparts (as returned by ``model.fit``).
        metric: Accepted for API compatibility; not used by the function.
    """
    panels = (
        ('classification_accuracy', 'Classification Accuracy'),
        ('regression_accuracy', 'Regression (Movement Type) Accuracy'),
    )

    plt.figure(figsize=(12,5))
    for position, (series_key, panel_title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[series_key], label='train')
        plt.plot(history.history['val_' + series_key], label='val')
        plt.title(panel_title)
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
    plt.show()

plot_training(history)

In [None]:
#STEP 2 — Evaluate on Test Set
# Rebuilds the unshuffled test generator (the prediction cell below reuses `test_gen`).
test_gen = VideoDataGenerator(df=test_df_subset, batch_size=8, shuffle=False)
results = model.evaluate(x=test_gen)
print("Test Loss & Accuracy (classification, regression):", results)

In [None]:
#STEP 3 — Predictions on Test Set
import numpy as np

y_true_class = []
y_true_reg = []
y_pred_class = []
y_pred_reg = []

# Walk the batches by index. Iterating the generator object directly relies on the
# legacy __getitem__ iteration protocol, which only stops on IndexError and can
# therefore run past the last batch.
for batch_index in range(len(test_gen)):
    X, y = test_gen[batch_index]
    # verbose=0: one progress bar per batch would flood the cell output.
    preds_class, preds_reg = model.predict(X, verbose=0)
    y_true_class.extend(y['classification'])
    y_true_reg.extend(y['regression'])
    # Sigmoid head -> 0/1 decision at 0.5; softmax head -> most likely class index.
    y_pred_class.extend((preds_class > 0.5).astype(int).flatten())
    y_pred_reg.extend(np.argmax(preds_reg, axis=1))

In [None]:
#STEP 4 — Classification Metrics
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

print("--- Movement Detection (Classification) ---")
print("Accuracy:", accuracy_score(y_true_class, y_pred_class))
# The three macro-averaged metrics share one call shape.
for caption, scorer in (
    ("F1 Macro:", f1_score),
    ("Precision Macro:", precision_score),
    ("Recall Macro:", recall_score),
):
    print(caption, scorer(y_true_class, y_pred_class, average='macro'))

In [None]:
#STEP 5 — Regression Metrics (Movement Type)
# Despite its name, the 'regression' head is scored as a multi-class classifier.
print("--- Movement Type (Regression / Multi-class Classification) ---")
type_accuracy = accuracy_score(y_true_reg, y_pred_reg)
print("Accuracy:", type_accuracy)
type_f1_macro = f1_score(y_true_reg, y_pred_reg, average='macro')
print("F1 Macro:", type_f1_macro)

In [None]:
#STEP 6 — Confusion Matrix for Movement Types
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Rows are the true movement-type indices, columns the predicted ones.
cm = confusion_matrix(y_true_reg, y_pred_reg)

fig, ax = plt.subplots(figsize=(12,10))
sns.heatmap(cm, cmap="Blues", annot=False, ax=ax)
ax.set_title("Movement Type Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

# Comparative Analysis

In [None]:
#Step 1 — Collect Metrics

# Classical ML
# Classical ML
# Use the names Part A actually defines: rf_preds / svm_preds (not y_pred_rf /
# y_pred_svm, which never exist) and the integer-encoded y_test_enc — the raw y_test
# holds string labels and cannot be compared with the integer predictions.
metrics_classical = {
    "RandomForest": {
        "Accuracy": accuracy_score(y_test_enc, rf_preds),
        "F1_macro": f1_score(y_test_enc, rf_preds, average='macro')
    },
    "SVM": {
        "Accuracy": accuracy_score(y_test_enc, svm_preds),
        "F1_macro": f1_score(y_test_enc, svm_preds, average='macro')
    }
}

# Deep Learning
# NOTE: the classical models are scored on every class in the test split, the
# CNN+LSTM only on `selected_classes`, so the numbers are not like-for-like.
metrics_deep = {
    "CNN_LSTM": {
        "Classification Accuracy": accuracy_score(y_true_class, y_pred_class),
        "Classification F1_macro": f1_score(y_true_class, y_pred_class, average='macro'),
        "Regression Accuracy": accuracy_score(y_true_reg, y_pred_reg),
        "Regression F1_macro": f1_score(y_true_reg, y_pred_reg, average='macro')
    }
}

In [None]:
#Step 2 — Summary Table
import pandas as pd

rf_scores = metrics_classical["RandomForest"]
svm_scores = metrics_classical["SVM"]
cnn_lstm_scores = metrics_deep["CNN_LSTM"]

# One (model, accuracy, macro-F1) record per row of the comparison table.
summary_rows = [
    ("Random Forest", rf_scores["Accuracy"], rf_scores["F1_macro"]),
    ("SVM", svm_scores["Accuracy"], svm_scores["F1_macro"]),
    ("CNN+LSTM (Movement Detection)",
     cnn_lstm_scores["Classification Accuracy"],
     cnn_lstm_scores["Classification F1_macro"]),
    ("CNN+LSTM (Movement Type)",
     cnn_lstm_scores["Regression Accuracy"],
     cnn_lstm_scores["Regression F1_macro"]),
]
summary = pd.DataFrame(summary_rows, columns=["Model", "Accuracy", "F1 Macro"])

print(summary)

In [None]:
#Step 3 — Visualization
import matplotlib.pyplot as plt

bar_colors = ['skyblue','orange','green','red']

# One bar chart per metric column: (column, figure title, y-axis label).
chart_specs = (
    ("Accuracy", "Model Accuracy Comparison", "Accuracy"),
    ("F1 Macro", "Model F1 Macro Comparison", "F1 Score (Macro)"),
)
for column, chart_title, y_label in chart_specs:
    plt.figure(figsize=(8,5))
    plt.bar(summary["Model"], summary[column], color=bar_colors)
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xticks(rotation=30)
    plt.ylim(0,1)  # both metrics live in [0, 1]
    plt.show()

In [None]:
# Delete any existing checkout so the `git clone` in the next cell starts from a clean directory.
!rm -rf /content/Human_Activity_Recognition_Project



In [None]:
# Clone the project repository (run from /content, this creates /content/Human_Activity_Recognition_Project).
!git clone https://github.com/AlinaRiaz/Human_Activity_Recognition_Project.git


Cloning into 'Human_Activity_Recognition_Project'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (3/3), done.


In [None]:
# Switch the notebook's working directory to the cloned repository.
%cd /content/Human_Activity_Recognition_Project


/content/Human_Activity_Recognition_Project


In [None]:
# List /content to confirm the repository folder is present.
!ls /content


Human_Activity_Recognition_Project


In [None]:
# NOTE: repeats the earlier %cd into the repository; harmless but redundant.
%cd /content/Human_Activity_Recognition_Project


/content/Human_Activity_Recognition_Project
