In [4]:
# STEP 0: Setup
!pip uninstall -y mediapipe -q
!pip cache purge
!pip install mediapipe==0.10.9 yt-dlp opencv-python -q

# STEP 1: Restart runtime AFTER this cell runs (Runtime > Restart Runtime), then run the rest below.

# STEP 2: Mount Google Drive and setup folders
from google.colab import drive
import os
import cv2
import mediapipe as mp
import yt_dlp

drive.mount('/content/drive')

base_path = '/content/drive/MyDrive/Projects/DogTensionPredictor'
video_dir = os.path.join(base_path, 'videos')
frame_dir = os.path.join(base_path, 'frames')
keypoint_dir = os.path.join(base_path, 'keypoints')

for folder in [video_dir, frame_dir, keypoint_dir]:
    os.makedirs(folder, exist_ok=True)

# STEP 3: Download videos
video_links = {
    'friendly_dog1.mp4': 'https://www.youtube.com/watch?v=vHIfTUbSmGY',
    'friendly_dog2.mp4': 'https://www.youtube.com/watch?v=kVwaudxp0FI',
    'friendly_dog3.mp4': 'https://www.youtube.com/watch?v=jzzE6glci94',
    'nervous_dog1.mp4': 'https://www.youtube.com/watch?v=OjKdNC55bHI',
    'nervous_dog2.mp4': 'https://www.youtube.com/watch?v=DONKRpZl0vI',
    'nervous_dog3.mp4': 'https://www.youtube.com/watch?v=OtwOkCc8Uow',
    'aggressive_dog1.mp4': 'https://www.youtube.com/watch?v=7AA3rZXiFO8',
    'aggressive_dog2.mp4': 'https://www.youtube.com/watch?v=WfHl-lO7veI',
    'aggressive_dog3.mp4': 'https://www.youtube.com/watch?v=M1FTVctuQvs',
    'aggressive_dog4.mp4': 'https://www.youtube.com/watch?v=2H4km6wzEdI'
}
for filename, url in video_links.items():
    yt_dlp.YoutubeDL({'outtmpl': f'{video_dir}/{filename}'}).download([url])


Files removed: 12
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hDrive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[youtube] Extracting URL: https://www.youtube.com/watch?v=vHIfTUbSmGY
[youtube] vHIfTUbSmGY: Downloading webpage
[youtube] vHIfTUbSmGY: Downloading tv client config
[youtube] vHIfTUbSmGY: Downloading tv player API JSON
[youtube] vHIfTUbSmGY: Downloading ios player API JSON
[youtube] vHIfTUbSmGY: Downloading m3u8 information
[info] vHIfTUbSmGY: Downloading 1 format(s): 399+251
[download] /content/drive/MyDrive/Projects/DogTensionPredictor/videos/friendly_dog1.mp4.webm has already been downloaded
[youtube] Extracting URL: https://www.youtube.com/watch?v=kVwaudxp0FI
[youtube] kVwaudxp0FI: Downloading webpage
[youtube] kVwaudxp0FI: Downloading tv client config
[youtube] kVwaudxp0FI: Downloading tv player API JSON
[youtube]

In [5]:
# STEP 4: Extract every 10th frame
def extract_frames(video_path, output_folder, step=10):
    cap = cv2.VideoCapture(video_path)
    frame_num = 0
    saved = 0
    name = os.path.splitext(os.path.basename(video_path))[0]
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_num % step == 0:
            save_path = os.path.join(output_folder, f"{name}_frame{saved}.jpg")
            cv2.imwrite(save_path, frame)
            saved += 1
        frame_num += 1
    cap.release()

for file in os.listdir(video_dir):
    if file.endswith(".mp4") or file.endswith(".webm"):
        extract_frames(os.path.join(video_dir, file), frame_dir)

# STEP 5: Apply MediaPipe Pose Detection
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_drawing = mp.solutions.drawing_utils

for frame in os.listdir(frame_dir):
    img_path = os.path.join(frame_dir, frame)
    image = cv2.imread(img_path)
    if image is None:
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = pose.process(image_rgb)

    if result.pose_landmarks:
        mp_drawing.draw_landmarks(image, result.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        save_path = os.path.join(keypoint_dir, frame.replace('.jpg', '_pose.jpg'))
        cv2.imwrite(save_path, image)


In [6]:
# STEP 6: Extract Pose Keypoints from Dog Frames into CSV Format
# This will create a structured dataset with (x, y, z, visibility) for all 33 pose landmarks per frame

import pandas as pd

# Initialize MediaPipe Pose
pose = mp_pose.Pose(static_image_mode=True)

# This list will hold the rows for the CSV
landmark_rows = []

# Loop through each image/frame in the 'frames' folder
for frame in sorted(os.listdir(frame_dir)):
    img_path = os.path.join(frame_dir, frame)
    image = cv2.imread(img_path)
    if image is None:
        continue

    # Convert BGR to RGB for MediaPipe
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = pose.process(image_rgb)

    # If pose landmarks are detected
    if result.pose_landmarks:
        # Create a row with video name and frame name
        row = {
            'video': frame.split('_frame')[0],   # e.g. friendly_dog1
            'frame': frame                       # e.g. friendly_dog1_frame3.jpg
        }

        # Add all 33 keypoints (x, y, z, visibility) to the row
        for i, lm in enumerate(result.pose_landmarks.landmark):
            row[f'x{i}'] = lm.x
            row[f'y{i}'] = lm.y
            row[f'z{i}'] = lm.z
            row[f'v{i}'] = lm.visibility

        # Append row to list
        landmark_rows.append(row)

# Convert the list of rows to a DataFrame
df = pd.DataFrame(landmark_rows)

# Save the DataFrame as CSV to your Drive project folder
csv_path = os.path.join(base_path, 'pose_landmarks.csv')
df.to_csv(csv_path, index=False)

print(f"Pose keypoint data saved to:\n{csv_path}")


Pose keypoint data saved to:
/content/drive/MyDrive/Projects/DogTensionPredictor/pose_landmarks.csv


In [7]:
# STEP 7: Add labels based on video name and train a simple classifier

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Load the CSV
csv_path = os.path.join(base_path, 'pose_landmarks.csv')
df = pd.read_csv(csv_path)

# STEP 7.1: Create labels from video name
def get_label(video_name):
    if 'friendly' in video_name:
        return 'friendly'
    elif 'nervous' in video_name:
        return 'nervous'
    elif 'aggressive' in video_name:
        return 'aggressive'
    else:
        return 'unknown'

df['label'] = df['video'].apply(get_label)

# STEP 7.2: Drop 'video' and 'frame' columns — not needed for training
X = df.drop(columns=['video', 'frame', 'label'])
y = df['label']

# STEP 7.3: Encode labels as numbers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_encoded = le.fit_transform(y)  # friendly=0, nervous=1, aggressive=2

# STEP 7.4: Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# STEP 7.5: Train Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# STEP 7.6: Evaluate the model
y_pred = clf.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))
print("Accuracy:", accuracy_score(y_test, y_pred))



Classification Report:
              precision    recall  f1-score   support

  aggressive       0.94      0.77      0.85        22
    friendly       0.82      0.82      0.82        33
     nervous       0.85      0.91      0.88        55

    accuracy                           0.85       110
   macro avg       0.87      0.83      0.85       110
weighted avg       0.86      0.85      0.85       110

Accuracy: 0.8545454545454545


In [9]:
# STEP 8: User uploads a short video and we predict its mood

from google.colab import files
from collections import Counter

# STEP 8.1: Upload video manually
uploaded = files.upload()  # User uploads video (e.g. dog_test.mp4)

# STEP 8.2: Save it to a temp folder
import shutil

uploaded_video_path = list(uploaded.keys())[0]
test_video_path = os.path.join(base_path, 'test_input.mp4')
shutil.move(uploaded_video_path, test_video_path)

# STEP 8.3: Extract frames from uploaded video
temp_frame_dir = os.path.join(base_path, 'temp_frames')
os.makedirs(temp_frame_dir, exist_ok=True)

def extract_test_frames(video_path, output_folder, step=10):
    cap = cv2.VideoCapture(video_path)
    frame_num = 0
    saved = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_num % step == 0:
            cv2.imwrite(os.path.join(output_folder, f"frame_{saved}.jpg"), frame)
            saved += 1
        frame_num += 1
    cap.release()

extract_test_frames(test_video_path, temp_frame_dir)

# STEP 8.4: Extract pose keypoints from test frames
pose = mp_pose.Pose(static_image_mode=True)
test_rows = []

for frame in sorted(os.listdir(temp_frame_dir)):
    img_path = os.path.join(temp_frame_dir, frame)
    image = cv2.imread(img_path)
    if image is None:
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = pose.process(image_rgb)

    if result.pose_landmarks:
        row = {}
        for i, lm in enumerate(result.pose_landmarks.landmark):
            row[f'x{i}'] = lm.x
            row[f'y{i}'] = lm.y
            row[f'z{i}'] = lm.z
            row[f'v{i}'] = lm.visibility
        test_rows.append(row)

# STEP 8.5: Convert to DataFrame and predict using your model
test_df = pd.DataFrame(test_rows)
preds = clf.predict(test_df)

# STEP 8.6: Convert numeric predictions back to label names
labels = le.inverse_transform(preds)

# STEP 8.7: Majority voting for final result
final_label = Counter(labels).most_common(1)[0][0]
print("Frame-wise prediction:", list(labels))
print(f"Final predicted mood: {final_label.upper()}")


Saving WhatsApp Video 2025-06-07 at 11.49.37 AM.mp4 to WhatsApp Video 2025-06-07 at 11.49.37 AM.mp4
Frame-wise prediction: ['aggressive', 'friendly', 'friendly', 'friendly', 'friendly', 'friendly', 'friendly', 'friendly', 'friendly', 'aggressive', 'nervous', 'friendly', 'aggressive', 'friendly', 'friendly']
Final predicted mood: **FRIENDLY**


In [19]:
import os

project_path = "/content/drive/MyDrive/Projects/DogTensionPredictor"
print("Project folder contents:", os.listdir(project_path))


Project folder contents: ['data', 'frames', 'keypoints', 'model', 'notebooks', 'PS.gdoc', 'videos', 'dog_pose_extractor.py.ipynb', 'pose_landmarks.csv', 'test_input.mp4', 'temp_frames', 'app']


In [20]:
import pickle

with open(os.path.join(model_folder, "model.pkl"), "wb") as f:
    pickle.dump(clf, f)

with open(os.path.join(model_folder, "encoder.pkl"), "wb") as f:
    pickle.dump(le, f)

print("Model and encoder saved successfully!")


Model and encoder saved successfully!
