Loading metadata for future comparison

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
from scipy.signal import find_peaks
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Load Metadata CSV from Google Drive
metadata_path = "/content/sample_data/df_video_metadata_demog.csv"  # Replace with your actual path

try:
    metadata = pd.read_csv(metadata_path)
    print("Metadata CSV loaded successfully:")
    print(metadata.head())  # Display first few rows
except FileNotFoundError:
    print(f"Error: File not found at {metadata_path}")
except pd.errors.ParserError:
    print(f"Error: Could not parse the CSV file at {metadata_path}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Metadata CSV loaded successfully:
      id  spo2  temperature  bps  bpd  pulse  age  height  weight  gender  \
0  IN585   NaN          NaN  147   80     68   60     150    52.0  Female   
1  IN602   NaN          NaN  138   77    101   52     146    50.0  Female   
2  IN606   NaN          NaN  117   85     70   32     163    73.0  Female   
3  IN614   NaN          NaN  132   84     90   48     163    70.0  Female   
4  IN660   NaN          NaN  164   84     76   75     167    55.0    Male   

   ...  category           Filename  \
0  ...         9  Copy of IN585.mp4   
1  ...         8  Copy of IN602.mp4   
2  ...         4  Copy of IN606.mp4   
3  ...         7  Copy of IN614.mp4   
4  ...        12  Copy of IN660.mp4   

                                            Filepath Duration (s)  \
0  /content/drive/MyDrive/sonocare/V-BPE Dataset ...      120.234   
1

Feature extraction using sample videos

In [13]:
import os
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
import time
from concurrent.futures import ThreadPoolExecutor
from scipy.signal import find_peaks
from tqdm import tqdm  # Progress bar

# ✅ Set Google Drive video folder path
video_folder = "/content/sample_data/testvideo"

if not os.path.exists(video_folder):
    raise FileNotFoundError(f"Video folder not found: {video_folder}")

video_files = [os.path.join(video_folder, f) for f in os.listdir(video_folder) if f.endswith('.mp4')]

mp_face_mesh = mp.solutions.face_mesh

# 🚀 Optimized Pulse Extraction Function
def extract_pulse(video_path):
    start_time = time.time()  # ⏳ Start timing

    cap = cv2.VideoCapture(video_path)
    heart_rates = []
    frame_skip = 5  # Process every 5th frame

    if not cap.isOpened():
        print(f"⚠️ Cannot open video file: {video_path}")
        return None

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if frame_count % frame_skip == 0:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            h, w = gray.shape
            forehead = gray[int(0.2*h):int(0.3*h), int(0.3*w):int(0.7*w)]
            heart_rates.append(np.mean(forehead))  # Store mean intensity

        frame_count += 1

    cap.release()

    # Ensure valid data
    if len(heart_rates) == 0:
        return None

    peaks, _ = find_peaks(heart_rates, distance=30)
    duration_seconds = len(heart_rates) / 30  # Assuming 30 FPS

    if duration_seconds == 0:
        return None

    pulse_rate = (len(peaks) / duration_seconds) * 60  # Convert to BPM

    elapsed_time = time.time() - start_time  # ⏳ Time taken
    return {"video_id": os.path.basename(video_path).lower().replace("copy of ", "").replace(".mp4", ""),
            "pulse_rate": pulse_rate,
            "processing_time_sec": round(elapsed_time, 2)}

# ✅ Process Videos in Parallel
print("🚀 Extracting features from videos...")
start_time = time.time()
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(tqdm(executor.map(extract_pulse, video_files), total=len(video_files)))

video_data = [res for res in results if res is not None]

if not video_data:
    raise ValueError("❌ No valid video data extracted. Check video files.")

# ✅ Save Video Features
video_df = pd.DataFrame(video_data)
video_features_path = "/content/drive/My Drive/video_features.csv"
video_df.to_csv(video_features_path, index=False)
print(f"✅ Video features saved to {video_features_path}")

# ⏳ Total Processing Time
total_time = time.time() - start_time
print(f"⏳ Total Video Processing Time: {round(total_time, 2)} sec")

# ✅ Print Sample Data
print(video_df.head())


🚀 Extracting features from videos...


100%|██████████| 5/5 [08:39<00:00, 103.99s/it]

✅ Video features saved to /content/drive/My Drive/video_features.csv
⏳ Total Video Processing Time: 520.0 sec
  video_id  pulse_rate  processing_time_sec
0    sl225   44.021739               490.19
1    sl191   48.452221               519.98
2    sl413   48.380427                75.38
3    sl496   46.196150                59.40
4    in901   42.265193                79.56





Predicting pulse rate using Random forest

In [21]:
import os
import cv2
import numpy as np
import pandas as pd
import time
import joblib  # ✅ For saving and loading the model
from concurrent.futures import ProcessPoolExecutor
from scipy.signal import find_peaks
from tqdm import tqdm
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# ✅ Load Video Features for Training
video_features_path = "/content/drive/My Drive/video_features.csv"
df = pd.read_csv(video_features_path)

if df.empty:
    raise ValueError("❌ No data found in video features file!")

# ✅ Define Features & Target Variable
features = ["processing_time_sec"]  # You can add more if needed
target = "pulse_rate"

X = df[features]
y = df[target]

# ✅ Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ✅ Train Random Forest Model
print("🚀 Training Random Forest model...")
model = RandomForestRegressor(n_estimators=50, random_state=42)
model.fit(X_train, y_train)

# ✅ Predictions & Evaluation
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)

print(f"✅ MAE (Pulse Rate Prediction): {mae:.2f} BPM")

# ✅ Save the Model
model_path = "/content/drive/My Drive/pulse_rate_model.pkl"
joblib.dump(model, model_path)
print(f"✅ Model saved at: {model_path}")

# -------------------------------------------------------------------------------
# **📌 FUNCTION TO PREDICT PULSE RATE FROM ANY NEW VIDEO**
# -------------------------------------------------------------------------------

def extract_features_from_video(video_path):
    """Extract features from a given video."""
    start_time = time.time()

    cap = cv2.VideoCapture(video_path)
    heart_rates = []
    frame_skip = 10

    if not cap.isOpened():
        print(f"⚠️ Cannot open video file: {video_path}")
        return None

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if frame_count % frame_skip == 0:
            frame = cv2.resize(frame, (640, 480))
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            h, w = gray.shape
            forehead = gray[int(0.2*h):int(0.3*h), int(0.3*w):int(0.7*w)]
            heart_rates.append(np.mean(forehead))

        frame_count += 1

    cap.release()

    if len(heart_rates) == 0:
        return None

    peaks, _ = find_peaks(heart_rates, distance=30)
    duration_seconds = len(heart_rates) / 30

    if duration_seconds == 0:
        return None

    pulse_rate = (len(peaks) / duration_seconds) * 60

    elapsed_time = time.time() - start_time
    return {"video_id": os.path.basename(video_path).replace(".mp4", ""),
            "processing_time_sec": round(elapsed_time, 2)}

def predict_pulse_rate(video_path):
    """Predict pulse rate from a new video using trained model."""
    model = joblib.load(model_path)
    features = extract_features_from_video(video_path)

    if features is None:
        print("❌ Could not extract features from video.")
        return None

    feature_df = pd.DataFrame([features])[["processing_time_sec"]]
    predicted_pulse = model.predict(feature_df)[0]

    print(f"🎯 Predicted Pulse Rate: {predicted_pulse:.2f} BPM")
    return predicted_pulse

# -------------------------------------------------------------------------------
# **📌 Example: Predict Pulse Rate from a New Video**
# -------------------------------------------------------------------------------
new_video = "/content/sample_data/testvideo/Copy of SL133.mp4"  # Change path to your test video
predicted_pulse = predict_pulse_rate(new_video)


🚀 Training Random Forest model...
✅ MAE (Pulse Rate Prediction): 4.74 BPM
✅ Model saved at: /content/drive/My Drive/pulse_rate_model.pkl
🎯 Predicted Pulse Rate: 44.00 BPM
