In [8]:
import os

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer, Conv2D, Multiply, Add, Dense
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import custom_object_scope
from twilio.rest import Client

class SpatialAttention(Layer):
    """Spatial attention: rescale each location of a feature map.

    The input is pooled over its last axis (mean and max), the two pooled
    maps are concatenated, and a single-filter 7x7 sigmoid convolution turns
    them into a mask that the input is multiplied by.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # One output filter with a sigmoid keeps every mask value in (0, 1).
        self.conv = Conv2D(1, kernel_size=7, padding='same', activation='sigmoid')

    def call(self, inputs):
        """Return ``inputs`` multiplied by the learned spatial mask.

        Assumes ``inputs`` is a channels-last 4-D feature map -- TODO confirm
        against the model that uses this layer.
        """
        # keepdims=True leaves a size-1 last axis so the two maps can be
        # stacked into a 2-channel tensor for the convolution.
        pooled_maps = [
            tf.reduce_mean(inputs, axis=-1, keepdims=True),
            tf.reduce_max(inputs, axis=-1, keepdims=True),
        ]
        mask = self.conv(tf.concat(pooled_maps, axis=-1))
        return Multiply()([inputs, mask])


class ChannelAttention(Layer):
    """Channel attention: rescale each channel of a feature map.

    The input is pooled over axes 1 and 2 (mean and max). Both pooled
    descriptors go through the same two-layer dense bottleneck, the results
    are summed, and the input is multiplied by that per-channel weight.

    Args:
        reduction_ratio: Divisor applied to the channel count to size the
            bottleneck (first) dense layer.
        **kwargs: Forwarded to ``Layer``.
    """

    def __init__(self, reduction_ratio=8, **kwargs):
        super().__init__(**kwargs)
        self.reduction_ratio = reduction_ratio

    def build(self, input_shape):
        """Create the shared bottleneck once the channel count is known."""
        channels = input_shape[-1]
        # Floor division yields 0 units when there are fewer channels than
        # ``reduction_ratio``; clamp so the bottleneck always has one unit.
        bottleneck_units = max(1, channels // self.reduction_ratio)
        self.dense1 = Dense(bottleneck_units, activation='relu')
        self.dense2 = Dense(channels, activation='sigmoid')

    def call(self, inputs):
        """Return ``inputs`` multiplied by the learned per-channel weights.

        Assumes ``inputs`` is a channels-last 4-D feature map -- TODO confirm
        against the model that uses this layer.
        """
        # keepdims=True keeps the pooled axes as size 1 so the weights
        # broadcast back over the input in the final multiply.
        avg_pool = tf.reduce_mean(inputs, axis=[1, 2], keepdims=True)
        max_pool = tf.reduce_max(inputs, axis=[1, 2], keepdims=True)

        # The same two dense layers (shared weights) process both descriptors.
        avg_out = self.dense2(self.dense1(avg_pool))
        max_out = self.dense2(self.dense1(max_pool))

        # The sigmoid is applied before the sum, so weights lie in (0, 2).
        attention = Add()([avg_out, max_out])
        return Multiply()([inputs, attention])

    def get_config(self):
        """Return the layer config including ``reduction_ratio``.

        Without this, a non-default ratio is not guaranteed to survive a
        save/load round trip, and the layer would be rebuilt with a
        different bottleneck width than the saved weights expect.
        """
        config = super().get_config()
        config['reduction_ratio'] = self.reduction_ratio
        return config

# The action model is deserialised inside a scope that registers both custom
# attention layers under the class names used as keys below.
with custom_object_scope(dict(SpatialAttention=SpatialAttention,
                              ChannelAttention=ChannelAttention)):
    action_recognition_model = load_model("alternate_attention_model_with_flattening.h5")

# NOTE(review): loaded outside the custom-object scope -- presumably this
# model contains neither custom layer; confirm.
fall_detection_model = load_model("model_attention.h5")

# Human-readable label for each class index; predict_action() looks up the
# argmax of the action model's output here. Position in the tuple is the index.
ACTION_CATEGORIES = dict(enumerate((
    'Fall forward',
    'Fall backwards',
    'Fall left',
    'Fall right',
    'Fall sitting',
    'Walk',
    'Hop',
    'Pick up object',
    'Sit down',
    'Kneel',
)))


# Twilio credentials and phone numbers.
# SECURITY: real secrets must not be committed with the source. Each value is
# read from the environment first; the redacted placeholders below are only a
# fallback so the names stay defined when nothing is configured.
ACCOUNT_SID = os.environ.get('TWILIO_ACCOUNT_SID', 'XXXXXXXXXXXXXXXXXXXXXXXXX')
AUTH_TOKEN = os.environ.get('TWILIO_AUTH_TOKEN', 'XXXXXXXXXXXXXXXXXXXXXXXXXXXc')
TWILIO_PHONE = os.environ.get('TWILIO_PHONE', '+XXXXXXXXXX')  # sender number
RECIPIENT_PHONE = os.environ.get('RECIPIENT_PHONE', '+XXXXXXXXXXXXX')  # alert target

# Shared Twilio REST client used by send_sms_alert().
client = Client(ACCOUNT_SID, AUTH_TOKEN)

def open_video(video_path):
    """Open ``video_path`` with OpenCV and return the capture object.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        The opened ``cv2.VideoCapture``; the caller must release it.

    Raises:
        ValueError: If the capture reports that it is not opened.
    """
    capture = cv2.VideoCapture(video_path)
    if capture.isOpened():
        return capture
    raise ValueError(f"Cannot open video file: {video_path}")

def send_sms_alert(message, to_phone):
    """Send ``message`` by SMS to ``to_phone`` through the module-level client.

    Best effort: any exception raised while sending is caught and reported
    on stdout instead of being propagated, so the function always returns
    ``None``.

    Args:
        message: Text body of the SMS.
        to_phone: Destination phone number.
    """
    try:
        # The sender is always the module-level TWILIO_PHONE number.
        sms_fields = {'body': message, 'from_': TWILIO_PHONE, 'to': to_phone}
        client.messages.create(**sms_fields)
        print("SMS Alert Sent!")
    except Exception as e:
        print(f"Failed to send SMS: {e}")

def predict_action(frame, model):
    """Classify the action shown in a single video frame.

    The frame is resized to 112x112, scaled by 1/255, given a leading batch
    axis and passed to ``model.predict``. The argmax of the output is looked
    up in ``ACTION_CATEGORIES``.

    Args:
        frame: Image array as returned by ``cv2.VideoCapture.read``.
        model: Object exposing ``predict(batch)``.

    Returns:
        ``(label, scores)`` where ``scores`` is the first row of the model
        output, or ``("Error", None)`` if anything in the pipeline raises
        (the exception is printed, not propagated).
    """
    try:
        scaled = cv2.resize(frame, (112, 112)) / 255.0
        # The model expects a batch, so wrap the single frame as a batch of one.
        batch = scaled[np.newaxis, ...]

        predictions = model.predict(batch)
        predicted_class = np.argmax(predictions)
        print(f"Predictions: {predictions}, Predicted Class: {predicted_class}")
        label = ACTION_CATEGORIES.get(predicted_class, "Unknown Action")
        return label, predictions[0]
    except Exception as e:
        print(f"Error predicting action: {e}")
        return "Error", None



def predict_fall(video_path, model, batch_size=4):
    """Classify a whole video as "Fall" or "Non-fall" by per-frame majority vote.

    Every frame is resized to 224x224 and sent to ``model.predict`` in
    batches of ``batch_size`` (the last batch may be smaller). The argmax of
    each output row is the per-frame vote.

    Args:
        video_path: Path of the video to analyse.
        model: Object exposing ``predict(batch)`` that returns one row of
            class scores per frame.
        batch_size: Number of frames per ``predict`` call.

    Returns:
        ``"Fall"`` when frames voted class 1 strictly outnumber frames voted
        class 0, otherwise ``"Non-fall"`` (ties and videos with no frames
        included). NOTE(review): presumably class 1 is "fall" in the trained
        model -- confirm against the training labels.

    Raises:
        ValueError: If the video cannot be opened. Previously this case
            silently produced "Non-fall", i.e. a false negative.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"Cannot open video file: {video_path}")

    fall_predictions = []

    def _classify(frames):
        # Run one batch through the model and record the winning class per frame.
        predictions = model.predict(np.array(frames))
        fall_predictions.extend(np.argmax(predictions, axis=1).tolist())

    frames_batch = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames_batch.append(cv2.resize(frame, (224, 224)))

            if len(frames_batch) == batch_size:
                _classify(frames_batch)
                frames_batch = []

        # Flush the trailing partial batch, if any.
        if frames_batch:
            _classify(frames_batch)
    finally:
        # Release the capture even when resizing or prediction raises.
        cap.release()

    fall_count = fall_predictions.count(1)
    non_fall_count = fall_predictions.count(0)
    return "Fall" if fall_count > non_fall_count else "Non-fall"

def process_video_for_action(video_path, model, start_frame=40, frame_skip=10,
                             max_seconds=300):
    """Play a video, label sampled frames with the predicted action, and save it.

    From ``start_frame`` onwards, every frame whose index is a multiple of
    ``frame_skip`` is classified with ``predict_action`` and the label is
    drawn on that frame. Every frame, labelled or not, is written to
    ``output_with_action_labels.avi`` and shown in an OpenCV window.
    Pressing ``q`` stops processing.

    Args:
        video_path: Path of the video to process.
        model: Action-recognition model passed through to ``predict_action``.
        start_frame: Index of the first frame eligible for prediction.
        frame_skip: Only frame indices divisible by this value are predicted.
        max_seconds: Stop once more than ``fps * max_seconds`` frames have
            been processed. Ignored when the container reports no FPS.

    Returns:
        List of predicted action labels, one per classified frame, in order.

    Raises:
        ValueError: If the video cannot be opened (raised by ``open_video``).
    """
    cap = open_video(video_path)
    out = None
    frame_count = 0
    actions = []

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Some containers report 0 FPS; avoid ZeroDivisionError and skip the
        # time-based cut-off in that case, since it cannot be computed.
        has_fps = fps > 0
        total_duration = total_frames / fps if has_fps else 0.0
        max_frames = fps * max_seconds if has_fps else None

        print(f"Processing video: {video_path}")
        print(f"FPS: {fps}, Total Frames: {total_frames}, Duration: {total_duration:.2f} seconds")

        output_path = "output_with_action_labels.avi"
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Start predicting after `start_frame`, then sample every
            # `frame_skip`-th frame index.
            if frame_count >= start_frame and frame_count % frame_skip == 0:
                predicted_action, _ = predict_action(frame, model)
                actions.append(predicted_action)

                label = f"Action: {predicted_action}"
                print(f"Frame {frame_count}: {label}")

                # Black backing box sized to the text keeps the white label
                # readable on any background.
                (text_width, text_height), _ = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
                cv2.rectangle(frame, (10, 10), (10 + text_width, 30 + text_height), (0, 0, 0), -1)
                cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            out.write(frame)

            cv2.imshow("Action Recognition", frame)

            # Waits up to 300 ms per frame for a key press; 'q' aborts.
            if cv2.waitKey(300) & 0xFF == ord('q'):
                print("Video processing interrupted by user.")
                break

            if max_frames is not None and frame_count > max_frames:
                # The message reports the real limit; it used to say
                # "50 seconds" while the cut-off was fps * 300 frames.
                print(f"Stopping after {max_seconds} seconds of video.")
                break

            frame_count += 1
    finally:
        # Always free the capture, the writer and the preview window, even
        # when prediction or drawing raises part-way through.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    return actions


def process_video_pipeline(video_path):
    """Run action recognition, then fall detection, and alert on a fall.

    The video is first passed to ``process_video_for_action`` with the
    module-level action model, then to ``predict_fall`` with the module-level
    fall model. An SMS is sent to ``RECIPIENT_PHONE`` only when the fall
    verdict is exactly ``"Fall"``.

    Any exception from either stage is caught and printed; the function
    always returns ``None``.

    Args:
        video_path: Path of the video to analyse.
    """
    try:
        print("Starting Action Recognition...")
        detected_actions = process_video_for_action(video_path, action_recognition_model)
        print(f"Actions detected: {detected_actions}")

        print("\nStarting Fall Detection...")
        verdict = predict_fall(video_path, fall_detection_model)
        print(f"Fall Detection Result: {verdict}")

        # Nothing to report unless the majority vote says "Fall".
        if verdict != "Fall":
            return
        send_sms_alert(f"Alert! A fall has been detected ", RECIPIENT_PHONE)

    except Exception as e:
        print(f"Error in processing pipeline: {e}")

# Sample clip for a manual run; the raw string keeps the Windows backslashes
# literal.
video_path = r"C:\Users\mgree\Downloads\F1.avi"

# Only start the pipeline (preview window, model inference, possible SMS)
# when this code is executed directly -- a script or a notebook cell -- and
# not as a side effect of being imported.
if __name__ == "__main__":
    process_video_pipeline(video_path)




Starting Action Recognition...
Processing video: C:\Users\mgree\Downloads\F1.avi
FPS: 20.0, Total Frames: 212, Duration: 10.60 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Predictions: [[4.4350110e-12 1.3825411e-07 9.9999988e-01 1.4700746e-11 6.9757072e-20
  9.2104822e-12 1.5204419e-14 4.4069060e-16 1.6993846e-14 5.6892245e-17]], Predicted Class: 2
Frame 40: Action: Fall left
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Predictions: [[2.9197856e-14 4.4646047e-08 9.9999928e-01 2.1693753e-09 1.8539154e-18
  6.6428493e-07 4.6437021e-12 1.4012966e-11 2.8334939e-12 1.3068129e-14]], Predicted Class: 2
Frame 50: Action: Fall left
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Predictions: [[1.8475749e-08 4.2433576e-06 9.9999392e-01 2.1356779e-09 9.2258740e-10
  1.7105975e-06 7.0671451e-09 3.1926334e-08 3.0696974e-09 5.2321916e-08]], Predicted Class: 2
Frame 60: Action: Fall left
[1m1/1[0m [32m━━━━━━━━━━━