In [1]:
!pip install tensorflow==2.5.0 opencv-python matplotlib   twilio moviepy librosa



In [3]:
!pip install sounddevice

Collecting sounddevice
  Downloading sounddevice-0.4.6-py3-none-win_amd64.whl (199 kB)
     -------------------------------------- 199.7/199.7 kB 1.3 MB/s eta 0:00:00
Installing collected packages: sounddevice
Successfully installed sounddevice-0.4.6


In [8]:
import tensorflow as tf
from matplotlib import pyplot as plt
import cv2
import glob
import os
import numpy as np
import pandas as pd
from os import listdir
from os.path import isfile, join
from datetime import datetime
import pickle
import sounddevice as sd
import featurecalc

In [6]:
# Create the TFLite interpreter for the pose model file (path is relative to the
# current working directory) and allocate its tensors; allocate_tensors() must
# run before set_tensor()/invoke() are used in the cells below.
interpreter=tf.lite.Interpreter(model_path='lite-model_movenet_singlepose_lightning_3.tflite')
interpreter.allocate_tensors()

In [None]:
# Centroid of a set of 2-D keypoints
def calculate_centroid(keypoints):
    """Return the per-column mean of the first two columns of ``keypoints``.

    Args:
        keypoints: 2-D NumPy array indexed as ``keypoints[:, 0]`` and
            ``keypoints[:, 1]`` (one row per keypoint).

    Returns:
        1-D array ``[mean(column 0), mean(column 1)]``.
    """
    column_means = [np.mean(keypoints[:, column]) for column in (0, 1)]
    return np.array(column_means)

# Joint-angle features for both legs and both arms
def calculate_angles(keypoints):
    """Compute four angle features (radians) from a keypoint array.

    Args:
        keypoints: array indexable as ``keypoints[i]`` giving a 2-element
            coordinate; rows 2-13 are read.

    Returns:
        1-D array ``[left_leg, right_leg, left_arm, right_arm]``. Each entry is
        the difference of two ``arctan2`` headings and is NOT wrapped, so it
        can fall anywhere in (-2*pi, 2*pi).

    NOTE(review): the row indices used here (hips 11/8, knees 12/9, ankles
    13/10, shoulders 5/2, elbows 6/3, wrists 7/4) do not look like the
    17-keypoint ordering implied by EDGES in this notebook (where 11-13-15 and
    12-14-16 form the leg chains) -- confirm against the layout the classifier
    was trained with before changing anything; the trained model depends on
    these exact features.
    NOTE(review): legs measure both segments from the knee, whereas arms
    measure wrist-from-elbow against elbow-from-shoulder, so the two kinds of
    angle are offset differently. Kept as-is for feature compatibility.
    """
    def heading(origin, target):
        # Direction of the segment origin -> target, as arctan2(d[1], d[0]).
        return np.arctan2(target[1] - origin[1], target[0] - origin[0])

    # Legs: angle at the knee between the knee->ankle and knee->hip segments.
    hip_l, knee_l, ankle_l = keypoints[11], keypoints[12], keypoints[13]
    hip_r, knee_r, ankle_r = keypoints[8], keypoints[9], keypoints[10]
    leg_l = heading(knee_l, ankle_l) - heading(knee_l, hip_l)
    leg_r = heading(knee_r, ankle_r) - heading(knee_r, hip_r)

    # Arms: heading of elbow->wrist minus heading of shoulder->elbow.
    shoulder_l, elbow_l, wrist_l = keypoints[5], keypoints[6], keypoints[7]
    shoulder_r, elbow_r, wrist_r = keypoints[2], keypoints[3], keypoints[4]
    arm_l = heading(elbow_l, wrist_l) - heading(shoulder_l, elbow_l)
    arm_r = heading(elbow_r, wrist_r) - heading(shoulder_r, elbow_r)

    return np.array([leg_l, leg_r, arm_l, arm_r])

# Feet-contact feature
def check_feet_contact(keypoints):
    """Compare coordinate 1 of rows 14 and 11 and return a boolean flag.

    Args:
        keypoints: array indexable as ``keypoints[i][1]``; rows 14 ("left
            foot") and 11 ("right foot") are read.

    Returns:
        Whether the smaller of the two values is <= the larger one.

    NOTE(review): as written this is True for every pair of comparable numbers
    (it can only be False when a value is NaN), so the feature carries no
    information. It is left unchanged because the pickled classifier consumes
    it; the intended rule (e.g. a ground-level threshold) needs confirming.
    """
    left_value = keypoints[14][1]
    right_value = keypoints[11][1]
    if left_value > right_value:
        # Right is the smaller value, left the larger.
        return right_value <= left_value
    # Left is the smaller (or equal) value, right the larger.
    return left_value <= right_value



In [7]:
# Draw every sufficiently confident keypoint as a filled dot
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark keypoints on ``frame`` in place.

    Args:
        frame: image array whose ``shape`` is (height, width, channels).
        keypoints: array whose last axis is (row, column, confidence); the
            first two entries are multiplied by the frame height and width,
            and singleton dimensions are squeezed away.
        confidence_threshold: a keypoint is drawn only when its confidence is
            strictly greater than this value.

    Returns:
        None. ``frame`` is modified by ``cv2.circle``.
    """
    height, width, _ = frame.shape
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for row, col, score in scaled:
        if not score > confidence_threshold:
            continue
        # cv2 expects (x, y) == (column, row); radius 4, green, filled.
        cv2.circle(frame, (int(col), int(row)), 4, (0,255,0), -1)

In [8]:
# Skeleton connectivity: a dict mapping each (keypoint index, keypoint index)
# pair to a single-letter color code (presumably matplotlib-style 'm'/'c'/'y' --
# TODO confirm). The keys define which keypoints are joined by a line;
# draw_connections() in this notebook iterates the pairs but draws every edge in
# one fixed color, so the color values are currently unused there.
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

In [9]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw skeleton edges on ``frame`` in place.

    Args:
        frame: image array whose ``shape`` is (height, width, channels).
        keypoints: array whose last axis is (row, column, confidence); rows and
            columns are scaled by the frame height and width after squeezing.
        edges: mapping of ``(index_a, index_b)`` pairs to a color value. Only
            the keys are used; every line is drawn in the same fixed color.
        confidence_threshold: an edge is drawn only when both endpoints have a
            confidence strictly greater than this value.

    Returns:
        None. ``frame`` is modified by ``cv2.line``.
    """
    height, width, _ = frame.shape
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))
    line_color = (0,0,255)  # fixed color used for every edge

    for (start, end), _unused_color in edges.items():
        row_a, col_a, score_a = scaled[start]
        row_b, col_b, score_b = scaled[end]

        both_confident = (score_a > confidence_threshold) and (score_b > confidence_threshold)
        if both_confident:
            # cv2 expects points as (x, y) == (column, row).
            cv2.line(frame, (int(col_a), int(row_a)), (int(col_b), int(row_b)), line_color, 2)

In [9]:
import cv2
import numpy as np
import pyaudio
import librosa
import librosa.display

# Real-time fall detection from microphone audio + webcam video.
#
# Per loop iteration this cell:
#   1. reads one webcam frame and runs the TFLite pose interpreter on it,
#   2. builds the pose feature vector with the `featurecalc` helpers,
#   3. reads one chunk of microphone audio and computes mean MFCCs over a
#      rolling window of recent audio,
#   4. asks both pickled classifiers for a prediction and prints a message when
#      both flag the frame.
# Press 'q' in the video window to stop.

# --- Audio capture settings -------------------------------------------------
rate = 44100  # Sample rate in Hz (44.1 kHz)
channels = 1  # Mono
# 32-bit float samples. NOTE: this name shadows the built-in format(); it is
# kept unchanged in case other cells read it.
format = pyaudio.paFloat32
CHUNK_FRAMES = 1024  # audio frames read per loop iteration
N_MFCC = 50          # number of MFCC coefficients fed to the audio model
# Length of the rolling audio window used for MFCCs.
# TODO confirm: should match the clip length the audio model was trained on.
AUDIO_WINDOW_SECONDS = 2.0

# --- Load both classifiers once, before any device is opened -----------------
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
file_path = 'E:/sightica/final_dataset/clf.pkl'
with open(file_path, 'rb') as f:
    clf = pickle.load(f)

file_path = 'E:/sightica/final_dataset/gradient_booster.pkl'
with open(file_path, 'rb') as f:
    gradient_booster = pickle.load(f)

# Tensor metadata does not change between frames, so query it once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Rolling audio buffer. It must exist before the first concatenate, and it is
# trimmed every iteration so memory use and MFCC cost stay bounded.
max_audio_samples = int(rate * AUDIO_WINDOW_SECONDS)
audio_data = np.zeros(0, dtype=np.float32)

p = pyaudio.PyAudio()
stream = None
cap = None
try:
    stream = p.open(format=format,
                    channels=channels,
                    rate=rate,
                    input=True,
                    frames_per_buffer=CHUNK_FRAMES)
    cap = cv2.VideoCapture(0)

    while True:
        # ---- Video ----------------------------------------------------------
        ret, frame = cap.read()
        if not ret:
            # Camera unplugged / busy: `frame` is None, so stop cleanly.
            print("Camera frame could not be read; stopping.")
            break

        # Resize (with padding) to the 192x192 float32 input fed to the model.
        img = tf.image.resize_with_pad(np.expand_dims(frame, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.float32)

        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Keep the two coordinate columns of the 17 keypoints; drop the score.
        keypoints_data = keypoints_with_scores.reshape([17, 3])[:, :2]
        centroid = featurecalc.calculate_centroid(keypoints_data)
        angles = featurecalc.calculate_angles(keypoints_data)
        feet_contact = featurecalc.check_feet_contact(keypoints_data)
        features = np.concatenate((centroid, angles, [feet_contact]))
        feature_vector = np.concatenate((keypoints_data.flatten(), features), axis=0)
        x_test1 = feature_vector.reshape(1, -1)

        # ---- Audio ----------------------------------------------------------
        # The video loop can run slower than the microphone delivers data;
        # do not raise on input overflow, just take what is available.
        data = stream.read(CHUNK_FRAMES, exception_on_overflow=False)
        data_np = np.frombuffer(data, dtype=np.float32)
        audio_data = np.concatenate((audio_data, data_np), axis=0)[-max_audio_samples:]

        # Mean MFCC vector over the current audio window.
        audio_features = np.mean(
            librosa.feature.mfcc(y=audio_data, sr=rate, n_mfcc=N_MFCC).T, axis=0)
        x_test2 = audio_features.reshape([1, N_MFCC])

        # ---- Decision -------------------------------------------------------
        video_pred = clf.predict(x_test1)
        audio_pred = gradient_booster.predict(x_test2)
        # Same rule as before, with the precedence made explicit: the audio
        # label must be truthy AND the video label must equal 1.
        # NOTE(review): confirm the audio model's positive label is 1.
        if audio_pred[0] and video_pred[0] == 1:
            print("Fall detected!")

        # Perform real-time actions based on the fall detection prediction
        # (e.g., trigger alarm, send notifications, etc.)

        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to stop the loop
            break
finally:
    # Always release the microphone, camera and windows, even after an error.
    if stream is not None:
        stream.stop_stream()
        stream.close()
    p.terminate()
    if cap is not None:
        cap.release()
    cv2.destroyAllWindows()


NameError: name 'np' is not defined

In [16]:
# Webcam fall detection with an on-frame overlay.
#
# Every frame is labelled ' Fall' or ' Not Fall' from the pose classifier; fall
# frames are also saved to fall_images/ and logged to fall_times.txt (both
# relative to the working directory set below). The audio classifier is
# evaluated as well, but its prediction is not used in the decision yet.
#
# NOTE(review): `audio_stream`, `sr` and `duration` are not defined in this
# cell; they must already exist in the kernel. The recorded output of this cell
# is "PortAudioError: Can't read from a callback stream", which suggests
# `audio_stream` was opened with a callback -- a stream must be opened WITHOUT
# a callback for the blocking read() below to work. Also confirm what read()
# returns for that stream type before trusting the frombuffer() conversion.
os.chdir('E:/sightica/')

# Load both classifiers once instead of re-reading the pickles on every frame.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
file_path = 'E:/sightica/final_dataset/clf.pkl'
with open(file_path, 'rb') as f:
    clf = pickle.load(f)

file_path = 'E:/sightica/final_dataset/gradient_booster.pkl'
with open(file_path, 'rb') as f:
    gradient_booster = pickle.load(f)

# Tensor metadata does not change between frames, so query it once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera busy/unplugged): `frame` is None.
            break

        # Capture audio data from the audio stream and view it as int16 samples.
        audio_data_bytes = audio_stream.read(int(sr * duration))
        audio_data = np.frombuffer(audio_data_bytes, dtype=np.int16)

        # Resize (with padding) to the 192x192 float32 input fed to the model.
        img = tf.image.resize_with_pad(np.expand_dims(frame, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.float32)

        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Pose features: 17 keypoints x 2 coordinates + centroid + angles + feet flag.
        keypoints_data = keypoints_with_scores.reshape([17, 3])[:, :2]
        centroid = calculate_centroid(keypoints_data)
        angles = calculate_angles(keypoints_data)
        feet_contact = check_feet_contact(keypoints_data)
        features = np.concatenate((centroid, angles, [feet_contact]))
        feature_vector = np.concatenate((keypoints_data.flatten(), features), axis=0)
        x_test1 = feature_vector.reshape(1, -1)
        y_pred1 = clf.predict(x_test1)

        # librosa only accepts floating-point audio passed by keyword.
        # Scale int16 to [-1, 1) -- TODO confirm this matches the scaling used
        # when the audio model was trained.
        audio_float = audio_data.astype(np.float32) / 32768.0
        mfccs = np.mean(librosa.feature.mfcc(y=audio_float, sr=sr, n_mfcc=50).T, axis=0)
        x_test2 = mfccs.reshape([1, 50])
        y_pred2 = gradient_booster.predict(x_test2)  # not used in the decision yet

        # Pick the overlay text; any label other than 0/1 draws nothing.
        prediction = y_pred1[0]
        if prediction == 1:
            label = ' Fall'
        elif prediction == 0:
            label = ' Not Fall'
        else:
            label = None

        if label is not None:
            draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
            draw_keypoints(frame, keypoints_with_scores, 0.4)
            cv2.putText(frame,
                        label,
                        (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 255),
                        2,
                        cv2.LINE_4)
            cv2.imshow("Fall Detected", frame)

        if prediction == 1:
            # Save the annotated frame and log the time; one timestamp is used
            # for both so the file name and the log line agree.
            now = datetime.now()
            os.makedirs('fall_images', exist_ok=True)
            img_name = f'fall_{now.strftime("%Y-%m-%d_%H-%M-%S")}.jpg'
            img_path = os.path.join('fall_images', img_name)
            cv2.imwrite(img_path, frame)
            with open('fall_times.txt', 'a') as f:
                f.write(f'Fall detected at {now.strftime("%Y-%m-%d %H:%M:%S")}\n')

        # Check for key press to exit the loop
        if cv2.waitKey(10) & 0xFF==ord('q'):
            break
finally:
    # Release the camera, audio stream and windows even if the loop raised.
    cap.release()
    audio_stream.stop()
    audio_stream.close()
    cv2.destroyAllWindows()

PortAudioError: Can't read from a callback stream [PaErrorCode -9977]

In [7]:
# Webcam fall detection with debouncing.
#
# A fall is reported only after FALL_STREAK_FRAMES consecutive frames are
# classified as a fall. When that happens the annotated frame is shown in a
# "Fall Detected" window, saved to fall_images/ and logged to fall_times.txt
# (both relative to the working directory set below). The window is closed
# again once a non-fall frame ends the streak. Press 'q' to stop.
os.chdir('E:/sightica/')

FALL_STREAK_FRAMES = 5  # consecutive fall frames required before reporting
display_flag = False    # True while the "Fall Detected" window is open
counter = 0             # length of the current run of consecutive fall frames

# Load the classifier once instead of re-reading the pickle on every frame.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
file_path = 'E:/sightica/final_dataset/clf.pkl'
with open(file_path, 'rb') as f:
    clf = pickle.load(f)

# Tensor metadata does not change between frames, so query it once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera busy/unplugged): `frame` is None.
            break

        # Resize (with padding) to the 192x192 float32 input fed to the model.
        img = tf.image.resize_with_pad(np.expand_dims(frame, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.float32)

        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Pose features: 17 keypoints x 2 coordinates + centroid + angles + feet flag.
        keypoints_data = keypoints_with_scores.reshape([17, 3])[:, :2]
        centroid = calculate_centroid(keypoints_data)
        angles = calculate_angles(keypoints_data)
        feet_contact = check_feet_contact(keypoints_data)
        features = np.concatenate((centroid, angles, [feet_contact]))
        feature_vector = np.concatenate((keypoints_data.flatten(), features), axis=0)
        x_test1 = feature_vector.reshape(1, -1)
        y_pred1 = clf.predict(x_test1)

        # Count consecutive fall frames directly. (Combining a sliding buffer
        # with a counter made the trigger need 5 frames on a cold start but 9
        # frames after any non-fall frame.)
        if y_pred1[0] == 1:
            counter += 1
        else:
            counter = 0

        if counter == FALL_STREAK_FRAMES:
            # Fires once per streak, on the frame that completes it.
            display_flag = True
            print(f"Fall detected for {FALL_STREAK_FRAMES} subsequent frames")
            draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
            draw_keypoints(frame, keypoints_with_scores, 0.4)
            cv2.putText(frame,
                        ' Fall',
                        (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 255),
                        2,
                        cv2.LINE_4)
            cv2.imshow("Fall Detected", frame)

            # Save the annotated frame and log the time; one timestamp is used
            # for both so the file name and the log line agree.
            now = datetime.now()
            os.makedirs('fall_images', exist_ok=True)
            img_name = f'fall_{now.strftime("%Y-%m-%d_%H-%M-%S")}.jpg'
            img_path = os.path.join('fall_images', img_name)
            cv2.imwrite(img_path, frame)
            with open('fall_times.txt', 'a') as f:
                f.write(f'Fall detected at {now.strftime("%Y-%m-%d %H:%M:%S")}\n')
        elif counter == 0 and display_flag:
            # Streak ended: close only the snapshot window, keep "Camera" open.
            try:
                cv2.destroyWindow("Fall Detected")
            except cv2.error:
                pass  # best effort: the user may already have closed it
            display_flag = False

        # Display frames and update key press event
        cv2.imshow("Camera", frame)
        # Check for key press to exit the loop
        if cv2.waitKey(10) & 0xFF==ord('q'):
            break
finally:
    # Release the video capture object and close all windows, even on error.
    cap.release()
    cv2.destroyAllWindows()





Fall detected for 5 subsequent frames
