In [None]:
import cv2
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
import face_recognition
import pandas as pd
import subprocess
import urllib.request
from pytube import YouTube
import os
from youtube_transcript_api import YouTubeTranscriptApi
import math
import time

In [None]:
def calculate_speaker_position(frame, shoulder_midpoint_x):
    """Return the signed horizontal offset of the speaker from the frame centre.

    Args:
        frame: Image array; its width is read from ``frame.shape[1]``.
        shoulder_midpoint_x: Horizontal coordinate of the shoulder midpoint,
            expressed in the same units as the frame width.

    Returns:
        ``shoulder_midpoint_x`` minus half the frame width (negative when the
        midpoint lies left of the centre, positive when it lies right of it).
    """
    _, width = frame.shape[:2]
    centre_x = width / 2
    return shoulder_midpoint_x - centre_x

In [None]:
# Running log of detected emotions; get_emotion() appends a label from
# `emotion_labels`, or None when no face is found in the frame.
emotions = []
# Label names, indexed by the argmax of the emotion model's prediction vector.
emotion_labels = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprised', 'Neutral']
# Keras model loaded from 'emotion_model.h5' (path is relative to the working directory).
emotion_model = load_model('emotion_model.h5')
# Frontal-face Haar cascade shipped with OpenCV; used to locate faces before classification.
face_haar_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def one_hot_encode(number, num_classes=7):
    """
    One-hot encode a 1-based class number.

    Args:
        number: Class number in the range 1..num_classes (inclusive).
        num_classes: Length of the returned encoding.

    Returns:
        A list of ``num_classes`` integers that is all zeros except for a 1
        at position ``number - 1``.

    Raises:
        IndexError: If ``number`` is outside 1..num_classes. Without this
            check, 0 and negative numbers would wrap around through negative
            list indexing and silently encode the wrong class.
    """
    if not 1 <= number <= num_classes:
        raise IndexError(
            f"number must be between 1 and {num_classes}, got {number}"
        )
    encoding = [0] * num_classes
    encoding[number - 1] = 1
    return encoding

def get_emotion(frame):
    """Classify the emotion of the first face detected in a BGR frame.

    Side effects:
        * Draws a red rectangle around the detected face directly on ``frame``.
        * Appends the detected label (or None when no usable face is found)
          to the module-level ``emotions`` list, one entry per call.

    Args:
        frame: BGR image array as returned by ``cv2.VideoCapture.read``.

    Returns:
        A one-hot list with one entry per label in ``emotion_labels``, or an
        all-zero list of the same length when no usable face is found.
    """
    num_classes = len(emotion_labels)
    frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_haar_cascade.detectMultiScale(frame_grey)
    if len(faces) == 0:
        emotions.append(None)
        return [0] * num_classes

    # Only the first detection is used.
    (x, y, w, h) = faces[0]
    cv2.rectangle(frame, pt1=(x, y), pt2=(x + w, y + h), color=(0, 0, 255), thickness=2)

    # Pad the face box by 5 px. The start indices are clamped at 0 because a
    # negative slice start wraps around to the far edge of the image, which
    # yields an empty (or wrong) region for faces near the top/left border.
    # Slice ends beyond the image size are truncated by NumPy automatically.
    top = max(y - 5, 0)
    left = max(x - 5, 0)
    roi_gray = frame_grey[top:y + h + 5, left:x + w + 5]
    if roi_gray.size == 0:
        # Keep `emotions` aligned with the number of calls, as in the
        # no-face branch above.
        emotions.append(None)
        return [0] * num_classes

    # Model input: one 48x48 single-channel image scaled to [0, 1].
    roi_gray = cv2.resize(roi_gray, (48, 48))
    image_pixels = img_to_array(roi_gray)
    image_pixels = np.expand_dims(image_pixels, axis=0)
    image_pixels /= 255

    predictions = emotion_model.predict(image_pixels)
    max_index = np.argmax(predictions[0])
    emotions.append(emotion_labels[max_index])
    # one_hot_encode expects a 1-based class number.
    return one_hot_encode(max_index + 1, num_classes=num_classes)

In [None]:
# MediaPipe Pose estimator; `pose.process(...)` is called on sampled video
# frames by the feature-extraction loop further down.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
# MediaPipe Holistic model with the same confidence thresholds.
# NOTE(review): `holistic_model` is not referenced by any code visible in this
# notebook -- confirm it is still needed.
mp_holistic = mp.solutions.holistic
holistic_model = mp_holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

def get_head_turn_angle(results):
    """Compute two head-turn angles (in degrees) from pose landmarks.

    The angles are measured between the inner-eye line (left eye -> right eye)
    and, respectively, the left-eye -> nose vector and the nose -> right-eye
    vector, using only the landmarks' x/y coordinates.

    Args:
        results: MediaPipe pose result whose ``pose_landmarks`` is not None.

    Returns:
        ``[left_angle, right_angle]`` in degrees. An angle is 0 when it cannot
        be computed because one of the vectors has zero length (coincident
        landmarks); previously that case produced NaN.
    """
    landmarks = results.pose_landmarks.landmark
    left_eye = landmarks[mp_pose.PoseLandmark.LEFT_EYE_INNER]
    right_eye = landmarks[mp_pose.PoseLandmark.RIGHT_EYE_INNER]
    nose = landmarks[mp_pose.PoseLandmark.NOSE]

    # NOTE(review): landmark objects are presumably always truthy, so this
    # fallback may never trigger -- kept from the original for safety.
    if not (left_eye and right_eye and nose):
        return [0, 0]

    eye_line_vector = np.array([right_eye.x - left_eye.x, right_eye.y - left_eye.y])
    eye_left_nose_vector = np.array([nose.x - left_eye.x, nose.y - left_eye.y])
    eye_right_nose_vector = np.array([right_eye.x - nose.x, right_eye.y - nose.y])

    def _angle_deg(u, v):
        # Angle between two 2-D vectors in degrees; 0.0 for degenerate input.
        norm_product = np.linalg.norm(u) * np.linalg.norm(v)
        if norm_product == 0:
            return 0.0
        # Rounding can push the cosine marginally outside [-1, 1], which
        # would make arccos return NaN, so clip it first.
        cosine = np.clip(np.dot(u, v) / norm_product, -1.0, 1.0)
        return np.arccos(cosine) * (180 / np.pi)

    return [
        _angle_deg(eye_line_vector, eye_left_nose_vector),
        _angle_deg(eye_line_vector, eye_right_nose_vector),
    ]

def get_shoulder_midpoint(results):
    """Return the horizontal midpoint between the two shoulder landmarks.

    Args:
        results: MediaPipe pose result whose ``pose_landmarks`` is not None.

    Returns:
        A single-element list holding the mean of the left and right shoulder
        ``x`` values, or ``[0]`` if either landmark object is falsy.
    """
    left = results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER]
    right = results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER]

    midpoint_x = (left.x + right.x) / 2 if (left and right) else 0
    return [midpoint_x]

In [None]:

# mp_drawing = mp.solutions.drawing_utils
# MediaPipe Holistic instance created with default settings.
# NOTE(review): `holistic` is not referenced by any code visible in this
# notebook -- confirm it is still needed.
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic()

# Spreadsheet describing the videos to process. The loop below reads the
# 'youtube_video_code', 'duration' and '_id' columns of each row.
excel_file = 'testdata10videos.xlsx'
sheet_name = 'Sheet1'
df = pd.read_excel(excel_file, sheet_name=sheet_name)
# Folder for transcript text files; created up front if it does not exist.
output_folder = "Transcripts"
os.makedirs(output_folder, exist_ok=True)


# Feature extraction: for every video in `df`, sample frames, build one
# feature vector per second and save the vectors in 64-second windows.
#
# Layout of one sampled frame inside a per-second feature vector:
#   7 emotion one-hot values + 1 shoulder midpoint + 2 head-turn angles
#   + (x, y) for each of the 33 pose landmarks.
SAMPLES_PER_SECOND = 8
WINDOW_SECONDS = 64
NUM_POSE_LANDMARKS = 33
FEATURES_PER_SAMPLE = 7 + 1 + 2 + 2 * NUM_POSE_LANDMARKS          # 76
FEATURES_PER_SECOND = SAMPLES_PER_SECOND * FEATURES_PER_SAMPLE     # 608

matrix_folder = 'TestDataSetFeatures'
labels_folder = 'TestRating'

for index, row in df.iterrows():
    video_link = row['youtube_video_code']
    youtube_url = "https://www.youtube.com/watch?v=" + video_link
    yt = YouTube(youtube_url)
    video_stream = yt.streams.get_highest_resolution()
    cap = cv2.VideoCapture(video_stream.url)

    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if fps <= 0:
            # Without a frame rate there is no way to group frames into
            # seconds (and the divisions below would fail).
            print(f"Skipping video {video_link}: could not read a valid FPS")
            continue
        video_length_seconds = frame_count / fps

        # Sample every `sample_stride`-th frame, at most SAMPLES_PER_SECOND
        # per second. max(1, ...) avoids a modulo-by-zero when fps < 8.
        sample_stride = max(1, fps // SAMPLES_PER_SECOND)

        current_second = 0
        print(math.floor((row["duration"] - 10)/64))
        for numWindows in range(math.floor((row["duration"] - 10)/64)):
            # NOTE(review): window index 0 is skipped without consuming any
            # frames, so the window saved with index 1 holds the first 64
            # seconds of the video -- confirm this is intended.
            if numWindows == 0:
                continue
            isret = True

            matrixOfCurrentWindow = []
            for window64sec in range(WINDOW_SECONDS):
                if current_second >= row["duration"]:
                    break

                matrxOfCurrentSecond = []
                num = 0
                for onesec in range(fps):
                    ret, frame = cap.read()
                    if not ret:
                        isret = False
                        break

                    if num < SAMPLES_PER_SECOND and onesec % sample_stride == 0:
                        num = num + 1

                        # Convert before get_emotion(), which draws a
                        # rectangle onto `frame` in place.
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                        # Emotion one-hot encoding.
                        matrxOfCurrentSecond.extend(get_emotion(frame))

                        results = pose.process(frame_rgb)
                        if results.pose_landmarks is not None:
                            matrxOfCurrentSecond.extend(get_shoulder_midpoint(results))
                            matrxOfCurrentSecond.extend(get_head_turn_angle(results))

                            # Pose landmarks; low-visibility points are zeroed.
                            for point in results.pose_landmarks.landmark:
                                if point.visibility > 0.5:
                                    matrxOfCurrentSecond.extend((point.x, point.y))
                                else:
                                    matrxOfCurrentSecond.extend((0, 0))
                        else:
                            # No pose detected: zero-fill midpoint, both
                            # angles and all landmark coordinates.
                            matrxOfCurrentSecond.extend([0] * (3 + 2 * NUM_POSE_LANDMARKS))

                # Drop incomplete seconds (stream ended or too few samples).
                # The previous hard-coded limit of 1376 could never be reached
                # with the features above (presumably it was sized for an
                # additional per-second transcript embedding that is currently
                # disabled), so every second was rejected and every saved
                # window was empty.
                if len(matrxOfCurrentSecond) < FEATURES_PER_SECOND:
                    break
                current_second += 1
                print(current_second)
                matrixOfCurrentWindow.append(matrxOfCurrentSecond)

                # NOTE(review): purpose of this pause is unclear; kept as-is.
                time.sleep(0.03)

            currentLabel = []  # TODO: fill in the label for this window.

            # Make sure the folders exist, create them if not.
            os.makedirs(matrix_folder, exist_ok=True)
            os.makedirs(labels_folder, exist_ok=True)

            filename_matrix = f'matrix_of_video_{row["_id"]}_{numWindows}.npy'
            filename_labels = f'popularity_labels_{row["_id"]}_{numWindows}.npy'

            full_path_matrix = os.path.join(matrix_folder, filename_matrix)
            full_path_labels = os.path.join(labels_folder, filename_labels)

            # Saving the NumPy arrays.
            np.save(full_path_matrix, matrixOfCurrentWindow)
            np.save(full_path_labels, currentLabel)

            if not isret:
                # The stream is exhausted; further windows would all be empty.
                break
    finally:
        # Always free the capture, even if processing raised or was skipped.
        cap.release()
        cv2.destroyAllWindows()

# matrix_array = np.array(matrixOfVideos)
# # Save
# np.save('matrix_of_videos.npy', matrix_array)


In [None]:
pip install soundfile


In [None]:
pip install moviepy


In [None]:
pip install pydub

In [None]:
# Run the following commands to install ffmpeg in a separate terminal window:
# sudo apt update 
# sudo apt install ffmpeg

In [None]:
from pytube import YouTube
import os
import librosa
import numpy as np
from pydub import AudioSegment

def download_audio(youtube_url, output_path):
    """Download a YouTube video's audio track and save it as ``audio.mp3``.

    The first audio-only stream listed by pytube is downloaded in its original
    container, converted to MP3 with pydub, and the intermediate download is
    removed afterwards (also when the conversion fails).

    Args:
        youtube_url: Full URL of the YouTube video.
        output_path: Directory that receives the download and ``audio.mp3``.

    Returns:
        Path of the exported MP3 file on success, or None if anything went
        wrong (the error is printed rather than raised).
    """
    temp_file = None
    try:
        youtube = YouTube(youtube_url)

        # First audio-only stream in pytube's listing (not necessarily the
        # one with the highest bitrate).
        audio_stream = youtube.streams.filter(only_audio=True).first()
        if audio_stream is None:
            print(f"An error occurred: no audio-only stream found for {youtube_url}")
            return None

        # Download the audio in its original format (usually webm or mp4).
        temp_file = audio_stream.download(output_path)

        # Convert the downloaded file to mp3 using pydub.
        output_file = os.path.join(output_path, "audio.mp3")
        audio = AudioSegment.from_file(temp_file)
        audio.export(output_file, format="mp3")

        print(f"Audio downloaded and converted successfully to: {output_file}")
        return output_file

    except Exception as e:
        # Best-effort helper: report the problem instead of raising.
        print(f"An error occurred: {e}")
        return None

    finally:
        # Remove the intermediate download, even if the conversion failed.
        if temp_file is not None and os.path.exists(temp_file):
            try:
                os.remove(temp_file)
            except OSError as e:
                print(f"An error occurred: {e}")


def calculate_engagement(audio_path, peak_threshold=None, low_threshold=None):
    """Rate an audio file's engagement from the variability of its STFT magnitude.

    The function averages the STFT magnitude over frequency for every frame,
    takes the standard deviation of that per-frame series, and compares it
    with two thresholds.

    Args:
        audio_path: Path of the audio file to load with librosa.
        peak_threshold: Standard deviation above which the result is
            "High (Alerting)". Defaults to the module-level ``threshold_peak``.
        low_threshold: Standard deviation below which the result is
            "Low (Boring)". Defaults to the module-level ``threshold_low``.

    Returns:
        One of "High (Alerting)", "Low (Boring)" or "Moderate".
    """
    # Fall back to the notebook-level settings so existing single-argument
    # calls keep working.
    if peak_threshold is None:
        peak_threshold = threshold_peak
    if low_threshold is None:
        low_threshold = threshold_low

    # Load audio file
    audio, _ = librosa.load(audio_path)

    # Magnitude spectrogram (frequency bins x frames)
    energy = np.abs(librosa.stft(audio))

    # Mean magnitude for each frame
    mean_energy = np.mean(energy, axis=0)

    # Spread of the per-frame magnitude over the whole clip
    energy_std = np.std(mean_energy)

    if energy_std > peak_threshold:
        engagement_score = "High (Alerting)"
    elif energy_std < low_threshold:
        engagement_score = "Low (Boring)"
    else:
        engagement_score = "Moderate"

    return engagement_score


# URL of the YouTube video to analyse.
video_url = 'https://www.youtube.com/watch?v=Tvn0E3W7I88'

# Directory that download_audio() writes "audio.mp3" into.
output_file = 'audio'

# Thresholds on the standard deviation of the per-frame energy, read by
# calculate_engagement(). Adjust as needed.
threshold_peak = 0.05
threshold_low = 0.01

download_audio(video_url, output_file)

# Example usage: analyse the file that download_audio() just wrote. The path
# is built from the download directory instead of a machine-specific
# absolute path, so the cell runs on any checkout.
audio_path = os.path.join(output_file, "audio.mp3")
engagement_score = calculate_engagement(audio_path)
print(f"Engagement Score: {engagement_score}")

In [None]:
import numpy as np
import librosa
from pydub import AudioSegment
from pytube import YouTube

def download_audio(youtube_url, output_path):
    """Download a YouTube video's audio track and save it as ``audio.mp3``.

    The first audio-only stream listed by pytube is downloaded in its original
    container, converted to MP3 with pydub, and the intermediate download is
    removed afterwards (also when the conversion fails).

    Args:
        youtube_url: Full URL of the YouTube video.
        output_path: Directory that receives the download and ``audio.mp3``.

    Returns:
        Path of the exported MP3 file on success, or None if anything went
        wrong (the error is printed rather than raised).
    """
    temp_file = None
    try:
        youtube = YouTube(youtube_url)

        # First audio-only stream in pytube's listing (not necessarily the
        # one with the highest bitrate).
        audio_stream = youtube.streams.filter(only_audio=True).first()
        if audio_stream is None:
            print(f"An error occurred: no audio-only stream found for {youtube_url}")
            return None

        # Download the audio in its original format.
        temp_file = audio_stream.download(output_path)

        # Convert the downloaded file to mp3.
        output_file = os.path.join(output_path, "audio.mp3")
        audio = AudioSegment.from_file(temp_file)
        audio.export(output_file, format="mp3")

        print(f"Audio downloaded and converted successfully to: {output_file}")
        return output_file

    except Exception as e:
        # Best-effort helper: report the problem instead of raising.
        print(f"An error occurred: {e}")
        return None

    finally:
        # Remove the intermediate download, even if the conversion failed.
        if temp_file is not None and os.path.exists(temp_file):
            try:
                os.remove(temp_file)
            except OSError as e:
                print(f"An error occurred: {e}")



def calculate_engagement(audio_path, peak_threshold=None, low_threshold=None):
    """Rate an audio file's engagement from the variability of its STFT magnitude.

    The function averages the STFT magnitude over frequency for every frame,
    takes the standard deviation of that per-frame series, and compares it
    with two thresholds.

    Args:
        audio_path: Path of the audio file to load with librosa.
        peak_threshold: Standard deviation above which the result is
            "High (Alerting)". Defaults to the module-level ``threshold_peak``.
        low_threshold: Standard deviation below which the result is
            "Low (Boring)". Defaults to the module-level ``threshold_low``.

    Returns:
        One of "High (Alerting)", "Low (Boring)" or "Moderate".
    """
    # Fall back to the notebook-level settings so existing single-argument
    # calls keep working.
    if peak_threshold is None:
        peak_threshold = threshold_peak
    if low_threshold is None:
        low_threshold = threshold_low

    # Load audio file
    audio, _ = librosa.load(audio_path)

    # Magnitude spectrogram (frequency bins x frames)
    energy = np.abs(librosa.stft(audio))

    # Mean magnitude for each frame
    mean_energy = np.mean(energy, axis=0)

    # Spread of the per-frame magnitude over the whole clip
    energy_std = np.std(mean_energy)

    if energy_std > peak_threshold:
        engagement_score = "High (Alerting)"
    elif energy_std < low_threshold:
        engagement_score = "Low (Boring)"
    else:
        engagement_score = "Moderate"

    return engagement_score


# URL of the YouTube video to analyse.
video_url = 'https://www.youtube.com/watch?v=Tvn0E3W7I88'

# Directory that download_audio() writes "audio.mp3" into.
output_file = 'audio'

# Thresholds on the standard deviation of the per-frame energy, read by
# calculate_engagement(). Adjust as needed.
threshold_peak = 0.05
threshold_low = 0.01

download_audio(video_url, output_file)

# Example usage: analyse the file that download_audio() just wrote. The path
# is built from the download directory instead of a machine-specific
# absolute path, so the cell runs on any checkout.
audio_path = os.path.join(output_file, "audio.mp3")
engagement_score = calculate_engagement(audio_path)
print(f"Engagement Score: {engagement_score}")


In [None]:
pip install youtube-transcript-api
pip install SpeechRecognition

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi  
import speech_recognition as sr

def get_transcript(video_id):
    """Fetch a YouTube video's transcript as newline-separated text.

    Args:
        video_id: YouTube video id (the value of the ``v=`` URL parameter).

    Returns:
        The ``text`` of every transcript entry, each followed by a newline.
        If fetching or processing fails, a string of the form
        ``"An error occurred: <message>"`` is returned instead of raising.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        # Timestamps are not needed here; only the text of each entry is kept.
        return "".join(f"{entry['text']}\n" for entry in transcript)

    except Exception as e:
        return f"An error occurred: {str(e)}"
    
# Example usage: print the transcript text (or the error-message string) returned for one video id.
print(get_transcript("KLiXmteCvUI"))