# **Installing Required Libraries**

In [12]:
!pip install mediapipe opencv-python pandas scikit-learn



# **Importing Mediapipe and OpenCV**

In [2]:
import mediapipe as mp
import cv2

# **Declare and import Mediapipe Holistic**

In [3]:
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# **Implement Mediapipe Holistic and detect landmarks on your Body**

In [5]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.7) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        frame = cv2.resize(frame, (640, 480))

        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks

        # Recolor image back to BGR for rendering
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                  mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                  )

        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                  )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                  )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                  )

        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

## Import Libraries

In [6]:
import csv
import os
import numpy as np
import pandas

In [7]:
num_coords = len(results.pose_landmarks.landmark)+len(results.face_landmarks.landmark) ##Total amount of landmarks detected

num_coords

501

In [8]:
landmarks = ['class']
for val in range(1, num_coords+1):
    landmarks+= ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

# List of all the landmarks with the X, Y, Z and Visibility Parameters

!['Map of all Mediapipe Holistic Landmarks'](https://google.github.io/mediapipe/images/mobile/pose_tracking_full_body_landmarks.png)

In [9]:
landmarks

['class',
 'x1',
 'y1',
 'z1',
 'v1',
 'x2',
 'y2',
 'z2',
 'v2',
 'x3',
 'y3',
 'z3',
 'v3',
 'x4',
 'y4',
 'z4',
 'v4',
 'x5',
 'y5',
 'z5',
 'v5',
 'x6',
 'y6',
 'z6',
 'v6',
 'x7',
 'y7',
 'z7',
 'v7',
 'x8',
 'y8',
 'z8',
 'v8',
 'x9',
 'y9',
 'z9',
 'v9',
 'x10',
 'y10',
 'z10',
 'v10',
 'x11',
 'y11',
 'z11',
 'v11',
 'x12',
 'y12',
 'z12',
 'v12',
 'x13',
 'y13',
 'z13',
 'v13',
 'x14',
 'y14',
 'z14',
 'v14',
 'x15',
 'y15',
 'z15',
 'v15',
 'x16',
 'y16',
 'z16',
 'v16',
 'x17',
 'y17',
 'z17',
 'v17',
 'x18',
 'y18',
 'z18',
 'v18',
 'x19',
 'y19',
 'z19',
 'v19',
 'x20',
 'y20',
 'z20',
 'v20',
 'x21',
 'y21',
 'z21',
 'v21',
 'x22',
 'y22',
 'z22',
 'v22',
 'x23',
 'y23',
 'z23',
 'v23',
 'x24',
 'y24',
 'z24',
 'v24',
 'x25',
 'y25',
 'z25',
 'v25',
 'x26',
 'y26',
 'z26',
 'v26',
 'x27',
 'y27',
 'z27',
 'v27',
 'x28',
 'y28',
 'z28',
 'v28',
 'x29',
 'y29',
 'z29',
 'v29',
 'x30',
 'y30',
 'z30',
 'v30',
 'x31',
 'y31',
 'z31',
 'v31',
 'x32',
 'y32',
 'z32',
 'v32',
 '

## **Defining the class name(emotion)**

In [10]:
class_name = "Happy"

## **Writes to a file called coords.csv where all landmarks are being stored**

In [191]:
with open('coords.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

## **Captures the emotion to append coordinates to 'coords.csv'**

In [192]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()

        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks

        # Recolor image back to BGR for rendering
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                  mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                  )

        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                  )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                  )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                  )
        #Export Coordinates
        try:
            #Gets Pose Landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            #Gets Face Landmarks
            face = results.face_landmarks.landmark
            face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())
            
            #Combines both the above lists
            row = pose_row+face_row
            
            #Adds the Class Name Column
            row.insert(0, class_name)
            
            #Exports Face/Pose Landmarks to CSV File
            with open(class_name+'.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(row)
            
        except:
            pass

            
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split

## **Importing the csv file**

In [12]:
df = pd.read_csv('coords.csv')

In [13]:
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,Sad,0.576529,0.827049,-1.712944,0.999285,0.605775,0.71067,-1.726587,0.998386,0.626264,...,-0.036148,0,0.64986,0.65973,-0.013494,0,0.655686,0.651342,-0.013994,0
1,Sad,0.570876,0.828265,-2.180692,0.999272,0.603048,0.714301,-2.185753,0.998456,0.623798,...,-0.037667,0,0.651024,0.661732,-0.016429,0,0.657148,0.65217,-0.016852,0
2,Sad,0.561089,0.830556,-2.192711,0.999278,0.59716,0.718821,-2.195179,0.998547,0.617687,...,-0.039388,0,0.649545,0.661109,-0.018589,0,0.655703,0.650947,-0.019088,0
3,Sad,0.559284,0.830674,-2.23771,0.999179,0.595134,0.719776,-2.233418,0.998569,0.615416,...,-0.041665,0,0.64933,0.661595,-0.021011,0,0.655609,0.650971,-0.021473,0
4,Sad,0.558821,0.830375,-2.215702,0.999115,0.594393,0.719516,-2.210604,0.998608,0.614618,...,-0.040906,0,0.645321,0.659638,-0.020521,0,0.651834,0.648958,-0.021043,0


In [14]:
df.tail()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
1840,Happy,0.55632,0.591801,-2.06192,0.997297,0.58655,0.497047,-2.007233,0.998086,0.60786,...,-0.017106,0,0.639315,0.49446,0.011675,0,0.646399,0.486183,0.012479,0
1841,Happy,0.556317,0.591695,-2.056154,0.997271,0.586485,0.496974,-2.001709,0.998071,0.607799,...,-0.01655,0,0.638499,0.494134,0.012259,0,0.645676,0.485545,0.013116,0
1842,Happy,0.555892,0.590378,-2.056362,0.997067,0.586283,0.496112,-2.002282,0.997987,0.607675,...,-0.017209,0,0.639174,0.495295,0.012148,0,0.646275,0.486921,0.013012,0
1843,Happy,0.555851,0.590339,-2.057995,0.996948,0.586236,0.496084,-2.003749,0.997965,0.607628,...,-0.017054,0,0.63948,0.495816,0.012252,0,0.64659,0.4874,0.013117,0
1844,Happy,0.555979,0.590647,-2.058151,0.996872,0.586337,0.496324,-2.004448,0.9979,0.607725,...,-0.018267,0,0.638817,0.495276,0.01047,0,0.645952,0.486944,0.011285,0


In [15]:
df[df['class']=='Attentive']

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
789,Attentive,0.592306,0.643177,-1.652350,0.999706,0.629199,0.544809,-1.608103,0.999448,0.652744,...,-0.011320,0,0.680274,0.529991,0.025191,0,0.686457,0.524038,0.026110,0
790,Attentive,0.594496,0.645313,-2.127808,0.999387,0.628795,0.550551,-2.075946,0.999305,0.652062,...,-0.010297,0,0.679394,0.532609,0.025299,0,0.685982,0.526065,0.026109,0
791,Attentive,0.595155,0.645808,-2.143837,0.999031,0.628280,0.552910,-2.086152,0.999092,0.651323,...,-0.009610,0,0.679116,0.534122,0.026488,0,0.685560,0.527742,0.027299,0
792,Attentive,0.595554,0.646614,-2.198131,0.998744,0.627589,0.554425,-2.134501,0.998874,0.650311,...,-0.010434,0,0.679384,0.537229,0.025479,0,0.685862,0.530537,0.026376,0
793,Attentive,0.597417,0.647405,-2.179438,0.998658,0.627536,0.555719,-2.112373,0.998806,0.650254,...,-0.011719,0,0.680084,0.537349,0.024424,0,0.686450,0.531001,0.025199,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117,Attentive,0.595280,0.649366,-1.949187,0.998376,0.616858,0.556453,-1.899229,0.998641,0.642552,...,-0.011475,0,0.672483,0.542610,0.020590,0,0.678927,0.535227,0.021273,0
1118,Attentive,0.596232,0.649871,-1.941711,0.998373,0.617837,0.556892,-1.890642,0.998632,0.643991,...,-0.012280,0,0.675292,0.540990,0.020114,0,0.681673,0.533482,0.020765,0
1119,Attentive,0.596897,0.650696,-1.965100,0.998358,0.618469,0.557521,-1.915053,0.998616,0.644954,...,-0.012206,0,0.674863,0.540847,0.019634,0,0.681367,0.533416,0.020276,0
1120,Attentive,0.597426,0.652202,-1.971318,0.998341,0.618955,0.558290,-1.921316,0.998584,0.645636,...,-0.012650,0,0.674773,0.541468,0.019683,0,0.681216,0.534113,0.020330,0


In [16]:
X = df.drop('class', axis=1) #Features
y = df['class'] #Target Variable

## **Spliting our dataset(coords.csv) for Training and Testing**

In [17]:
x_train, x_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=1234)

In [18]:
x_train

Unnamed: 0,x1,y1,z1,v1,x2,y2,z2,v2,x3,y3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
1448,0.542311,0.643588,-1.065149,0.999873,0.552317,0.566192,-0.983929,0.999958,0.564849,0.569089,...,0.002977,0,0.582086,0.561031,0.047036,0,0.586473,0.555767,0.049533,0
1107,0.592651,0.646181,-1.952886,0.998314,0.615078,0.554742,-1.912584,0.998602,0.639891,0.554521,...,-0.012000,0,0.671217,0.542782,0.020181,0,0.677551,0.535302,0.020870,0
133,0.569542,0.858804,-2.778629,0.997881,0.608432,0.745787,-2.752179,0.998835,0.632067,0.739916,...,-0.051525,0,0.675279,0.694145,-0.032003,0,0.681629,0.685900,-0.033136,0
770,0.352531,0.932777,-1.766242,0.988930,0.339703,0.811965,-1.816632,0.992508,0.359423,0.786149,...,-0.044087,0,0.388795,0.701920,-0.056730,0,0.390267,0.687022,-0.060293,0
1115,0.594780,0.647236,-1.928288,0.998483,0.616294,0.554895,-1.881228,0.998739,0.641616,0.554626,...,-0.011782,0,0.670534,0.542990,0.020171,0,0.676856,0.535992,0.020821,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1228,0.522151,0.648672,-1.227713,0.998361,0.556467,0.556414,-1.179708,0.995457,0.579925,0.552057,...,-0.006915,0,0.594072,0.534157,-0.007297,0,0.600371,0.525686,-0.008653,0
1077,0.597932,0.649041,-1.941810,0.997955,0.619473,0.557472,-1.894575,0.998304,0.646962,0.557902,...,-0.010591,0,0.674446,0.543353,0.022310,0,0.680756,0.536475,0.022982,0
1318,0.518883,0.635516,-1.081819,0.998321,0.537876,0.562070,-0.974722,0.998233,0.550585,0.562128,...,0.004784,0,0.556433,0.552954,0.050478,0,0.559925,0.548696,0.053111,0
723,0.298960,0.922575,-1.342327,0.990968,0.279989,0.806382,-1.394714,0.993825,0.297732,0.779800,...,-0.020044,0,0.307309,0.716769,-0.037871,0,0.306091,0.700201,-0.040903,0


# Implementing the ML model

In [19]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

## **Defining the pipelines/models we shall use**

In [20]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

## Training each model in the pipeline with the Training Dataset

In [22]:
fit_models = {}

for algo, pipeline in pipelines.items():
    model = pipeline.fit(x_train, y_train)
    fit_models[algo] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

## **Predicting values using 'Random Forests Classifier'**

In [24]:
fit_models['rf'].predict(x_test)

array(['Distracted', 'Sad', 'Sad', 'Sleepy', 'Distracted', 'Sad', 'Happy',
       'Sad', 'Attentive', 'Attentive', 'Sad', 'Distracted', 'Sad',
       'Attentive', 'Distracted', 'Sleepy', 'Distracted', 'Happy',
       'Happy', 'Sleepy', 'Sleepy', 'Sad', 'Sad', 'Sad', 'Sleepy', 'Sad',
       'Happy', 'Sleepy', 'Distracted', 'Sad', 'Attentive', 'Distracted',
       'Sleepy', 'Sleepy', 'Sleepy', 'Sleepy', 'Attentive', 'Distracted',
       'Distracted', 'Sad', 'Distracted', 'Attentive', 'Distracted',
       'Sleepy', 'Attentive', 'Happy', 'Sleepy', 'Sleepy', 'Distracted',
       'Attentive', 'Sleepy', 'Distracted', 'Sad', 'Sleepy', 'Sleepy',
       'Distracted', 'Sleepy', 'Sleepy', 'Sleepy', 'Attentive', 'Sleepy',
       'Attentive', 'Attentive', 'Attentive', 'Sad', 'Attentive',
       'Distracted', 'Happy', 'Attentive', 'Distracted', 'Attentive',
       'Happy', 'Happy', 'Sad', 'Happy', 'Sad', 'Attentive', 'Distracted',
       'Sleepy', 'Attentive', 'Happy', 'Distracted', 'Sleepy',
       

In [25]:
from sklearn.metrics import accuracy_score, precision_score
import pickle

## **Calculating and displaying the accuracy for each model**

In [26]:
for algo, model in fit_models.items():
    yhat = model.predict(x_test)
    print(algo, accuracy_score(y_test, yhat))
    print(algo, precision_score(y_test, yhat, average="weighted"))

lr 1.0
lr 1.0
rc 1.0
rc 1.0
rf 1.0
rf 1.0
gb 0.9981949458483754
gb 0.9982126424577051


In [27]:
fit_models['lr'].predict(x_test)

array(['Distracted', 'Sad', 'Sad', 'Sleepy', 'Distracted', 'Sad', 'Happy',
       'Sad', 'Attentive', 'Attentive', 'Sad', 'Distracted', 'Sad',
       'Attentive', 'Distracted', 'Sleepy', 'Distracted', 'Happy',
       'Happy', 'Sleepy', 'Sleepy', 'Sad', 'Sad', 'Sad', 'Sleepy', 'Sad',
       'Happy', 'Sleepy', 'Distracted', 'Sad', 'Attentive', 'Distracted',
       'Sleepy', 'Sleepy', 'Sleepy', 'Sleepy', 'Attentive', 'Distracted',
       'Distracted', 'Sad', 'Distracted', 'Attentive', 'Distracted',
       'Sleepy', 'Attentive', 'Happy', 'Sleepy', 'Sleepy', 'Distracted',
       'Attentive', 'Sleepy', 'Distracted', 'Sad', 'Sleepy', 'Sleepy',
       'Distracted', 'Sleepy', 'Sleepy', 'Sleepy', 'Attentive', 'Sleepy',
       'Attentive', 'Attentive', 'Attentive', 'Sad', 'Attentive',
       'Distracted', 'Happy', 'Attentive', 'Distracted', 'Attentive',
       'Happy', 'Happy', 'Sad', 'Happy', 'Sad', 'Attentive', 'Distracted',
       'Sleepy', 'Attentive', 'Happy', 'Distracted', 'Sleepy',
       

In [28]:
y_test

1271    Distracted
232            Sad
220            Sad
623         Sleepy
1430    Distracted
           ...    
930      Attentive
755         Sleepy
854      Attentive
637         Sleepy
552         Sleepy
Name: class, Length: 554, dtype: object

## **Uses Pickle to save the model**

In [29]:
with open('emotions.pkl', 'wb') as f:
    pickle.dump(fit_models['lr'], f)

## **Makes model detections**

In [30]:
#Make Model Detections
with open('emotions.pkl', 'rb') as f:
    model = pickle.load(f)

# **Detecting emotions on Zoom Video Recordings**

## Installing Libraries

In [155]:
!pip install SpeechRecognition pydub nltk



## Importing Libraries

In [173]:
#Libraries for Speech to Text
import speech_recognition as sr
import time
import datetime
import os 
from pydub import AudioSegment
from pydub.silence import split_on_silence

#Libraries for Text Summary
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

In [174]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sabad\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Speech-to-Text

In [175]:
# create a speech recognition object
r = sr.Recognizer()

# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path):
    """
    Splitting the large audio file into chunks
    and apply speech recognition on each of these chunks
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)  
    # split audio sound where silence is 700 miliseconds or more and get chunks
    chunks = split_on_silence(sound,
        # experiment with this value for your target audio file
        min_silence_len = 500,
        # adjust this per requirement
        silence_thresh = sound.dBFS-14,
        # keep the silence for 1 second, adjustable as well
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk 
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in
        # the `folder_name` directory.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            # try converting it to text
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                print("")
            else:
                text = f"{text.capitalize()}. "
                whole_text += text
    # return the text for all chunks detected
    return whole_text

## Text Summarizer

In [176]:
def read_article(file_name):
    file = open(file_name, "r")
    filedata = file.readlines()
    article = filedata[0].split(". ")
    sentences = []

    for sentence in article:
        print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop() 
    
    return sentences

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
 
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
 
    all_words = list(set(sent1 + sent2))
 
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
 
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
 
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
 
    return 1 - cosine_distance(vector1, vector2)
 
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix


def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read text anc split it
    sentences =  read_article(file_name)

    # Step 2 - Generate Similary Martix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity martix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 4 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
    print("Indexes of top ranked_sentence order are ", ranked_sentence)    

    for i in range(top_n):
      summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Offcourse, output the summarize texr
    print("Summarize Text: \n", ". ".join(summarize_text))


## Analyzing Emotions and displaying different statistics

In [177]:
filename = "audio_only.wav"

cap = cv2.VideoCapture('zoom_0.mp4')

# DECLARING VARIABLES

timeOfStayAttentive = 0
timeOfStayDistracted = 0
timeOfStayHappy = 0
timeOfStaySad = 0
timeOfStaySleepy = 0

attention_count = 0
attention_array = []
attention_frames = []

distracted_count = 0
distracted_array = []
distracted_frames = []

happy_count = 0
happy_array = []
happy_frames = []

sad_count = 0
sad_array = []
sad_frames = []

sleepy_count = 0
sleepy_array = []
sleepy_frames = []

prev_frame_time = 0
new_frame_time = 0


def average(listVar):
    return sum(listVar)/len(listVar)


# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        frame = cv2.resize(frame, (640,480))
        
        new_frame_time = time.time()
        
        fps = 1/(new_frame_time-prev_frame_time)
        prev_frame_time = new_frame_time
 
        fps = int(fps)
        fps = str(fps)
        
        cv2.putText(frame, fps, (7, 70), cv2.FONT_HERSHEY_SIMPLEX, 3, (100, 255, 0), 3, cv2.LINE_AA)
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        # Recolor image back to BGR for rendering
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                  mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                 )

        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                 )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                 )

        #Export Coordinates
        try:
            #Gets Pose Landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())

            #Gets Face Landmarks
            face = results.face_landmarks.landmark
            face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())

            #Combines both the above lists
            row = pose_row+face_row

            #Make Detections
            X = pd.DataFrame([row])
            emotion_class = model.predict(X)[0]
            #             emotion_prob = model.predict_proba(X)[0]
            #             print(emotion_class, emotion_prob)
            
            #             AVERAGE ATTENTION SPAN OF THE MEETING
            #             WHICH WAS THE MOST PROMINENT EMOTION DURING THE MEETING
            #             DID ANYONE SLEEP, IS SO, HOW MANY TIMES -> SUGGESTIONS ON WHAT TO DO FOR NOT SLEEPING IN A MEETING
            
            if(emotion_class == "Attentive"):
                #Adds the duration to the arrays
                distracted_array.append(timeOfStayDistracted)
                sleepy_array.append(timeOfStaySleepy)
                sad_array.append(timeOfStaySad)
                happy_array.append(timeOfStayHappy)
                
                #Sets the duration back to 0
                timeOfStayDistracted = 0
                timeOfStayHappy = 0
                timeOfStaySad = 0
                timeOfStaySleepy = 0
                
                #Finds the duration
                attention_count += 1
                attention_frames.append(attention_count)
                timeOfStayAttentive = (len(attention_frames) - 1) / int(fps)
                attention_count = 0
                                
            elif(emotion_class == "Distracted"):
                #Adds the duration to the arrays
                attention_array.append(timeOfStayAttentive)
                sleepy_array.append(timeOfStaySleepy)
                sad_array.append(timeOfStaySad)
                happy_array.append(timeOfStayHappy)
                
                #Sets the duration back to 0
                timeOfStayAttentive = 0
                timeOfStayHappy = 0
                timeOfStaySad = 0
                timeOfStaySleepy = 0
                
                #Finds the duration
                distracted_count += 1
                distracted_frames.append(distracted_count)
                timeOfStayDistracted = (len(distracted_frames) - 1) / int(fps)
                
            elif(emotion_class == "Sleepy"):
                #Adds the duration to the arrays
                attention_array.append(timeOfStayAttentive)
                sad_array.append(timeOfStaySad)
                happy_array.append(timeOfStayHappy)
                distracted_array.append(timeOfStayDistracted)
                
                #Sets the duration back to 0
                timeOfStayAttentive = 0
                timeOfStayDistracted = 0
                timeOfStayHappy = 0
                timeOfStaySad = 0
                
                #Finds the duration
                sleepy_count += 1
                sleepy_frames.append(distracted_count)
                timeOfStaySleepy = (len(sleepy_frames) - 1) / int(fps)
                
                if(timeOfStaySleepy > 3):
                    timeOfStaySleepy = timeOfStaySleepy
                else:
                    timeOfStaySleepy = 0
                
            elif(emotion_class == "Sad"):
                #Adds the duration to the arrays
                attention_array.append(timeOfStayAttentive)
                sleepy_array.append(timeOfStaySleepy)
                happy_array.append(timeOfStayHappy)
                distracted_array.append(timeOfStayDistracted)
                
                #Sets the duration back to 0
                timeOfStayAttentive = 0
                timeOfStayDistracted = 0
                timeOfStayHappy = 0
                timeOfStaySleepy = 0
                
                #Finds the duration
                sad_count += 1
                sad_frames.append(sad_count)
                timeOfStaySad = (len(sad_frames) - 1) / int(fps)
                
                
            elif(emotion_class == "Happy"):
                #Adds the duration to the arrays
                attention_array.append(timeOfStayAttentive)
                sleepy_array.append(timeOfStaySleepy)
                sad_array.append(timeOfStaySad)
                distracted_array.append(timeOfStayDistracted)
                
                #Sets the duration back to 0
                timeOfStayAttentive = 0
                timeOfStayDistracted = 0
                timeOfStaySad = 0
                timeOfStaySleepy = 0
                
                #Finds the duration
                happy_count += 1
                happy_frames.append(happy_count)
                timeOfStayHappy = (len(happy_frames) - 1) / int(fps)
                

            #Grab Coordinates
            coords = tuple(np.multiply(
                np.array(
                    (results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x,
                     results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y
                    )), [640, 480]).astype(int)
                )

            #Show detected class name
            cv2.rectangle(image, 
                          (coords[0], coords[1]+5), 
                          (coords[0]+len(emotion_class)*20, 
                           coords[1]-30),
                          (245,117,16), -1)
            cv2.putText(image, emotion_class, coords, cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

        except:
            pass


        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

attention_array = list(filter(lambda num: num != 0, attention_array))
distracted_array = list(filter(lambda num: num != 0, distracted_array))
happy_array = list(filter(lambda num: num != 0, happy_array))
sad_array = list(filter(lambda num: num != 0, sad_array))
sleepy_array = list(filter(lambda num: num != 0, sleepy_array))

# average_attention = result = '{0:02.0f}:{1:02.0f}'.format(*divmod(100 * 60, 60))
try:
    average_attention = datetime.timedelta(seconds=average(attention_array))
except ZeroDivisionError:
    average_attention = 0
    
print("The average attention span of all participants throughut the meeting was:", average_attention)

TotalAttention = sum(attention_array)
TotalDistracted = sum(distracted_array)
TotalSleepy = sum(sleepy_array)
TotalSad = sum(sad_array)
TotalHappy = sum(happy_array)

emotionsDict = {
    'Attentive': TotalAttention,
    'Distracted': TotalDistracted,
    'Sleepy': TotalSleepy,
    'Sad': TotalSad,
    'Happy': TotalHappy
}
time.sleep(1)
prominentEmotion = max(emotionsDict, key=emotionsDict.get)
print("The most prominent emotion is:", prominentEmotion)

try:
    average_sleepy = datetime.timedelta(seconds=average(sleepy_array))
except ZeroDivisionError:
    average_sleepy = 0

if sleepy_array:
    print("The average sleeping time was:",average_sleepy)        
    time.sleep(0.5)
    print("I would suggest making the meeting more interactive and more engaging for future meetings")
    time.sleep(0.5)
    print("This would prevent people from sleeping off")
else:
    print("Very Good!!")
    time.sleep(0.5)
    print("No one slept off during the meeting")

transcript = open("transcript.txt", "w+")
print("Generating Transcript of the meeting.........")
transcript.write(get_large_audio_transcription(filename))
transcript.close()
print("Successfully written to transcript.txt")
time.sleep(1)
print("Here is a short summary of the meeting:-")
generate_summary( "transcript.txt", 2)


The average attention span of all participants throughut the meeting was: 0:00:01.187500
The most prominent emotion is: Attentive
Very Good!!
No one slept off during the meeting
Generating Transcript of the meeting.........











Successfully written to transcript.txt
Here is a short summary of the meeting:-
Hello how are you
Test test
This is a test video
Now going to
Nakhun to become distracted and onto the screen
More tension through the milton

Indexes of top ranked_sentence order are  [(0.38461394503699514, ['This', 'is', 'a', 'test', 'video']), (0.38461394503699514, ['Test', 'test']), (0.05769302748150241, ['Now', 'going', 'to']), (0.05769302748150241, ['Nakhun', 'to', 'become', 'distracted', 'and', 'onto', 'the', 'screen']), (0.05769302748150241, ['More', 'tension', 'through', 'the', 'milton']), (0.05769302748150241, ['Hello', 'how', 'are', 'you'])]
Summarize Text: 
 This is a test video. Test test
