In [None]:
pip install speechrecognition langdetect

In [12]:
import os
import numpy as np
import moviepy.editor as mp
import librosa
import speech_recognition as sr
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from textblob import TextBlob
from langdetect import detect, detect_langs
import tempfile

# Download NLTK resources if not already downloaded.
# punkt -> word_tokenize, wordnet -> WordNetLemmatizer,
# averaged_perceptron_tagger -> pos_tag, stopwords -> stopwords.words.
for _nltk_resource in ('punkt', 'wordnet', 'averaged_perceptron_tagger', 'stopwords'):
    nltk.download(_nltk_resource)

# Function to convert video to text using moviepy
def video_to_text(video_path):
    """Extract the audio track of a video and transcribe it to text.

    The audio is written to a temporary WAV file, read back with
    ``speech_recognition`` and sent to ``Recognizer.recognize_google``.

    Args:
        video_path: Path of the video file to transcribe.

    Returns:
        ``(transcribed_text, temp_audio_path)`` on success. The temporary
        WAV file is intentionally kept so the caller can reuse it (and is
        then responsible for deleting it).
        ``(None, None)`` on any failure; in that case the temporary WAV
        file, if one was created, has already been removed.
    """
    video_clip = None
    temp_audio_path = None
    try:
        video_clip = mp.VideoFileClip(video_path)
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError(f"no audio track found in {video_path!r}")

        # delete=False: the file must outlive this handle so that moviepy
        # and speech_recognition can reopen it by name.
        temp_audio_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        temp_audio_path = temp_audio_file.name
        temp_audio_file.close()

        audio_clip.write_audiofile(temp_audio_path)

        recognizer = sr.Recognizer()
        with sr.AudioFile(temp_audio_path) as source:
            audio_data = recognizer.record(source)

        transcribed_text = recognizer.recognize_google(audio_data)
        return transcribed_text, temp_audio_path
    except Exception as e:
        print(f"Error in video_to_text: {e}")
        # The caller never learns the path on failure, so remove the file here.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass  # best effort: already gone or not removable
        return None, None
    finally:
        # Release the reader resources held by the clip on every path.
        if video_clip is not None:
            video_clip.close()

# Function to preprocess text
def preprocess_text(text):
    """Tokenize, lowercase, drop English stop words and lemmatize *text*.

    Args:
        text: The raw string to process.

    Returns:
        A list of lemmatized, lowercased tokens with English stop words
        removed. Punctuation tokens produced by the tokenizer are kept.
    """
    lowered_tokens = [token.lower() for token in word_tokenize(text)]
    english_stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    return [
        lemmatizer.lemmatize(token)
        for token in lowered_tokens
        if token not in english_stop_words
    ]

# Function to calculate grammar score
def calculate_grammar_score(text):
    """Return the percentage of preprocessed tokens tagged as noun, pronoun or verb.

    The text is first run through ``preprocess_text``; the resulting tokens
    are POS-tagged and every tag starting with ``NN``, ``PRP`` or ``VB`` is
    counted.

    Args:
        text: The raw string to score.

    Returns:
        A value between 0 and 100, or ``0`` when preprocessing leaves no tokens.
    """
    tokens = preprocess_text(text)
    tagged_tokens = pos_tag(tokens)

    token_total = len(tokens)
    if token_total == 0:
        return 0

    # The three prefixes are mutually exclusive, so one pass over the tags
    # yields the same total as counting nouns/pronouns and verbs separately.
    matching = sum(
        1 for _, tag in tagged_tokens if tag.startswith(('NN', 'PRP', 'VB'))
    )
    return (matching / token_total) * 100

# Function to extract MFCC features from an audio file
def extract_features(audio_path):
    """Compute a fixed-length MFCC summary vector for an audio file.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The 13 MFCC coefficients, each averaged over all frames, or
        ``None`` if loading or feature extraction fails (the error is
        printed).
    """
    try:
        # sr=None asks librosa to keep the file's own sample rate.
        signal, sample_rate = librosa.load(audio_path, sr=None)
        mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
        per_frame = mfcc_matrix.T
        return np.mean(per_frame, axis=0)
    except Exception as e:
        print(f"Error in extract_features: {e}")
        return None

# Ensure at least two samples per class
def check_class_distribution(labels):
    """Count the samples per class and reject under-represented classes.

    Args:
        labels: Sequence of class labels.

    Returns:
        A dict mapping each distinct label to its number of occurrences.

    Raises:
        ValueError: If any class occurs fewer than two times.
    """
    classes, occurrences = np.unique(labels, return_counts=True)
    if np.any(occurrences < 2):
        raise ValueError("Each class must have at least 2 samples.")
    return dict(zip(classes, occurrences))

# Function to recognize speech from audio file
def recognize_speech_from_audio(audio_file_path):
    """Transcribe an audio file with ``Recognizer.recognize_google``.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text, or ``None`` if the audio could not be read,
        understood, or the recognition request failed (a message is
        printed in each case).
    """
    speech_recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file_path) as audio_source:
            recorded_audio = speech_recognizer.record(audio_source)
        transcript = speech_recognizer.recognize_google(recorded_audio)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None
    except Exception as e:
        print(f"Error in recognize_speech_from_audio: {e}")
        return None
    return transcript

# Function to detect language of the text
def detect_language(text):
    """Detect the language of *text* with ``langdetect.detect``.

    Args:
        text: The string whose language should be detected.

    Returns:
        The detected language code as a string (e.g. ``"en"``), or ``None``
        if detection fails (the error is printed). The failure value is a
        single ``None`` so that both paths return one value, not a tuple.
    """
    try:
        return detect(text)
    except Exception as e:
        print(f"Error in detecting language: {e}")
        return None

# Function to analyze sentiment using TextBlob
def analyze_sentiment(text):
    """Classify the TextBlob polarity of *text* into a coarse label.

    Args:
        text: The string to analyze.

    Returns:
        ``"Good"`` for positive polarity, ``"Bad"`` for negative polarity
        and ``"Average"`` when the polarity is exactly zero.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "Good"
    if polarity < 0:
        return "Bad"
    return "Average"

# Function to map sentiment score to a rating out of 10
def analyze_sentiment_rating(text):
    """Convert the TextBlob polarity of *text* into a 0-10 rating.

    Args:
        text: The string to analyze.

    Returns:
        ``(polarity + 1) * 5``. With polarity in [-1, 1] this spans
        [0, 10]: -1 maps to 0, 0 maps to 5 and 1 maps to 10.
    """
    polarity = TextBlob(text).sentiment.polarity
    shifted = polarity + 1  # move [-1, 1] to [0, 2]
    return shifted * 5  # stretch [0, 2] to [0, 10]

def gender_predict(temp_audio_path):
    """Predict the speaker's gender for an audio file and delete that file.

    A linear SVM is trained on MFCC features of four bundled reference
    recordings (two labelled male, two labelled female), then applied to
    the features of ``temp_audio_path``.

    Args:
        temp_audio_path: Path of the temporary audio file to classify.
            The file is removed before this function returns, whether or
            not the prediction succeeds.

    Returns:
        ``'Female'`` or ``'Male'``, or ``None`` if training or feature
        extraction fails (a message is printed).
    """
    # Example audio gender prediction
    audio_files = [
        'male-audio.wav',
        'male-audio1.wav',
        'female-audio.wav',
        'female-audio1.wav',
    ]
    labels = [
        0,  # Male
        0,  # Male
        1,  # Female
        1,  # Female
    ]

    try:
        check_class_distribution(labels)

        training_features = [extract_features(file) for file in audio_files]
        # extract_features signals failure with None; stop here with a clear
        # message instead of failing later inside numpy/scikit-learn.
        if any(vector is None for vector in training_features):
            raise ValueError(
                "could not extract features from one or more training audio files"
            )
        features = np.array(training_features)

        # Only the training half of the split is used; the held-out half is
        # discarded, which keeps the fitted model identical to the original.
        X_train, _, y_train, _ = train_test_split(
            features, labels, test_size=0.5, random_state=42, stratify=labels
        )

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)

        classifier = SVC(kernel='linear', probability=True)
        classifier.fit(X_train_scaled, y_train)

        own_features = extract_features(temp_audio_path)
        if own_features is None:
            print("Failed to extract features from the audio file for gender prediction.")
            return None

        own_features_scaled = scaler.transform([own_features])
        predicted_gender = classifier.predict(own_features_scaled)
        return 'Female' if predicted_gender[0] == 1 else 'Male'
    except Exception as e:
        print(f"Error in audio gender prediction: {e}")
        return None
    finally:
        # Clean up the temporary audio file. Doing this in `finally` makes it
        # run on the successful-return path too, not only after a failure.
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)


# Main function to integrate functionalities
def main(video_path):
    """Transcribe a video and print a report derived from the transcript.

    The report contains the transcript, a grammar score, a sentiment label
    and rating, the detected language and a gender prediction made from
    the extracted audio. Nothing is printed here when transcription yields
    no text.

    Args:
        video_path: Path of the video file to analyze.
    """
    # Example video to text conversion
    video_text, temp_audio_path = video_to_text(video_path)
    if not video_text:
        return

    print("_____________________________________________________________________")
    print("\nPsychometric Analysis To Given Video:")
    print("\nVideo to Text Convert successfully:")
    print("Text:", video_text)

    # Example grammar score calculation
    grammar_score = calculate_grammar_score(video_text)
    print(f"Grammar Score: {grammar_score:.2f}/100")

    # Example sentiment analysis
    # NOTE(review): these values come from text sentiment of the transcript,
    # although the printed labels say "Facial Expression" - confirm intent.
    sentiment = analyze_sentiment(video_text)
    sentiment_rating = analyze_sentiment_rating(video_text)
    print("Facial Expression:", sentiment)
    print(f"Facial Expression Rating: {sentiment_rating:.1f}/10")

    # Example language detection
    print("Detected Language:", detect_language(video_text))

    # Gender prediction
    print("Gender Prediction:", gender_predict(temp_audio_path))


# Script entry: run the full analysis on the sample video. The path is
# relative, so it is resolved against the current working directory.
video_path = "project-video.mp4"
main(video_path)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


MoviePy - Writing audio in /tmp/tmp56t5vq8c.wav




MoviePy - Done.
_____________________________________________________________________

Psychometric Analysis To Given Video:

Video to Text Convert successfully:
Text: hello my name is Patricia Espiritu I am 24 years old from the Philippines I have a bachelor's degree in Business Administration major in financial management and I have a tissue and tefl certification I am a bank employee for almost 4 years now where I was at customer service for 2 years I love teaching kids and I'm very excited to make a great impact in your company
Grammar Score: 70.27/100
Facial Expression: Good
Facial Expression Rating: 6.6/10
Detected Language: en
Gender Prediction: Female
