# Separating only student audio files from segmented audio based on time stamps

In [None]:
import os
import shutil

# Define source and destination directories
dir1 = '/content/drive/MyDrive/Jyothi Mam/second/temp'
dir2 = '/content/drive/MyDrive/Jyothi Mam/second/student_audios'

# Ensure the destination directory exists
os.makedirs(dir2, exist_ok=True)

# List all files in the source directory
files = os.listdir(dir1)

# Filter and move files based on the pattern
for file in files:
    if file.startswith('SPEAKER_00'):
        # Define full file path
        source_file = os.path.join(dir1, file)
        destination_file = os.path.join(dir2, file)
        # Move the file
        shutil.move(source_file, destination_file)
        print(f"Moved: {file}")

print("File transfer completed.")


File transfer completed.


# LSTM-Based Emotion Recognition and Grading System with MFCC Feature Extraction

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
dir2 = '/content/drive/MyDrive/Jyothi Mam/second/student_audios'
files = os.listdir(dir2)
new_paths = [os.path.join(dir2, file) for file in files]
print(new_paths)

['/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_2.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_4.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_6.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_8.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_10.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_12.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_14.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_16.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_18.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_20.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_22.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_24.wav', '/con

In [None]:
from keras.models import load_model
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import librosa

model = load_model('/content/drive/MyDrive/Jyothi Mam/second/my_model.h5')

def extract_mfcc(filename):
    y, sr = librosa.load(filename, duration=3, offset=0.5)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    return mfcc

def preprocess_new_data(paths):
    new_mfccs = []
    for path in paths:
        mfcc = extract_mfcc(path)
        new_mfccs.append(mfcc)
    new_mfccs = np.array(new_mfccs)
    new_mfccs = np.expand_dims(new_mfccs, -1)
    return new_mfccs

labels = ['fear', 'angry', 'disgust', 'neutral', 'sad', 'ps', 'happy']
enc = OneHotEncoder()
enc.fit(np.array(labels).reshape(-1, 1))
new_X = preprocess_new_data(new_paths)
predictions = model.predict(new_X)
predicted_labels = enc.inverse_transform(predictions)
for i, label in enumerate(predicted_labels):
    print(f"File: {new_paths[i]} - Predicted Label: {label[0]}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00.wav - Predicted Label: fear
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_2.wav - Predicted Label: happy
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_4.wav - Predicted Label: fear
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_6.wav - Predicted Label: fear
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_8.wav - Predicted Label: fear
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_10.wav - Predicted Label: fear
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_12.wav - Predicted Label: fear
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_14.wav - Predicted Label: fear
File: /content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_16.wav - Predicted Label: fe

In [None]:
def calculate_grade(predicted_labels):
    emotion_weights = {
        'fear': 10,
        'angry': 5,
        'disgust': 5,
        'neutral': 15,
        'sad': 10,
        'ps': 25,
        'happy': 20
    }
    total_score = 0
    for label in predicted_labels:
        emotion = label[0]
        if emotion in emotion_weights:
            total_score += emotion_weights[emotion]

    # Normalize the score
    max_possible_score = len(predicted_labels) * max(emotion_weights.values())
    normalized_score = (total_score / max_possible_score) * 100

    # Ensure the score is at least 1
    normalized_score = max(1, normalized_score)

    return round(normalized_score, 2)

# Calculate the grade
student_grade = calculate_grade(predicted_labels)

print(f"Student's Interview Grade: {student_grade:.2f} / 100")


Student's Interview Grade: 37.86 / 100


# Modulation Analysis

### Analyzes an audio signal to calculate a modulation score by evaluating pitch variations, volume variations, and tone variations using the librosa library.

- Higher pitch variation indicates more fluctuation in pitch, which might suggest expressive or dynamic speech. Lower variation suggests more monotonic or stable pitch.
- Higher volume variation indicates changes in loudness, suggesting dynamic or expressive speech. Lower variation suggests more consistent or even volume.
- Higher tone variation suggests more dynamic or varied tonal quality in the speech, while lower variation suggests more consistent tonal quality.
- Lower modulation scores (closer to 0) indicate more monotonic and consistent speech, with little variation in pitch, volume, and tone.
- Higher modulation scores (200-350 in your case) indicate more dynamic, varied, and possibly more engaging speech, with significant changes in pitch, volume, and tone.




In [None]:
import librosa
import numpy as np

def analyze_modulation(y, sr):
    # Pitch analysis
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    pitch_variations = np.std(pitches[pitches > 0])

    # Volume analysis
    rms = librosa.feature.rms(y=y)[0]
    volume_variations = np.std(rms)

    # Spectral contrast for tone
    contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr), axis=1)
    tone_variations = np.std(contrast)

    # Combine into a single score
    modulation_score = (pitch_variations + volume_variations + tone_variations) / 3

    return modulation_score

In [None]:
def analyze_audio_files(paths):
    scores = {}
    for path in paths:
        if path.lower().endswith('.wav'):
            y, sr = librosa.load(path, sr=None)
            score = analyze_modulation(y, sr)
            filename = os.path.basename(path)
            scores[filename] = score
    return scores

# Directory containing your audio files
dir2 = '/content/drive/MyDrive/Jyothi Mam/second/student_audios'
files = os.listdir(dir2)
new_paths = [os.path.join(dir2, file) for file in files]
print(new_paths)

# Analyze the audio files and get modulation scores
modulation_scores = analyze_audio_files(new_paths)

# Print the results
for filename, score in modulation_scores.items():
    print(f"File: {filename} - Modulation Score: {score:.2f}")

['/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_2.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_4.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_6.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_8.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_10.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_12.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_14.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_16.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_18.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_20.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_22.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_24.wav', '/con

# Confidence Assessment
### Assesses the confidence level in speech by analyzing the speech rate, pause ratio, and pitch stability of an audio signal, then combines these factors into a single confidence score using the librosa library.

-  A higher speech rate (faster tempo) is generally associated with higher confidence, as confident speakers tend to speak more fluently and quickly.
- A lower pause ratio (i.e., more continuous speech with fewer pauses) is often associated with higher confidence, as confident speakers tend to have fewer hesitations.
- Higher pitch stability (lower variation) suggests a more consistent and steady voice, which can be a sign of confidence. A shaky or highly variable pitch might indicate nervousness.
- Scores closer to 1 suggest a highly confident speaker.
- Scores closer to 0 suggest a less confident or more hesitant speaker.




In [None]:
def assess_confidence(y, sr):
    # Speech rate
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)

    # Pauses
    non_silent_intervals = librosa.effects.split(y, top_db=20)
    total_duration = len(y) / sr
    speech_duration = sum(interval[1] - interval[0] for interval in non_silent_intervals) / sr
    pause_ratio = 1 - (speech_duration / total_duration)

    # Pitch stability
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    pitch_stability = 1 / (np.std(pitches[pitches > 0]) + 1)

    # Combine factors
    confidence_score = (tempo / 120 + (1 - pause_ratio) + pitch_stability) / 3

    return confidence_score

In [None]:
def analyze_confidence_scores(paths):
    scores = {}

    for path in paths:
        if path.lower().endswith('.wav'):
            y, sr = librosa.load(path, sr=None)
            score = assess_confidence(y, sr)
            filename = os.path.basename(path)
            scores[filename] = score[0]
    return scores

# Directory containing your audio files
dir2 = '/content/drive/MyDrive/Jyothi Mam/second/student_audios'
files = os.listdir(dir2)
new_paths = [os.path.join(dir2, file) for file in files]
print(new_paths)

# Analyze the audio files and get confidence scores
confidence_scores = analyze_confidence_scores(new_paths)

# Print the results
for filename, score in confidence_scores.items():
    print(f"File: {filename} - Confidence Score: {score:.2f}")


['/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_2.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_4.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_6.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_8.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_10.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_12.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_14.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_16.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_18.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_20.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_22.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_24.wav', '/con

# Filler Words Detection
##### This code detects filler words in an audio file by converting the speech to text using Google's speech recognition service and then calculating the ratio of filler words (like "um" and "uh") to the total word count. If speech recognition fails, it returns 0.

- A value close to 0 indicates fewer filler words relative to the total number of words, while a value close to 1 indicates a higher proportion of filler words

In [None]:
!pip install SpeechRecognition



In [None]:
import speech_recognition as sr

def detect_filler_words(audio_file):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio)
        filler_words = ['um', 'uh', 'like', 'you know', 'so', 'Basically', 'Actually', 'Just', 'Really']
        filler_count = sum(text.lower().count(word) for word in filler_words)
        word_count = len(text.split())
        filler_ratio = filler_count / word_count if word_count > 0 else 0
        return filler_ratio
    except:
        return 0  # Return 0 if speech recognition fails

In [None]:
def analyze_filler_word_scores(paths):
    scores = {}
    for path in paths:
        if path.lower().endswith('.wav'):
            score = detect_filler_words(path)
            filename = os.path.basename(path)
            scores[filename] = score
    return scores

# Directory containing your audio files
dir2 = '/content/drive/MyDrive/Jyothi Mam/second/student_audios'
files = os.listdir(dir2)
new_paths = [os.path.join(dir2, file) for file in files]
print(new_paths)

# Analyze the audio files and get filler word scores
filler_word_scores = analyze_filler_word_scores(new_paths)

# Print the results
for filename, score in filler_word_scores.items():
    print(f"File: {filename} - Filler Word Ratio: {score:.2f}")


['/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_2.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_4.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_6.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_8.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_10.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_12.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_14.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_16.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_18.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_20.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_22.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_24.wav', '/con

# Total Evaluation
#### This code evaluates interview performance by analyzing different audio segments. It calculates an overall interview score for each segment based on modulation, confidence, and filler word usage, then computes the final score as the average of all segment scores. The final interview score is printed on a scale of 0 to 100.

In [None]:
import os
dir2 = '/content/drive/MyDrive/Jyothi Mam/second/student_audios'
f1 = os.listdir(dir2)
student_segments = []
for i in f1:
  file_path=os.path.join(dir2,i)
  student_segments.append(file_path)
print(student_segments)
print(len(student_segments))

['/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_2.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_4.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_6.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_8.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_10.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_12.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_14.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_16.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_18.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_20.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_22.wav', '/content/drive/MyDrive/Jyothi Mam/second/student_audios/SPEAKER_00_24.wav', '/con

In [37]:
def assess_interview(audio_file):
    y, sr = librosa.load(audio_file)

    modulation_score = analyze_modulation(y, sr)
    confidence_score = assess_confidence(y, sr)
    filler_ratio = detect_filler_words(audio_file)
    grade_score = calculate_grade(predicted_labels)

    # Normalize scores
    modulation_norm = np.clip(modulation_score / 350, 0, 1)  # Normalized to a scale of 0-1
    confidence_norm = np.clip(confidence_score, 0, 1)    # Normalized to a scale of 0-1
    filler_norm = 1 - np.clip(filler_ratio, 0, 1)            # Invert so that lower filler word usage is better

    # Combine scores (adjust weights as needed)
    overall_score = (modulation_norm * 0.25 + confidence_norm * 0.35 + filler_norm * 0.20 + grade_score /100 * 0.20) * 100

    return overall_score

# Assess each segment
segment_scores = [assess_interview(segment) for segment in student_segments]

# Calculate final score
final_score = np.mean(segment_scores)

# Print final score
print(f"Final Interview Score: {final_score:.2f}")

Final Interview Score: 67.23


#### This code generates a detailed report of an interview assessment, including the overall score, scores for individual segments, and areas for improvement based on the final score and segment scores. It provides specific recommendations for improving interview skills, consistency, and confidence based on the scores.

In [39]:
def generate_report(final_score, segment_scores):
    report = f"Interview Assessment Report\n"
    report += f"Overall Score: {final_score:.2f}/100\n\n"
    report += f"Segment Scores:\n"
    for i, score in enumerate(segment_scores):
        score_value = float(score)  # Convert to float (or use score.item())
        report += f"Segment {i+1}: {score_value:.2f}/100\n"

    # report += "\nAreas for Improvement:\n"
    # if final_score < 60:
    #     report += "- Work on overall interview skills, including voice modulation and confidence\n"
    # if min(segment_scores) < final_score - 10:
    #     report += "- Aim for more consistent performance across all responses\n"
    # if final_score < 70:
    #     report += "- Practice reducing filler words and speaking more confidently\n"

    return report


# Print final report
print(generate_report(final_score, segment_scores))


Interview Assessment Report
Overall Score: 67.23/100

Segment Scores:
Segment 1: 68.04/100
Segment 2: 68.53/100
Segment 3: 66.57/100
Segment 4: 64.43/100
Segment 5: 73.38/100
Segment 6: 67.01/100
Segment 7: 69.21/100
Segment 8: 62.66/100
Segment 9: 66.89/100
Segment 10: 70.12/100
Segment 11: 70.03/100
Segment 12: 65.14/100
Segment 13: 71.70/100
Segment 14: 67.06/100
Segment 15: 68.98/100
Segment 16: 61.24/100
Segment 17: 68.13/100
Segment 18: 74.53/100
Segment 19: 70.58/100
Segment 20: 64.30/100
Segment 21: 63.69/100
Segment 22: 66.97/100
Segment 23: 69.87/100
Segment 24: 65.09/100
Segment 25: 62.30/100
Segment 26: 59.02/100
Segment 27: 68.84/100
Segment 28: 68.04/100



  score_value = float(score)  # Convert to float (or use score.item())
