# Prior to starting the application, you need to obtain your free Groq API key from https://console.groq.com/keys. ⛔

**Replace the key at this point in the code below ⬇️**

GROQ_API_KEY = "abcdefghijkl_1234567890"  # Replace with your Groq API key

In [None]:
import os

# Never leave a real key in a shared notebook. The key is read from the
# GROQ_API_KEY environment variable when it is set; otherwise replace the
# placeholder below with your own key from https://console.groq.com/keys.
GROQ_API_KEY_USER = os.environ.get("GROQ_API_KEY", "YOUR_GROQ_API_KEY")  # Replace with your Groq API key

# Set the scoring criteria. Set exactly one option to "True" (the one you want to use) and leave the rest as "False".

In [None]:
# Visibility Settings — set exactly one to True
# These three flags are mutually exclusive: analyze_video() raises
# ValueError('Set exactly one visibility flag to True') unless exactly one
# of the flags it receives is True.
only_hands_visible = False
both_hands_and_legs_visible = False
no_hands_no_legs_visible = True



---



# To run the application, go to the Runtime menu in the navigation bar above and click Run All. The code will then run automatically.

# Sometimes an error occurs while the Stage 2 code is running. Do not worry: simply go to Runtime > Restart Session > Run All. The notebook will start again, and this time the error will not appear. It is caused by some installation defaults in the code (nothing you need to fix).

**NOTE - Make sure you have added your API key above before doing this step.**

**For any concerns write to info@alphaai.biz**






---



# Stage 1 - The dependencies will install automatically. Do not terminate the session, close the browser tab, or interrupt the execution in any way.

In [None]:
# Uninstall conflicting packages
!pip uninstall -y numpy pandas mediapipe librosa speechrecognition opencv-python ffmpeg-python langchain-groq

# Install compatible versions
!pip install numpy==1.26.4
!pip install mediapipe==0.10.14
!pip install pandas==2.2.2
!pip install librosa==0.10.2
!pip install speechrecognition==3.10.4
!pip install opencv-python==4.10.0.84
!pip install ffmpeg-python==0.2.0
!pip install langchain-groq==0.3.0

# Install Whisper for speech recognition
!pip install openai-whisper==20231117

# If the above installation is successful, a number enclosed in square brackets will appear, for example [1] or [2].



---



# Stage 2 - Here the code will run automatically and ask you to upload your video file for analysis.

In [None]:
import groq
import httpx
print(f"groq: {groq.__version__}, httpx: {httpx.__version__}")

In [None]:
# Quick check that a Groq client can be constructed from the key entered at
# the top of the notebook. The main analysis cell below constructs its own
# `client` again from the same key.
from groq import Groq
client = Groq(api_key=GROQ_API_KEY_USER)
print("Groq client initialized!")

**NOTE - In case you do not wish to work with ranges, refer to the code below. It is nothing very technical: just look for the System Prompt and Report Generation sections and follow the Comment/Un-Comment instructions there.**

In [None]:
import cv2
import mediapipe as mp
import ffmpeg
import librosa
import numpy as np
import os
from groq import Groq
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from google.colab import files

# Initialize MediaPipe Holistic
# A single module-level `holistic` instance is created here and reused by
# analyze_video() for every frame. Any failure is printed and the run is
# stopped with exit(1).
try:
    mp_holistic = mp.solutions.holistic
    holistic = mp_holistic.Holistic(
        static_image_mode=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    )
except Exception as e:
    print(f"Failed to initialize MediaPipe: {e}")
    exit(1)

# Initialize Groq client
# `client` is the module-level handle that transcribe_audio() uses for the
# speech-to-text request. The key comes from GROQ_API_KEY_USER, which must
# already be defined by an earlier cell.
GROQ_API_KEY = GROQ_API_KEY_USER
try:
    client = Groq(api_key=GROQ_API_KEY)
except Exception as e:
    print(f"Failed to initialize Groq client: {e}")
    exit(1)

# Initialize Groq LLM for report generation
# `llm` is piped after the chat prompts in analyze_content() and
# generate_report(); it uses the same API key as `client`.
try:
    llm = ChatGroq(
        model="llama-3.3-70b-versatile",
        temperature=0.7,
        max_tokens=2000,
        api_key=GROQ_API_KEY
    )
except Exception as e:
    print(f"Failed to initialize Groq LLM: {e}")
    exit(1)

# System prompt for detailed, single-score reporting
# Two-message chat template (system + human) that generate_report() pipes
# into `llm`. Placeholders filled at invoke time:
#   system: facial_expressions_target, speech_quality_target,
#           content_quality_target
#   human:  facial_expressions, transcript, pitch_std, volume_std, avg_pitch,
#           avg_volume, filler_words, pet_words, facial_expressions_score,
#           speech_quality_score, content_quality_score
# NOTE(review): generate_report() also passes 'final_score_target', which has
# no placeholder in this template.
prompt = ChatPromptTemplate.from_messages([
    ("system", """
You are an expert in evaluating public speaking skills for management students, using a scoring system designed to replicate uSpeek's single-score output (1 to 5). Generate a detailed, student-friendly report based on the provided data. The report must include:

- **Introduction**: Explain the purpose of the evaluation and the components assessed (facial expressions, speech quality, content quality).
- **Facial Expressions**: Describe the smiling ratio and its impact on the score (2.0 for 0% smiling, up to 5.0 for 100% smiling). Highlight strengths, weaknesses, and specific improvements. Target ~{facial_expressions_target}/5.
- **Speech Quality**: Explain modulation (pitch and volume variation), pitch (~150 Hz), and volume (~40 dB). Detail score calculation, strengths, weaknesses, and improvements. Target ~{speech_quality_target}/5.
- **Content Quality**: Assess clarity, relevance, impact, noting filler words (penalized by 0.05 if ratio >5%). Highlight strengths, weaknesses, and improvements. Target ~{content_quality_target}/5.
- **Final Score**: Present the single weighted average score (30% facial expressions, 30% speech quality, 40% content quality) and a +/- 0.2 range (clamped to [1,5]). Explain the weighting, score, and range in simple terms.
- **Recommendations**: Provide specific, actionable advice for each component to help the student improve.

Use a clear, encouraging, and professional tone to motivate students.
"""),
    ("human", """
Facial Expressions: {facial_expressions}
Speech Transcript: {transcript}
Audio Characteristics: Pitch std: {pitch_std}, Volume std: {volume_std}, Avg pitch: {avg_pitch} Hz, Avg volume: {avg_volume} dB
Filler Words: {filler_words}
Pet Words: {pet_words}
Preliminary Scores:
- Facial Expressions: {facial_expressions_score}/5
- Speech Quality: {speech_quality_score}/5
- Content Quality: {content_quality_score}/5
"""),
])

# Audio extraction
def extract_audio(video_path, audio_path):
    """Write the audio track of ``video_path`` to ``audio_path`` via ffmpeg.

    The output is requested with the ``pcm_s16le`` codec, a 16000 sample
    rate and a single channel. An existing output file is overwritten, and
    ffmpeg's stdout/stderr are captured rather than printed.
    """
    output_options = {'acodec': 'pcm_s16le', 'ar': 16000, 'ac': 1}
    pipeline = ffmpeg.input(video_path).output(audio_path, **output_options)
    pipeline.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)

# Transcription
def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` with the Groq client.

    The file is read fully into memory and sent to the module-level
    ``client`` with model ``whisper-large-v3`` (English, temperature 0.0).

    Returns:
        The transcript text, or ``''`` when the file cannot be read or the
        request fails. The failure reason is printed so that an empty
        transcript is not mistaken for a silent recording.
    """
    try:
        with open(audio_path, 'rb') as f:
            res = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), f.read()),
                model='whisper-large-v3', response_format='json', language='en', temperature=0.0
            )
        return res.text
    except Exception as e:
        # Best-effort by design: the rest of the pipeline can still run on an
        # empty transcript. Catching Exception (not a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        print(f"Transcription failed: {e}")
        return ''

# Filler and pet words analysis
def analyze_filler_pet_words(transcript):
    """Count filler and pet words in ``transcript``.

    The transcript is lower-cased and split on whitespace; punctuation
    around each token is stripped so that "Um," or "like." are counted as
    "um" and "like". Tokens that are empty after stripping are ignored.

    Returns:
        A tuple ``(fcnt, pcnt, fr)`` where ``fcnt`` and ``pcnt`` map each
        filler / pet word that occurs at least once to its count (in the
        order of the reference lists below), and ``fr`` is the number of
        filler occurrences divided by the total word count (0 for an empty
        transcript).
    """
    # Imported locally so this function is self-contained.
    from collections import Counter
    import string

    fillers = ['and', 'that', 'really', 'now', 'just', 'um', 'uh', 'like']
    pets = ['i', 'to', 'the', 'of']

    stripped = (tok.strip(string.punctuation) for tok in transcript.lower().split())
    words = [tok for tok in stripped if tok]
    # Single pass over the words instead of one list.count() per lookup.
    counts = Counter(words)

    fcnt = {w: counts[w] for w in fillers if counts[w] > 0}
    pcnt = {w: counts[w] for w in pets if counts[w] > 0}
    total = len(words)
    fr = sum(fcnt.values()) / total if total else 0
    return fcnt, pcnt, fr

# Audio analysis for speech quality
def analyze_audio(audio_path):
    """Derive pitch/volume statistics and a speech-quality score from a file.

    Returns:
        ``(pitch_std, volume_std, avg_pitch, avg_volume, speech_score)``.
        ``avg_pitch`` is the median and ``pitch_std`` the standard deviation
        of the positive values in the YIN pitch track (150.0 and 0.0 when
        there are none). ``avg_volume`` is ``20*log10(mean RMS) + 40`` and
        ``volume_std`` is the standard deviation of the frame RMS values.
        ``speech_score`` is a modulation score (capped at 4.5) minus pitch
        and volume penalties, clamped to the range 1..5.
    """
    samples, sample_rate = librosa.load(audio_path)

    # Pitch statistics over the positive entries of the pitch track.
    pitch_track = librosa.yin(samples, fmin=80, fmax=500, sr=sample_rate)
    voiced = pitch_track[pitch_track > 0]
    if voiced.size:
        avg_pitch = float(np.median(voiced))
        pitch_std = float(np.std(voiced))
    else:
        avg_pitch = 150.0
        pitch_std = 0.0

    # Loudness statistics from frame-wise RMS energy; the epsilon avoids log10(0).
    frame_rms = librosa.feature.rms(y=samples)[0]
    avg_volume = float(20 * np.log10(np.mean(frame_rms) + 1e-10) + 40)
    volume_std = float(np.std(frame_rms))

    # More pitch/volume variation raises the score; penalties apply only when
    # the pitch is more than 50 away from 150 or the volume is below 40.
    modulation = min(4.5, (pitch_std / 80 + volume_std / 0.01) * 0.7)
    pitch_penalty = max(0, (abs(avg_pitch - 150) - 50) / 500)
    volume_penalty = max(0, (40 - avg_volume) / 60)
    speech_score = min(5, max(1, modulation - pitch_penalty - volume_penalty))

    return pitch_std, volume_std, avg_pitch, avg_volume, speech_score

# Content quality analysis
def analyze_content(transcript, fr):
    """Score the transcript's content quality on a 1-5 scale using the LLM.

    Args:
        transcript: Text of the speech to evaluate.
        fr: Filler-word ratio; a 0.05 penalty is applied when it exceeds 0.05.

    Returns:
        The LLM's numeric score minus the filler penalty, clamped to 1..5.
        If the reply cannot be parsed as a number, a base score of 4.0 is
        used and the unparsable reply is printed.
    """
    p = ChatPromptTemplate.from_messages([
        ('system', 'Evaluate clarity, relevance, impact on a scale of 1 to 5. Return only the numerical score.'),
        ('human', '{transcript}')
    ])
    resp = (p | llm).invoke({'transcript': transcript})
    try:
        base = float(resp.content.strip())
    except (ValueError, TypeError, AttributeError) as e:
        # The model did not return a bare number (or the content is not a
        # string). Keep the original fallback value but make it visible
        # instead of silently scoring 4.0.
        print(f"Could not parse content score from LLM reply ({e}); using 4.0")
        base = 4.0
    penalty = 0.05 if fr > 0.05 else 0
    return min(5, max(1, base - penalty))

# Video analysis for facial expressions
def analyze_video(video_path, only_hands_visible=False, both_hands_and_legs_visible=False, no_hands_no_legs_visible=False):
    """Score facial expressions in a video from the share of "smiling" frames.

    Every frame is passed through the module-level ``holistic`` model. For
    frames with face landmarks, the distance between landmarks 61 and 291
    (presumably the mouth corners; confirm against the MediaPipe face mesh
    map) is compared with 0.05: wider counts as 'Smiling', otherwise
    'Neutral'.

    Args:
        video_path: Path of the video file to read.
        only_hands_visible, both_hands_and_legs_visible,
        no_hands_no_legs_visible: Mutually exclusive flags; exactly one must
            be True. They are only validated here and do not change the
            analysis.

    Returns:
        ``('', fe_desc, 0, fe_sc)`` where ``fe_desc`` describes the smiling
        percentage (or 'No facial data') and ``fe_sc`` is
        ``min(5.0, 2.0 + 3.0 * smiling_ratio)`` (2.0 when no face was found).

    Raises:
        ValueError: If not exactly one flag is True, or the video cannot be
            opened.
    """
    if [only_hands_visible, both_hands_and_legs_visible, no_hands_no_legs_visible].count(True) != 1:
        raise ValueError('Set exactly one visibility flag to True')

    cap = cv2.VideoCapture(video_path)
    smiling_frames = 0
    face_frames = 0
    try:
        if not cap.isOpened():
            raise ValueError('Cannot open video')
        while True:
            ret, frm = cap.read()
            if not ret:
                break
            # OpenCV delivers BGR frames; the model is fed RGB.
            rgb = cv2.cvtColor(frm, cv2.COLOR_BGR2RGB)
            res = holistic.process(rgb)
            if res.face_landmarks:
                m1 = res.face_landmarks.landmark[61]
                m2 = res.face_landmarks.landmark[291]
                face_frames += 1
                if np.hypot(m2.x - m1.x, m2.y - m1.y) > 0.05:
                    smiling_frames += 1
    finally:
        # Release the capture even when frame processing raises.
        cap.release()

    if face_frames:
        sr = smiling_frames / face_frames
        fe_sc = min(5.0, 2.0 + sr * 3.0)
        fe_desc = f'Smiling {sr*100:.1f}%'
    else:
        fe_sc = 2.0
        fe_desc = 'No facial data'
    return '', fe_desc, 0, fe_sc

# Updated report generation with single score and verbose output
def generate_report(video_path, output_path, only_hands_visible=False, both_hands_and_legs_visible=False, no_hands_no_legs_visible=False):
    """Run the full evaluation pipeline and write a text report.

    Steps: extract the audio to a temporary ``temp.wav``, transcribe it,
    analyse audio, filler words, content and video, combine the three
    component scores into a weighted average (30% facial expressions, 30%
    speech quality, 40% content quality) with a +/-0.2 range clamped to
    1..5, and render the report template.

    Args:
        video_path: Path of the video to evaluate.
        output_path: File the report text is written to (UTF-8).
        only_hands_visible, both_hands_and_legs_visible,
        no_hands_no_legs_visible: Forwarded to ``analyze_video``, which
            requires exactly one of them to be True.

    Returns:
        The report text.

    The temporary audio file is removed even when a step fails.
    """
    ap = 'temp.wav'
    try:
        extract_audio(video_path, ap)
        tr = transcribe_audio(ap)
        ps, vs, ap_pitch, av, ss = analyze_audio(ap)
        fcnt, pcnt, fr = analyze_filler_pet_words(tr)
        cs = analyze_content(tr, fr)
        _, fe_desc, _, fe_sc = analyze_video(video_path, only_hands_visible, both_hands_and_legs_visible, no_hands_no_legs_visible)

        # Weighted average: 30% facial, 30% speech, 40% content
        avg_score = 0.3 * fe_sc + 0.3 * ss + 0.4 * cs
        single_score = round(avg_score, 2)
        low = round(max(1, avg_score - 0.2), 1)
        high = round(min(5, avg_score + 0.2), 1)

        # Targets for reporting
        fe_target = 3.5
        sq_target = 3.5
        cq_target = 4.0
        fs_target = 3.5

        # NOTE(review): the LLM response is requested but the report below is
        # built from the fixed template, not from `resp`. Kept as in the
        # original; confirm whether the LLM text is meant to be used.
        resp = (prompt | llm).invoke({
            'facial_expressions': fe_desc,
            'transcript': tr,
            'pitch_std': ps,
            'volume_std': vs,
            'avg_pitch': ap_pitch,
            'avg_volume': av,
            'filler_words': str(fcnt),
            'pet_words': str(pcnt),
            'facial_expressions_score': fe_sc,
            'speech_quality_score': ss,
            'content_quality_score': cs,
            'facial_expressions_target': fe_target,
            'speech_quality_target': sq_target,
            'content_quality_target': cq_target,
            'final_score_target': fs_target
        })

        # Built outside the template so the measured volume is actually
        # interpolated; inside a plain string literal "{av:.1f}" would be
        # printed verbatim.
        if av < 40:
            speech_improvement = f'Your volume ({av:.1f} dB) is below the ideal range, potentially reducing impact.'
        else:
            speech_improvement = 'Further varying your pitch and volume can add more emphasis to key points.'

        # Verbose, student-friendly report.
        # Backslashes in the formula are doubled: in a non-raw string "\t" of
        # "\times" would otherwise be read as a TAB character.
        report = f"""
**Public Speaking Skills Evaluation Report**

**Introduction**
Welcome to your personalized public speaking evaluation! This report analyzes your performance to help you become a more confident and effective speaker. We assess three key components: **Facial Expressions**, **Speech Quality**, and **Content Quality**. Each is scored from 1 to 5, with 1 indicating significant room for improvement and 5 representing an outstanding performance. Your final score is a single value, calculated as a weighted average (30% facial expressions, 30% speech quality, 40% content quality), with a ±0.2 range to show where your performance likely falls. This approach mirrors industry-standard evaluations, like uSpeek, and provides clear, actionable feedback to guide your growth.

**Facial Expressions: {fe_sc:.1f}/5**
   - **How It’s Evaluated**: We measure your smiling ratio—the percentage of time you display a smile—using facial landmark analysis. A 0% smiling ratio scores 2.0, while 100% scores 5.0, reflecting engagement and positivity. Your smiling ratio is {fe_desc}, contributing to a score of {fe_sc:.1f}/5.
   - **Strengths**: {'Your frequent smiling conveys warmth and connects well with the audience.' if fe_sc >= 3.5 else 'You maintain some positive expressions, setting a foundation for engagement.'}
   - **Areas for Improvement**: {'Your smiling ratio is relatively low, which may make your delivery seem less engaging.' if fe_sc < 3.0 else 'Increasing your smiling frequency can further enhance audience connection.'}
   - **Recommendations**: Practice speaking in front of a mirror to consciously increase your smiling, aiming for a ratio above 50%. Record your practice sessions to observe your expressions. Try engaging with your audience through eye contact and subtle nods to boost perceived positivity.

**Speech Quality: {ss:.1f}/5**
   - **How It’s Evaluated**: We analyze your voice’s modulation (variation in pitch and volume), average pitch (target 150 Hz), and average volume (target 40 dB). Pitch is evaluated within a typical voice range (100–200 Hz) without penalty, and volume penalties are minimal to reward natural delivery. Your score of {ss:.1f}/5 reflects your pitch variation (standard deviation {ps:.1f} Hz) and volume consistency ({vs:.3f}).
   - **Strengths**: {'Your dynamic pitch and volume keep the audience engaged, showcasing strong vocal delivery.' if ss >= 3.5 else 'Your voice shows good modulation, providing a solid base for effective communication.'}
   - **Areas for Improvement**: {speech_improvement}
   - **Recommendations**: Practice projecting your voice to reach a volume closer to 40 dB, especially in larger settings. Use vocal exercises, like reading aloud with exaggerated intonation, to enhance pitch variation. Record and listen to your speeches to identify areas for more dynamic delivery.

**Content Quality: {cs:.1f}/5**
   - **How It’s Evaluated**: Your speech’s clarity, relevance, and impact are scored from 1 to 5, with a base score reflecting content strength. Filler words ({fcnt}) and pet words ({pcnt}) are analyzed, with a 0.05 penalty if fillers exceed 5% of your speech. Your score of {cs:.1f}/5 indicates a {'strong' if cs >= 4.0 else 'solid'} message with {'minor' if fr <= 0.05 else 'some'} filler word usage.
   - **Strengths**: {'Your content is clear, relevant, and impactful, effectively conveying your message.' if cs >= 4.0 else 'Your speech is well-structured, making your points accessible to the audience.'}
   - **Areas for Improvement**: {'Excessive filler words slightly disrupt fluency.' if fr > 0.05 else 'Reducing minor filler words and adding vivid examples can elevate impact.'}
   - **Recommendations**: Practice pausing instead of using fillers (e.g., “um,” “like”) to gather thoughts. Rehearse your speech to build fluency and reduce repetitive words. Incorporate stories, data, or examples to make your content more engaging and memorable.

**Final Score: {single_score:.2f}/5**
   - **Calculation**: Your final score is a single value, calculated as a weighted average to reflect the importance of each component:
     \\[
     (0.3 \\times {fe_sc:.1f}) + (0.3 \\times {ss:.1f}) + (0.4 \\times {cs:.1f}) = {single_score:.2f}
     \\]
   - **Score Range**: To provide context, we include a ±0.2 range: **{low:.1f}–{high:.1f}/5**. This shows the likely range of your performance, with 1 being “needs significant improvement” and 5 being “outstanding.” Your score of {single_score:.2f}/5 indicates {'a strong performance with room to shine' if single_score >= 3.5 else 'a solid foundation with clear areas for growth'}.
   - **Interpretation**: Your score aligns with industry-standard evaluations, like uSpeek, and reflects your ability to engage and communicate effectively. {'You’re on track to become an excellent speaker with targeted improvements.' if single_score >= 3.5 else 'With practice, you can boost your score and impact.'}

**Recommendations for Improvement**
   - **Facial Expressions**: Increase your smiling frequency to convey enthusiasm. Practice in front of a mirror or record yourself to monitor progress. Aim for a smiling ratio above 50% to enhance audience connection.
   - **Speech Quality**: Project your voice louder (closer to 40 dB) to ensure clarity and impact. Practice varying pitch during key points using vocal exercises, like reading with exaggerated intonation. Review recordings to refine dynamism.
   - **Content Quality**: Reduce filler words by pausing deliberately. Rehearse multiple times to improve fluency. Add compelling stories or examples to make your content more memorable and impactful.
   - **General Tips**: Join a public speaking group, like Toastmasters, for regular practice and feedback. Record and analyze your speeches to track improvement. Seek peer or instructor feedback to identify subtle areas for growth.

**Conclusion**
Your public speaking performance demonstrates {'notable strengths' if single_score >= 3.5 else 'a promising foundation'}, particularly in {'speech quality and content' if ss >= 3.5 and cs >= 3.5 else 'specific areas'}. By focusing on enhancing facial expressions, refining vocal delivery, and polishing content, you can elevate your skills and captivate your audience. Keep practicing, use this feedback to guide your efforts, and you’ll continue to grow as a confident communicator!

**Final Score: {single_score:.2f}/5 (Range: {low:.1f}–{high:.1f}/5)**
"""
        # The report contains non-ASCII characters (±, –, curly quotes), so
        # the encoding is fixed rather than left to the platform default.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(report)
        return report
    finally:
        if os.path.exists(ap):
            os.remove(ap)

# Main function
def main():
    """Ask for a video upload, generate the report, print it and download it.

    The visibility flags are taken from the module-level settings
    ``only_hands_visible``, ``both_hands_and_legs_visible`` and
    ``no_hands_no_legs_visible`` when any of them is defined, so the values
    chosen at the top of the notebook are actually used. If none is defined,
    the previous hard-coded choice (``no_hands_no_legs_visible=True``) applies.
    """
    print('Upload your video')
    u = files.upload()
    if not u:
        print('No file')
        return
    vp = next(iter(u))  # name of the first uploaded file
    op = 'evaluation_report.txt'

    flag_names = ('only_hands_visible', 'both_hands_and_legs_visible', 'no_hands_no_legs_visible')
    module_vars = globals()
    if any(name in module_vars for name in flag_names):
        flags = {name: bool(module_vars.get(name, False)) for name in flag_names}
    else:
        flags = {
            'only_hands_visible': False,
            'both_hands_and_legs_visible': False,
            'no_hands_no_legs_visible': True,
        }

    rep = generate_report(vp, op, **flags)
    print(rep)
    files.download(op)

if __name__ == '__main__':
    main()

# Thank you for using our free tool! If possible, please share it so that others can benefit from it too 🤗

**Credits: Alpha AI Team (www.alphaai.biz)**