<a href="https://colab.research.google.com/github/Aditya-sharma112245/student-intro-evaluator/blob/main/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
# Install required packages
!pip install streamlit -q
!pip install language-tool-python -q
!pip install nltk -q
!pip install vaderSentiment -q
!pip install sentence-transformers -q
!pip install pyngrok -q

# Download NLTK resources
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt_tab')

# Configure ngrok with your token
!ngrok authtoken 35qAefZ0oi7d3cwHUMpb9bkzXGh_37V6hoDx1UZSVdtr5Lm1v


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [41]:
%%writefile app.py
import streamlit as st
import language_tool_python
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import FreqDist
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sentence_transformers import SentenceTransformer, util
import re

# Initialize models
@st.cache_resource
def _load_models():
    """Build the sentiment analyzer, sentence encoder and grammar checker once.

    Streamlit re-executes this script from the top on every widget interaction.
    Caching the constructed objects as a shared resource means they are created
    on the first run only and reused by later reruns instead of being rebuilt
    each time the user presses a button.

    Returns:
        tuple: (SentimentIntensityAnalyzer, SentenceTransformer, LanguageTool)
    """
    return (
        SentimentIntensityAnalyzer(),
        SentenceTransformer('all-MiniLM-L6-v2'),
        language_tool_python.LanguageTool('en-US'),
    )


analyzer, model, tool = _load_models()

# Rubric keywords
# Every pattern is searched against the transcript exactly as typed, so each one
# carries its own case handling:
#   * "(?i)" at the start makes the whole pattern case-insensitive;
#   * "name" uses a scoped "(?i:...)" so the lead-in phrase is case-insensitive
#     while the captured name must still start with a capital letter (this keeps
#     "I am excited" or "I am 13" from being mistaken for a name).
# Common plural/inflected forms ("hobbies", "goals", "playing") are accepted,
# because the surrounding \b anchors would otherwise reject them.
must_have_keywords = {
    "name": r"\b(?i:my name is|i am|myself)\s+([A-Z][a-z]+)",
    "age": r"(?i)\b(\d{1,2})\s+years?\s+old\b",
    "class": r"(?i)\bclass\s+\d+[a-zA-Z]*\b",
    "school": r"(?i)\b(school|institute|college)\b",
    "family": r"(?i)\b(family|father|mother|brothers?|sisters?)\b",
    "hobbies": r"(?i)\b(play(?:s|ing)?|reading|sports?|hobb(?:y|ies)|interests?)\b",
    "goal": r"(?i)\b(goals?|dreams?|ambitions?)\b",
    "fun_fact": r"(?i)\b(fun fact|something unique|interesting thing)\b"
}

good_to_have_keywords = {
    "origin": r"(?i)\bfrom\b",
    "strength": r"(?i)\b(strengths?|achievements?|skills?)\b"
}

filler_words = ["um", "uh", "like", "you know", "so", "actually", "basically",
                "right", "i mean", "well", "kinda", "sort of", "okay", "hmm", "ah"]

def extract_keywords(text):
    """Return the rubric keys whose pattern occurs in ``text``.

    Args:
        text: The transcript, with its original capitalisation.

    Returns:
        list[str]: Matching keys, must-have keys first (in table order)
        followed by good-to-have keys (in table order). Empty when nothing
        matches.
    """
    found = []
    for rubric in (must_have_keywords, good_to_have_keywords):
        # Search the original text: lowercasing it first would make the
        # capitalised-name requirement of the "name" pattern unsatisfiable.
        found.extend(key for key, pattern in rubric.items() if re.search(pattern, text))
    return found

# Greeting phrases grouped by the score they earn, best tier first.
_SALUTATION_TIERS = (
    (5, ("i am excited", "feeling great")),
    (4, ("good morning", "good afternoon", "good evening", "good day", "hello everyone")),
    (2, ("hi", "hello")),
)

# (threshold, score) bands; the fallback score is passed at the call site.
_VOCAB_BANDS = ((0.9, 10), (0.7, 8), (0.5, 6), (0.3, 4))        # value >= threshold
_SENTIMENT_BANDS = ((0.9, 15), (0.7, 12), (0.5, 9), (0.3, 6))   # value >= threshold
_FILLER_BANDS = ((3, 15), (6, 12), (9, 9), (12, 6))             # value <= threshold

_BASIC_DETAILS = ("name", "age", "class", "school")

_RUBRIC_TEXT = "Student introduction should include greeting, name, age, class, school, family details, hobbies, interests, goals, and unique/fun facts."


def _phrase_regex(phrase):
    """Compile a whole-word matcher for a phrase, tolerant of any whitespace between its words."""
    parts = [re.escape(part) for part in phrase.lower().split()]
    if not parts:
        return None
    return re.compile(r"\b" + r"\s+".join(parts) + r"\b")


def _count_phrase(text_lower, phrase):
    """Count whole-word occurrences of ``phrase`` in already-lowercased text."""
    matcher = _phrase_regex(phrase)
    return len(matcher.findall(text_lower)) if matcher else 0


def _salutation_score(text_lower):
    """Return the score of the best greeting tier present in the text, or 0 if none is."""
    for tier_score, phrases in _SALUTATION_TIERS:
        if any(_count_phrase(text_lower, phrase) for phrase in phrases):
            return tier_score
    return 0


def _speech_rate_score(wpm):
    """Map words-per-minute onto the 10 / 6 / 2 rubric using gap-free ranges."""
    if 111 <= wpm <= 140:
        return 10
    if 81 <= wpm < 111 or 140 < wpm <= 160:
        return 6
    return 2  # slower than 81 wpm or faster than 160 wpm


def _score_at_least(value, bands, fallback):
    """Return the score of the first band whose threshold ``value`` reaches, else ``fallback``."""
    for threshold, band_score in bands:
        if value >= threshold:
            return band_score
    return fallback


def _score_at_most(value, bands, fallback):
    """Return the score of the first band whose threshold ``value`` stays within, else ``fallback``."""
    for threshold, band_score in bands:
        if value <= threshold:
            return band_score
    return fallback


def evaluate_transcript(text, duration_sec=52):
    """Score a spoken self-introduction transcript against the rubric.

    Args:
        text: The transcript to evaluate.
        duration_sec: Length of the speech in seconds; must be positive.

    Returns:
        tuple: ``(total_score, feedback)`` where ``total_score`` is the sum of
        all component scores capped at 100 and ``feedback`` is a dict holding
        each component score together with the raw measurement behind it.

    Raises:
        ValueError: If ``duration_sec`` is not positive.
    """
    if duration_sec <= 0:
        raise ValueError("duration_sec must be a positive number of seconds")

    text_lower = text.lower()

    # word_tokenize also yields punctuation tokens; only tokens containing a
    # letter or digit are counted as words. They are lowercased so that "The"
    # and "the" count as the same vocabulary item.
    words = [
        token.lower()
        for token in word_tokenize(text)
        if any(ch.isalnum() for ch in token)
    ]
    word_count = len(words)

    # --- Content & Structure ---
    # Whole-phrase matching (so "hi" does not fire inside "this"), best tier wins.
    salutation_score = _salutation_score(text_lower)

    # Keyword presence
    found_keywords = extract_keywords(text)
    must_score = sum(4 for k in must_have_keywords if k in found_keywords)
    good_score = sum(2 for k in good_to_have_keywords if k in found_keywords)
    keyword_score = min(30, must_score + good_score)

    # Flow: awarded when a greeting and all basic details are present.
    # Only presence is checked here, not the order in which they appear.
    has_basic_details = all(detail in found_keywords for detail in _BASIC_DETAILS)
    flow_score = 5 if salutation_score and has_basic_details else 0

    content_structure_score = salutation_score + keyword_score + flow_score  # max 40

    # --- Speech Rate ---
    wpm = word_count / (duration_sec / 60)
    speech_rate_score = _speech_rate_score(wpm)

    # --- Language & Grammar ---
    matches = tool.check(text)
    # Transcripts shorter than 100 words are scored on their raw error count.
    errors_per_100 = len(matches) / max(word_count / 100, 1)
    grammar_score = max(2, min(10, int((1 - min(errors_per_100 / 10, 1)) * 10)))

    # Type-token ratio; a transcript with no words gets 0 instead of dividing by zero.
    ttr = len(set(words)) / word_count if word_count else 0.0
    vocab_score = _score_at_least(ttr, _VOCAB_BANDS, 2)

    # --- Clarity (Filler words) ---
    # Counted on the lowercased text so capitalised fillers ("So, ...") and
    # multi-word fillers ("you know", "sort of") are both detected.
    filler_count = sum(_count_phrase(text_lower, filler) for filler in filler_words)
    filler_rate = (filler_count / word_count) * 100 if word_count else 0.0
    filler_score = _score_at_most(filler_rate, _FILLER_BANDS, 3)

    # --- Engagement (Sentiment) ---
    # Uses the 'pos' component of the analyzer's polarity scores.
    sentiment = analyzer.polarity_scores(text)
    pos_prob = sentiment['pos']
    sentiment_score = _score_at_least(pos_prob, _SENTIMENT_BANDS, 3)

    # --- Semantic similarity (bonus) ---
    embedding_sim = util.cos_sim(model.encode(text), model.encode(_RUBRIC_TEXT)).item()
    semantic_score = min(10, max(0, int(embedding_sim * 10)))

    # --- Total score ---
    # The components can add up to 110 (the semantic part is a bonus), hence the cap.
    total_score = min(
        100,
        content_structure_score
        + speech_rate_score
        + grammar_score
        + vocab_score
        + filler_score
        + sentiment_score
        + semantic_score,
    )

    # Feedback
    feedback = {
        "Salutation Score": salutation_score,
        "Keyword Score": keyword_score,
        "Flow Score": flow_score,
        "Speech Rate (WPM)": round(wpm, 2),
        "Speech Rate Score": speech_rate_score,
        "Grammar Errors": len(matches),
        "Grammar Score": grammar_score,
        "Vocabulary TTR": round(ttr, 2),
        "Vocabulary Score": vocab_score,
        "Filler Words Count": filler_count,
        "Filler Score": filler_score,
        "Positive Sentiment Probability": round(pos_prob, 2),
        "Sentiment Score": sentiment_score,
        "Semantic Similarity": round(embedding_sim, 2),
        "Semantic Score": semantic_score,
        "Keywords Found": found_keywords
    }

    return total_score, feedback

# --- Streamlit UI ---
st.title("Student Introduction Evaluator")
transcript = st.text_area("Paste Transcript Here:")

duration_sec = st.number_input("Duration of speech in seconds:", min_value=1, value=52)

if st.button("Score"):
    if transcript.strip() == "":
        st.warning("Please enter a transcript first.")
    else:
        score, feedback = evaluate_transcript(transcript, duration_sec)
        st.subheader(f"Overall Score: {score}/100")
        st.write("### Detailed Feedback:")
        st.json(feedback)


Overwriting app.py


In [43]:
import pyngrok.ngrok as ngrok

# Stop any ngrok process left over from an earlier run of this notebook,
# which also closes every tunnel it was serving.
ngrok.kill()


In [44]:
import subprocess

STREAMLIT_PORT = 8501

# The tunnel only forwards traffic, so a Streamlit server must be listening on
# the port. Start app.py in the background unless a server launched by an
# earlier run of this cell is still alive (keeps the cell safe to re-run).
if "streamlit_process" not in globals() or streamlit_process.poll() is not None:
    streamlit_log = open("streamlit.log", "w")  # server output, for troubleshooting
    streamlit_process = subprocess.Popen(
        [
            "streamlit", "run", "app.py",
            "--server.port", str(STREAMLIT_PORT),
            "--server.headless", "true",
        ],
        stdout=streamlit_log,
        stderr=subprocess.STDOUT,
    )

public_url = ngrok.connect(STREAMLIT_PORT)
# Print the tunnel's URL itself rather than the NgrokTunnel object's repr.
print("Your Streamlit app is live at:", public_url.public_url)


Your Streamlit app is live at: NgrokTunnel: "https://92bcc7ca6775.ngrok-free.app" -> "http://localhost:8501"
