In [1]:
pip install fastapi uvicorn language-tool-python vaderSentiment


Collecting language-tool-python
  Downloading language_tool_python-3.0.0-py3-none-any.whl.metadata (17 kB)
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading language_tool_python-3.0.0-py3-none-any.whl (48 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.8/48.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment, language-tool-python
Successfully installed language-tool-python-3.0.0 vaderSentiment-3.3.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
!pip install vaderSentiment


Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [5]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import re

# ----------------------- Utility helpers -----------------------

def clean_text(t):
    return re.sub(r"\s+", " ", t.strip().lower())

def count_words(t):
    return len(t.split())

def ttr_score(words):
    tokens = [w for w in words if w.isalpha()]
    if len(tokens) == 0:
        return 0
    distinct = len(set(tokens))
    return distinct / len(tokens)

def filler_word_rate(transcript, total_words):
    filler_list = ["um","uh","like","you know","so","actually","basically",
                   "right","i mean","well","kinda","sort of","okay","hmm","ah"]
    count = 0
    for f in filler_list:
        count += transcript.lower().count(f)
    return (count / total_words) * 100, count


def check_salutation(t):
    if any(x in t for x in ["i am excited", "feeling great"]):
        return 5
    if any(x in t for x in ["good morning", "good afternoon", "good evening", "hello everyone"]):
        return 4
    if any(x in t for x in ["hi", "hello"]):
        return 2
    return 0


def keyword_score(transcript):
    must_have = {
        "name": ["my name is", "myself", "i am"],
        "age": ["years old"],
        "school_class": ["class", "school"],
        "family": ["family", "parents", "mother", "father"],
        "hobby": ["hobby", "cricket", "playing", "reading", "music"]
    }

    good_to_have = {
        "family_details": ["kind hearted", "soft spoken"],
        "origin": ["i am from"],
        "goal": ["my goal", "my dream", "i want to become"],
        "unique": ["fun fact", "unique", "one thing"],
        "achievement": ["achievement", "strong", "strength"]
    }

    must_score = 0
    good_score = 0

    for arr in must_have.values():
        if any(a in transcript for a in arr):
            must_score += 4

    for arr in good_to_have.values():
        if any(a in transcript for a in arr):
            good_score += 2

    return must_score, good_score



# ---------------------- FIXED FLOW CHECK ------------------------

def flow_check(raw):
    t = raw.lower()

    stages = {
        "salutation": ["hello", "good morning", "good afternoon", "good evening"],
        "basic": ["myself", "my name", "i am 13", "class", "school"],
        "extra": ["family", "playing", "cricket", "fun fact", "unique"],
        "closing": ["thank you"]
    }

    positions = {}

    for stage, keys in stages.items():
        found = [t.find(k) for k in keys if t.find(k) != -1]
        positions[stage] = min(found) if found else 999999

    # Check order
    if positions["salutation"] < positions["basic"] < positions["extra"] < positions["closing"]:
        return 5
    return 0



# ---------------------- GRAMMAR CHECKER ------------------------

def simple_grammar_score(text):
    errors = 0

    # Rule 1: double spaces
    if "  " in text:
        errors += 1

    # Rule 2: repeated words
    words = text.split()
    for i in range(len(words)-1):
        if words[i] == words[i+1]:
            errors += 1

    # Rule 3: punctuation at end
    if not text.strip().endswith((".", "!", "?")):
        errors += 1

    # Rule 4: common mistakes
    if "there are 1" in text or "there is 3" in text:
        errors += 1

    total_words = count_words(text)
    errors_per_100 = (errors / total_words) * 100 if total_words else 0

    raw = 1 - min(errors_per_100 / 10, 1)

    if raw > 0.9:
        return 10
    if raw >= 0.7:
        return 8
    if raw >= 0.5:
        return 6
    if raw >= 0.3:
        return 4
    return 2



# ---------------------- FIXED SENTIMENT ------------------------

sentiment_analyzer = SentimentIntensityAnalyzer()

def sentiment_score(raw_text):
    score = sentiment_analyzer.polarity_scores(raw_text)["pos"]
    if score >= 0.9:
        return 15
    if score >= 0.7:
        return 12
    if score >= 0.5:
        return 9
    if score >= 0.3:
        return 6
    return 3



# ---------------------- MAIN SCORER ------------------------

def score_transcript(transcript, duration_sec):
    text = clean_text(transcript)  # cleaned
    total_words = count_words(text)
    wpm = total_words / (duration_sec / 60)

    salutation_pts = check_salutation(text)
    must_pts, good_pts = keyword_score(text)
    
    # FIX: Use RAW transcript for flow
    flow_pts = flow_check(transcript)

    # Speech rate scoring
    if wpm > 161:
        speech_pts = 2
    elif wpm >= 141:
        speech_pts = 6
    elif wpm >= 111:
        speech_pts = 10
    elif wpm >= 81:
        speech_pts = 6
    else:
        speech_pts = 2

    grammar_pts = simple_grammar_score(text)

    tokens = text.split()
    ttr_val = ttr_score(tokens)

    if ttr_val >= 0.9:
        vocab_pts = 10
    elif ttr_val >= 0.7:
        vocab_pts = 8
    elif ttr_val >= 0.5:
        vocab_pts = 6
    elif ttr_val >= 0.3:
        vocab_pts = 4
    else:
        vocab_pts = 2

    filler_rate_val, filler_count = filler_word_rate(text, total_words)
    if filler_rate_val <= 3:
        filler_pts = 15
    elif filler_rate_val <= 6:
        filler_pts = 12
    elif filler_rate_val <= 9:
        filler_pts = 9
    elif filler_rate_val <= 12:
        filler_pts = 6
    else:
        filler_pts = 3

    # FIX: Use RAW transcript for sentiment
    sent_pts = sentiment_score(transcript)

    total = salutation_pts + must_pts + good_pts + flow_pts + speech_pts + grammar_pts + vocab_pts + filler_pts + sent_pts

    return {
        "overall_score": total,
        "details": {
            "words": total_words,
            "wpm": round(wpm, 2),
            "salutation": salutation_pts,
            "must_have_keywords": must_pts,
            "good_to_have_keywords": good_pts,
            "flow": flow_pts,
            "speech_rate": speech_pts,
            "grammar": grammar_pts,
            "vocab_richness": vocab_pts,
            "filler_rate_percent": round(filler_rate_val, 2),
            "sentiment_points": sent_pts
        }
    }



# ---------------------- TEST ------------------------

sample = """Hello everyone, myself Muskan, studying in class 8th B section from Christ Public School. 
I am 13 years old. I live with my family. There are 3 people in my family, me, my mother and my father.
One special thing about my family is that they are very kind hearted to everyone and soft spoken. One thing I really enjoy is play, playing cricket and taking wickets.
A fun fact about me is that I see in mirror and talk by myself. One thing people don't know about me is that I once stole a toy from one of my cousin.
My favorite subject is science because it is very interesting. Through science I can explore the whole world and make the discoveries and improve the lives of others.
Thank you for listening."""

score_transcript(sample, 52)


{'overall_score': 73,
 'details': {'words': 133,
  'wpm': 153.46,
  'salutation': 4,
  'must_have_keywords': 20,
  'good_to_have_keywords': 4,
  'flow': 5,
  'speech_rate': 6,
  'grammar': 10,
  'vocab_richness': 6,
  'filler_rate_percent': 0.75,
  'sentiment_points': 3}}

In [None]:
import re
import gradio as gr
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ----------------------- Utility Helpers -----------------------

def clean_text(t):
    return re.sub(r"\s+", " ", t.strip().lower())

def count_words(t):
    return len(t.split())

def ttr_score(words):
    tokens = [w for w in words if w.isalpha()]
    if not tokens:
        return 0
    return len(set(tokens)) / len(tokens)

def filler_word_rate(text, total_words):
    fillers = ["um","uh","like","you know","so","actually","basically",
               "right","i mean","well","kinda","sort of","okay","hmm","ah"]
    lw = text.lower()
    count = sum(lw.count(f) for f in fillers)
    return (count / total_words) * 100, count

def check_salutation(text):
    t = text.lower()
    if "i am excited" in t or "feeling great" in t:
        return 5
    if any(x in t for x in ["good morning", "good afternoon", "good evening", "hello everyone"]):
        return 4
    if "hello" in t or "hi" in t:
        return 2
    return 0

def keyword_score(text):
    t = text.lower()

    must = {
        "name": ["my name is", "myself", "i am"],
        "age": ["years old"],
        "school_class": ["class", "school"],
        "family": ["family", "parents", "mother", "father"],
        "hobby": ["hobby", "cricket", "playing", "reading", "music"]
    }

    good = {
        "family_details": ["kind hearted", "soft spoken"],
        "unique": ["fun fact", "unique", "one thing"],
        "origin": ["i am from"],
        "goal": ["my goal", "my dream", "i want to become"],
        "achievement": ["achievement", "strong", "strength"]
    }

    must_score = sum(4 for ks in must.values() if any(k in t for k in ks))
    good_score = sum(2 for ks in good.values() if any(k in t for k in ks))
    return must_score, good_score

def flow_check(raw_text):
    t = raw_text.lower()

    stages = {
        "salutation": ["hello", "good morning", "good afternoon", "good evening"],
        "basic": ["myself", "my name", "i am 13", "class", "school"],
        "extra": ["family", "playing", "cricket", "fun fact", "unique"],
        "closing": ["thank you"]
    }

    positions = {}
    for stage, keys in stages.items():
        found = [t.find(k) for k in keys if k in t]
        positions[stage] = min(found) if found else float("inf")

    if positions["salutation"] < positions["basic"] < positions["extra"] < positions["closing"]:
        return 5
    return 0

def simple_grammar_score(text):
    errors = 0

    if "  " in text:
        errors += 1

    words = text.split()
    for i in range(len(words)-1):
        if words[i] == words[i+1]:
            errors += 1

    if not text.rstrip().endswith((".", "!", "?")):
        errors += 1

    total_words = count_words(text)
    errors_per_100 = (errors / total_words) * 100 if total_words else 0
    raw = 1 - min(errors_per_100 / 10, 1)

    if raw > 0.9:
        return 10
    if raw >= 0.7:
        return 8
    if raw >= 0.5:
        return 6
    if raw >= 0.3:
        return 4
    return 2

sentiment_analyzer = SentimentIntensityAnalyzer()

def sentiment_score(raw_text):
    cleaned_raw = raw_text.strip().replace("\n", " ").replace("  ", " ")
    pos = sentiment_analyzer.polarity_scores(cleaned_raw)["pos"]
    
    if pos >= 0.9:
        return 15
    if pos >= 0.7:
        return 12
    if pos >= 0.5:
        return 9
    if pos >= 0.3:
        return 6
    return 3

# ----------------------- MAIN SCORING FUNCTION -----------------------

def score_transcript(transcript, duration_sec):
    if not transcript.strip():
        return {"error": "Please enter a transcript."}

    cleaned = clean_text(transcript)
    total_words = count_words(cleaned)

    if duration_sec <= 0:
        duration_sec = 1

    wpm = total_words / (duration_sec / 60)

    sal = check_salutation(cleaned)
    must, good = keyword_score(cleaned)
    flow = flow_check(transcript)  # RAW FLOW

    if wpm > 161:
        speech = 2
    elif wpm >= 141:
        speech = 6
    elif wpm >= 111:
        speech = 10
    elif wpm >= 81:
        speech = 6
    else:
        speech = 2

    grammar = simple_grammar_score(cleaned)

    ttr_val = ttr_score(cleaned.split())

    if ttr_val >= 0.9:
        vocab = 10
    elif ttr_val >= 0.7:
        vocab = 8
    elif ttr_val >= 0.5:
        vocab = 6
    elif ttr_val >= 0.3:
        vocab = 4
    else:
        vocab = 2

    filler_rate, _ = filler_word_rate(cleaned, total_words)

    if filler_rate <= 3:
        filler = 15
    elif filler_rate <= 6:
        filler = 12
    elif filler_rate <= 9:
        filler = 9
    elif filler_rate <= 12:
        filler = 6
    else:
        filler = 3

    sent = sentiment_score(transcript)  # FIXED SENTIMENT

    total = sal + must + good + flow + speech + grammar + vocab + filler + sent

    return {
        "overall_score": total,
        "details": {
            "words": total_words,
            "wpm": round(wpm, 2),
            "salutation": sal,
            "must_have_keywords": must,
            "good_to_have_keywords": good,
            "flow": flow,
            "speech_rate": speech,
            "grammar": grammar,
            "vocab_richness": vocab,
            "filler_rate_percent": round(filler_rate, 2),
            "sentiment_points": sent
        }
    }

# ----------------------- GRADIO UI -----------------------

demo = gr.Interface(
    fn=score_transcript,
    inputs=[
        gr.Textbox(lines=12, label="Transcript", placeholder="Paste transcript here..."),
        gr.Number(label="Duration (seconds)", value=52)
    ],
    outputs=gr.JSON(label="Score Output"),
    title="Self Introduction Scoring App",
    description="Paste transcript + duration. Get rubric-based AI scoring instantly."
)

demo.launch(debug=True)


* Running on local URL:  http://127.0.0.1:7860
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://0b571fa624e5f4fa35.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
