In [None]:
import sys, subprocess

# ffmpeg is the system tool used later to convert the browser recording to WAV.
# The Python packages are installed through the kernel's own interpreter so
# they land in the environment this notebook imports from.
python_packages = [
    'SpeechRecognition', 'transformers', 'torch', 'datasets',
    'scikit-learn', 'evaluate',
]
install_commands = (
    ['apt-get', 'update', '-qq'],
    ['apt-get', 'install', '-y', '-qq', 'ffmpeg'],
    [sys.executable, '-m', 'pip', 'install', '-q', *python_packages],
)

# check=True aborts the cell on the first command that exits non-zero.
for command in install_commands:
    subprocess.run(command, check=True)

print("✅ Dependencies installed.")


✅ Dependencies installed.


In [None]:
!pip uninstall -y transformers
!pip install -q transformers --upgrade

from datasets import Dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
import pickle

# Load and process dataset
# Rows missing either the text or its label are dropped *before* the string
# cast: `astype(str)` would otherwise turn a missing sentence into the literal
# text "nan", and a missing sentiment has no valid class to encode.
df = (
    pd.read_csv("/content/Dataset_2.csv")
    .dropna(subset=['sentence', 'sentiment'])
    .assign(sentence=lambda frame: frame['sentence'].astype(str))
    .reset_index(drop=True)
)

# Map sentiment names to integer class ids. The fitted encoder is kept so that
# predicted ids can be mapped back to names later.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['sentiment'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
dataset = Dataset.from_pandas(df[['sentence', 'label']].reset_index(drop=True))
dataset = dataset.train_test_split(test_size=0.2, seed=42)
dataset_dict = DatasetDict({
    'train': dataset['train'],
    'test': dataset['test']
})

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_fn(example):
    """Tokenize the "sentence" field of `example` with the notebook's tokenizer.

    Truncation is enabled and padding uses the "max_length" strategy, so all
    encoded sequences come back with the same length.
    """
    sentences = example["sentence"]
    return tokenizer(sentences, truncation=True, padding="max_length")

# Tokenize both splits in batches; the raw text column is dropped once encoded.
encoded_dataset = dataset_dict.map(
    tokenize_fn,
    batched=True,
    remove_columns=["sentence"],
)

# The classification head gets one output per class known to the label encoder.
n_classes = len(label_encoder.classes_)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=n_classes)

# Metrics function
def compute_metrics(pred):
    """Compute accuracy and weighted precision/recall/F1 for an evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores, or a tuple whose first element
            holds those scores).

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    labels = pred.label_ids
    scores = pred.predictions
    # Some models return extra outputs next to the scores; the scores come first.
    if isinstance(scores, tuple):
        scores = scores[0]
    preds = scores.argmax(-1)
    # zero_division=0 scores a class that is never predicted as 0 explicitly,
    # instead of relying on the warn-and-zero default on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

# torch is only needed here to detect whether a CUDA device is present.
import torch

training_args = TrainingArguments(
    output_dir="./results",
    # Without an explicit strategy the default is "no" and `eval_steps` is
    # ignored, so no validation metrics are reported while training.
    eval_strategy="steps",
    eval_steps=100,
    save_steps=100,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    report_to='none',
    # Mixed precision needs a CUDA device; fall back to full precision on a
    # CPU-only runtime instead of failing at construction time.
    fp16=torch.cuda.is_available()
)

# Wire the model, hyper-parameters, data splits and metric callback together.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["test"],
)

# Fine-tune on the training split.
trainer.train()

# Score the fine-tuned model on the held-out split.
metrics = trainer.evaluate()
print("📊 Evaluation Metrics:", metrics)

# Save model & tokenizer side by side so the directory can be loaded as one unit.
for artifact in (model, tokenizer):
    artifact.save_pretrained("sentiment_model")

# The fitted encoder is stored separately; it maps class ids back to names.
with open("label_encoder.pkl", "wb") as encoder_file:
    pickle.dump(label_encoder, encoder_file)


Found existing installation: transformers 4.51.3
Uninstalling transformers-4.51.3:
  Successfully uninstalled transformers-4.51.3


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/1216 [00:00<?, ? examples/s]

Map:   0%|          | 0/304 [00:00<?, ? examples/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
10,1.0721
20,0.4877
30,0.3696
40,0.1833
50,0.0709
60,0.0266
70,0.0728
80,0.037
90,0.0828
100,0.1035


📊 Evaluation Metrics: {'eval_loss': 0.0004046959220431745, 'eval_accuracy': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_f1': 1.0, 'eval_runtime': 1.3484, 'eval_samples_per_second': 225.446, 'eval_steps_per_second': 28.181, 'epoch': 5.0}


In [None]:
from IPython.display import HTML, Javascript, display
from google.colab import output
import base64
import speech_recognition as sr
import subprocess

# Save audio from browser
def _save_audio(b64):
    """Decode a base64 payload and write the bytes to 'recorded.webm'.

    Args:
        b64: Base64-encoded audio data.
    """
    with open('recorded.webm', 'wb') as sink:
        sink.write(base64.b64decode(b64))
# Expose the Python saver to the browser under the name the JS below invokes.
output.register_callback('notebook.saveAudio', _save_audio)

# UI buttons
display(HTML('''
  <button onclick="startRecording()">🎤 Start Recording</button>
  <button onclick="stopRecording()">⏹️ Stop Recording</button>
  <span id="status">Status: Idle</span>
'''))

# JS to capture audio in browser
display(Javascript("""
(function() {
  let recorder = null;
  let starting = false;
  const setStatus = text => {
    document.getElementById('status').innerText = 'Status: ' + text;
  };

  window.startRecording = async () => {
    // A second click while starting/recording would orphan the live microphone stream.
    if (starting || (recorder && recorder.state === 'recording')) return;
    starting = true;

    let stream;
    try {
      stream = await navigator.mediaDevices.getUserMedia({audio:true});
    } catch (err) {
      // Permission denied or no input device: report it instead of failing silently.
      setStatus('Microphone unavailable ❌');
      return;
    } finally {
      starting = false;
    }

    // Per-recording state is kept local so consecutive recordings cannot mix chunks.
    const chunks = [];
    const active = new MediaRecorder(stream);
    active.ondataavailable = e => chunks.push(e.data);
    active.onstart = () => setStatus('Recording…');
    active.onstop = () => {
      setStatus('Processing…');
      stream.getTracks().forEach(t => t.stop());
      const blob = new Blob(chunks, {type:'audio/webm'});
      const reader = new FileReader();
      reader.onerror = () => setStatus('Could not read recording ❌');
      reader.onload = async () => {
        const b64 = reader.result.split(',')[1];
        try {
          // Wait for the kernel callback so success is only shown once the file is written.
          await google.colab.kernel.invokeFunction('notebook.saveAudio', [b64], {});
          setStatus('Saved ✅');
        } catch (err) {
          setStatus('Save failed ❌');
        }
      };
      reader.readAsDataURL(blob);
    };
    recorder = active;
    active.start();
  };

  window.stopRecording = () => recorder && recorder.state === 'recording' && recorder.stop();
})()
"""))


<IPython.core.display.Javascript object>

In [None]:
# Convert to WAV for transcription
# `-y` (overwrite existing output) is a global ffmpeg option, so it is given
# before the input/output files rather than trailing after them.
subprocess.run(['ffmpeg', '-y', '-i', 'recorded.webm', 'recorded.wav'], check=True)

recognizer = sr.Recognizer()
with sr.AudioFile('recorded.wav') as src:
    audio_data = recognizer.record(src)

# `transcript` stays an empty string whenever no text could be produced, which
# is what the later cells test for.
transcript = ""
empty_message = "(No speech detected)"
try:
    transcript = recognizer.recognize_google(audio_data)
except sr.UnknownValueError:
    # The recognizer could not make out any speech in the recording.
    pass
except sr.RequestError as err:
    # The speech service could not be reached or rejected the request. Report
    # that explicitly rather than presenting it as a silent recording.
    empty_message = "(Transcription unavailable)"
    print(f"⚠️ Speech recognition request failed: {err}")

print("🎙️ Transcript:")
print(transcript or empty_message)


🎙️ Transcript:
I want to kill myself


In [None]:
from transformers import pipeline
import pickle

# Load model and tokenizer
model_path = "sentiment_model"
sentiment_analyzer = pipeline("text-classification", model=model_path, tokenizer=model_path)

# Load label encoder
# NOTE(security): unpickling can execute arbitrary code. Only load a
# label_encoder.pkl that was written by this notebook's training cell.
with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# Run sentiment analysis
# truncation=True keeps a long transcript within the model's maximum input
# length instead of failing inside the model.
if transcript:
    result = sentiment_analyzer(transcript, truncation=True)[0]
else:
    result = {"label": None, "score": None}

# Decode prediction label
raw_label = result["label"]
if raw_label and raw_label.startswith("LABEL_"):
    # Generic "LABEL_<id>" names carry the class id the label encoder produced.
    label_index = int(raw_label.split("_")[1])
    decoded_label = label_encoder.inverse_transform([label_index])[0]
else:
    # Either nothing was classified (None), or the model already reports a
    # readable label name, which is used as-is rather than discarded.
    decoded_label = raw_label

# --- Display Results ---
divider = "=" * 50

# Transcript section: header framed by divider lines, then the sentence itself.
print("\n" + divider, "📝 Transcript Analysis", divider, sep="\n")
print(f"📄 Sentence: {transcript or '(No speech detected)'}")

# Sentiment section.
print("\n" + divider, "📊 Sentiment Analysis", divider, sep="\n")
if not decoded_label:
    print(" • No sentiment detected.")
else:
    confidence = result['score']
    print(f" • Sentiment Detected : {decoded_label}")
    print(f" • Confidence Score   : {confidence:.2f}")

    # Check if negative sentiment exceeds 60%
    is_negative = decoded_label.lower() == "negative"
    if is_negative and confidence > 0.6:
        print("\n🚨 ALERT: Patient at risk detected based on high negative sentiment!")

# --- Self-Harm Keyword Detection ---
import re

self_harm_keywords = [
     "suicide", "commit suicide", "attempt suicide", "kill myself", "going to kill myself", "want to kill myself", "thinking about killing myself", "off myself", "end it all", "end my life",
     "finish it all", "finish myself off", "hang myself", "overdose", "overdose on pills", "take pills", "swallow pills", "slit my wrists", "slash my arms", "cut myself",
     "self harm", "self-harm", "hurt myself", "want to hurt myself", "cause myself pain", "i'm worthless", "worthless", "not worth living", "life isn't worth living", "no reason to live",
     "what's the point anymore", "feel like dying", "feeling suicidal", "suicidal thoughts", "suicidal ideation", "want it to end", "wish i were dead", "wish i was dead", "hope i'm dead", "death can't come soon enough",
     "ready to die", "can't go on", "can't keep living", "don't want to exist", "don't want to be here", "vanish forever", "disappear forever", "blow my brains out", "shoot myself", "put a bullet in my head",
     "please kill me", "kill me", "just kill me", "drown myself", "suffocate myself", "strangle myself", "crash my car", "jump off a bridge", "jump off a building", "jump in front of a train",
     "walk in front of traffic", "nothing matters", "meaningless life", "pointless existence", "broken beyond repair", "beyond saving", "can't face tomorrow", "everyone would be better off without me", "no one would miss me", "don't deserve to live",
     "wasted life", "time to die", "done with life", "done with this", "i'm done", "my life sucks", "life is meaningless", "final exit", "offing myself", "self destruct", "self destruction", "self electrocute",
     "drown myself","suffocate myself", 'OD', "overdose", "overdose on pills", "take pills", "swallow pills", "slit my wrists", "slash my arms", "cut myself",
     "kill", "die", "hopeless", "useless", "pointless", "empty", "sad", "depressed", "cry", "cut", "hurt", "hate", "disappear", "vanish", "tired", "done", "end", "alone",
     "broken", "slit", "drown", "strangle", "burn", "crash", "numb", "scared", "anxious", "panic", "afraid", "terrified", "angry", "furious", "rage", "mad", "can't go on", "no one cares",
     "nobody cares", "why am i here", "not okay", "life sucks",
]


def _keyword_pattern(keyword):
    """Compile a whole-word regex for one lowercase keyword or phrase.

    The keyword must start and end on a word boundary, so "end" no longer
    fires inside "friend" nor "mad" inside "made". A small set of common
    endings (plural, -ed/-ing with optional doubled final letter, -ly, -ness,
    -ful, and -d after a final "e") is tolerated so that forms such as
    "killing", "cutting", "hated" or "sadly" are still caught. This is a
    heuristic, not a stemmer.
    """
    last = re.escape(keyword[-1])
    endings = ["s", "es", "ly", "ness", "ful", "ed", "ing", last + "ed", last + "ing"]
    if keyword.endswith("e"):
        endings.append("d")
    return re.compile(
        r"(?<!\w)" + re.escape(keyword) + "(?:" + "|".join(endings) + r")?(?!\w)"
    )


def _find_keywords(text, keywords):
    """Return the keywords present in `text`, in list order, each at most once.

    Matching is case-insensitive on both sides (so an uppercase entry such as
    'OD' can match), and typographic apostrophes in the text are normalized so
    contractions like "i'm" match either spelling.
    """
    normalized = (text or "").lower().replace("\u2019", "'")
    hits = []
    seen = set()
    for keyword in keywords:
        key = keyword.strip().lower()
        # Skip blanks and entries that appear more than once in the list.
        if not key or key in seen:
            continue
        seen.add(key)
        if _keyword_pattern(key).search(normalized):
            hits.append(keyword)
    return hits


found = _find_keywords(transcript, self_harm_keywords)

print("\n" + "="*50)
print("⚠️ Self-Harm Keyword Detection")
print("="*50)
if found:
    print("🚨 Warning: Potential self-harm indicators found!")
    for kw in found:
        print(f" • Keyword Detected: \"{kw}\"")
    print("\n📢 ACTION: Please escalate. Recommend contacting crisis support (e.g., Lifeline 13 11 14).")
else:
    print(" • No self-harm indicators detected.")

print("\n" + "="*50)


Device set to use cuda:0



📝 Transcript Analysis
📄 Sentence: I want to kill myself

📊 Sentiment Analysis
 • Sentiment Detected : negative
 • Confidence Score   : 1.00

🚨 ALERT: Patient at risk detected based on high negative sentiment!

⚠️ Self-Harm Keyword Detection
 • Keyword Detected: "kill myself"
 • Keyword Detected: "want to kill myself"
 • Keyword Detected: "kill"

📢 ACTION: Please escalate. Recommend contacting crisis support (e.g., Lifeline 13 11 14).

