<a href="https://colab.research.google.com/github/Philshirt18/cognisign2/blob/main/Cognisight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Cognisight — Reproducible Notebook

This notebook clones the repo, installs dependencies, loads or synthesizes demo audio, extracts features, trains a calibrated baseline, evaluates (ROC-AUC, PPV@0.80 sensitivity, ECE), and runs a demo inference. Not a diagnosis; demo only.


In [1]:
REPO_URL = "https://github.com/Philshirt18/cognisign2.git"
PROJECT_DIR = "/content/cognisight"
import os, shutil
shutil.rmtree(PROJECT_DIR, ignore_errors=True)
!git clone $REPO_URL $PROJECT_DIR
!ls -la $PROJECT_DIR


Cloning into '/content/cognisight'...
remote: Enumerating objects: 77, done.[K
remote: Counting objects: 100% (77/77), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 77 (delta 20), reused 74 (delta 17), pack-reused 0 (from 0)[K
Receiving objects: 100% (77/77), 4.19 MiB | 9.14 MiB/s, done.
Resolving deltas: 100% (20/20), done.
total 232
drwxr-xr-x 5 root root   4096 Nov  4 11:31 .
drwxr-xr-x 1 root root   4096 Nov  4 11:31 ..
-rw-r--r-- 1 root root     99 Nov  4 11:31 .eslintrc.json
drwxr-xr-x 8 root root   4096 Nov  4 11:31 .git
-rw-r--r-- 1 root root     90 Nov  4 11:31 .gitignore
-rw-r--r-- 1 root root    164 Nov  4 11:31 next.config.js
-rw-r--r-- 1 root root    201 Nov  4 11:31 next-env.d.ts
-rw-r--r-- 1 root root    525 Nov  4 11:31 package.json
-rw-r--r-- 1 root root 186106 Nov  4 11:31 package-lock.json
drwxr-xr-x 3 root root   4096 Nov  4 11:31 public
-rw-r--r-- 1 root root   1967 Nov  4 11:31 README.md
drwxr-xr-x 5 root root   4096 Nov  4 11:31 src

In [2]:
%cd $PROJECT_DIR
!pip -q install -r requirements.txt || print("No requirements.txt or some deps failed; proceeding with core libs.")
!pip -q install librosa scikit-learn soundfile numpy pandas matplotlib

/content/cognisight
/bin/bash: -c: line 1: syntax error near unexpected token `"No requirements.txt or some deps failed; proceeding with core libs."'
/bin/bash: -c: line 1: `pip -q install -r requirements.txt || print("No requirements.txt or some deps failed; proceeding with core libs.")'


In [3]:
import sys, pathlib
sys.path.append(str(pathlib.Path.cwd()))
!ls -R

.:
next.config.js	package.json	   public     src
next-env.d.ts	package-lock.json  README.md  tsconfig.json

./public:
demo

./public/demo:
healthy.wav  higher-risk.wav  README.txt

./src:
app  components  lib  types.ts

./src/app:
api  disclaimer  globals.css  layout.tsx  page.tsx

./src/app/api:
process-audio

./src/app/api/process-audio:
route.ts

./src/app/disclaimer:
page.tsx

./src/components:
AnalysisSummary.tsx  FileUploader.tsx  OnboardingSteps.tsx
AudioPreview.tsx     Hero.tsx	       Recorder.tsx

./src/lib:
api.ts


In [4]:
import glob, os
AUDIO_DIR = os.path.join(PROJECT_DIR, "sample_audio")
files = glob.glob(os.path.join(AUDIO_DIR, "*.wav"))
print("Found audio files:", files[:5])

Found audio files: []


In [5]:
if len(files) == 0:
    import numpy as np, soundfile as sf, os
    AUDIO_DIR = "/content/demo_audio"
    os.makedirs(AUDIO_DIR, exist_ok=True)
    sr = 16000
    def tone(freq, secs, noise=0.01, amp=0.2):
        t = np.linspace(0, secs, int(sr * secs), endpoint=False)
        x = (np.sin(2 * np.pi * freq * t) * amp).astype("float32")
        x += np.random.normal(0, noise, x.shape).astype("float32")
        return x
    for i in range(6):
        x = np.concatenate([tone(220, 0.6), tone(220, 0.6), tone(220, 0.6)], axis=0)
        sf.write(f"{AUDIO_DIR}/ctrl_{i}.wav", x, sr)
    for i in range(6):
        x = np.concatenate([tone(220, 0.3, amp=0.08), tone(320, 0.3), tone(220, 0.3, amp=0.08), tone(320, 0.3)], axis=0)
        sf.write(f"{AUDIO_DIR}/imp_{i}.wav", x, sr)
    files = glob.glob(os.path.join(AUDIO_DIR, "*.wav"))
print("Using audio files:", files[:5])

Using audio files: ['/content/demo_audio/imp_0.wav', '/content/demo_audio/ctrl_3.wav', '/content/demo_audio/imp_5.wav', '/content/demo_audio/ctrl_0.wav', '/content/demo_audio/imp_2.wav']


In [6]:
try:
    from app.audio.features import extract_features  # adjust if your repo path differs
except Exception as e:
    print("Fallback to minimal MFCC extractor:", e)
    import librosa, numpy as np
    def extract_features(path):
        y, sr = librosa.load(path, sr=16000)
        if len(y) < sr:
            y = librosa.util.fix_length(y, size=sr)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        return np.concatenate([mfcc.mean(axis=1), mfcc.std(axis=1)], axis=0)


Fallback to minimal MFCC extractor: No module named 'app'


In [7]:
import numpy as np, os
X, y = [], []
for f in files:
    X.append(extract_features(f))
    y.append(1 if os.path.basename(f).startswith("imp_") else 0)
X = np.vstack(X)
y = np.array(y)
X.shape, y.shape, y.mean()


((12, 26), (12,), np.float64(0.5))

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_auc_score, recall_score, precision_score
import numpy as np

Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

base = make_pipeline(StandardScaler(), LogisticRegression(max_iter=200, class_weight="balanced"))
cal = CalibratedClassifierCV(base, method="isotonic", cv=3)
cal.fit(Xtr, ytr)

probs = cal.predict_proba(Xte)[:, 1]
auc = roc_auc_score(yte, probs)

# PPV at 0.80 sensitivity
thresholds = np.linspace(0, 1, 1001)
best_ppv = 0.0
chosen = 0.5
for t in thresholds:
    preds = (probs >= t).astype(int)
    sens = recall_score(yte, preds)
    if sens >= 0.80:
        ppv = precision_score(yte, preds, zero_division=0)
        if ppv > best_ppv:
            best_ppv, chosen = ppv, t

# Simple ECE (10 bins)
bins = np.linspace(0, 1, 11)
ece = 0.0
for i in range(len(bins) - 1):
    lo, hi = bins[i], bins[i + 1]
    idx = (probs >= lo) & (probs < hi)
    if idx.sum() == 0:
        continue
    conf = probs[idx].mean()
    acc = ((probs[idx] >= chosen).astype(int) == yte[idx]).mean()
    ece += idx.mean() * abs(acc - conf)

print(f"ROC-AUC: {auc:.2f}")
print(f"PPV @ 0.80 sensitivity: {best_ppv:.2f} (threshold ~ {chosen:.2f})")
print(f"ECE: {ece:.2f}")


ROC-AUC: 1.00
PPV @ 0.80 sensitivity: 1.00 (threshold ~ 0.07)
ECE: 0.48


In [9]:
test_path = files[0]
feat = extract_features(test_path).reshape(1, -1)
p = cal.predict_proba(feat)[0, 1]
print("Test file:", os.path.basename(test_path))
print(f"Estimated risk score: {p:.2f}")
print("Not a diagnosis — demo only.")


Test file: imp_0.wav
Estimated risk score: 0.98
Not a diagnosis — demo only.


In [10]:
import sys, sklearn, librosa, numpy
print("Python:", sys.version)
print("sklearn:", sklearn.__version__)
print("librosa:", librosa.__version__)
print("numpy:", numpy.__version__)


Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
sklearn: 1.6.1
librosa: 0.11.0
numpy: 2.0.2


File → Save a copy to GitHub  
Repo: select my repo  
Path: /notebooks/Cognisight_AI_Speech_Analysis.ipynb  
Commit: “Add reproducible hackathon notebook”


![Colab](https://colab.research.google.com/assets/colab-badge.svg)
