<a href="https://colab.research.google.com/github/Philshirt18/cognisign2/blob/main/notebooks/Cognisight_AI_Speech_Analysis.ipynb%20" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Cognisight — Reproducible Notebook

This notebook clones the repo, installs dependencies, loads or synthesizes demo audio, extracts features, trains a calibrated baseline, evaluates (ROC-AUC, PPV@0.80 sensitivity, ECE), and runs a demo inference. Not a diagnosis; demo only.


In [21]:
# RESET WORKING DIR
%cd /content

# CLONE YOUR REPO FRESH
REPO_URL = "https://github.com/Philshirt18/cognisign2.git"
PROJECT_DIR = "/content/cognisight"

!rm -rf "$PROJECT_DIR"
!git clone "$REPO_URL" "$PROJECT_DIR"
!ls -la "$PROJECT_DIR" | head -n 50


/content
Cloning into '/content/cognisight'...
remote: Enumerating objects: 92, done.[K
remote: Counting objects: 100% (92/92), done.[K
remote: Compressing objects: 100% (63/63), done.[K
remote: Total 92 (delta 27), reused 73 (delta 17), pack-reused 0 (from 0)[K
Receiving objects: 100% (92/92), 4.19 MiB | 7.95 MiB/s, done.
Resolving deltas: 100% (27/27), done.
total 252
drwxr-xr-x 6 root root   4096 Nov  4 14:09 .
drwxr-xr-x 1 root root   4096 Nov  4 14:09 ..
-rw-r--r-- 1 root root  15328 Nov  4 14:09 Cognisight.ipynb
-rw-r--r-- 1 root root     99 Nov  4 14:09 .eslintrc.json
drwxr-xr-x 8 root root   4096 Nov  4 14:09 .git
-rw-r--r-- 1 root root     90 Nov  4 14:09 .gitignore
-rw-r--r-- 1 root root    164 Nov  4 14:09 next.config.js
-rw-r--r-- 1 root root    201 Nov  4 14:09 next-env.d.ts
drwxr-xr-x 2 root root   4096 Nov  4 14:09 notebooks
-rw-r--r-- 1 root root    525 Nov  4 14:09 package.json
-rw-r--r-- 1 root root 186106 Nov  4 14:09 package-lock.json
drwxr-xr-x 3 root root   40

In [22]:
%cd $PROJECT_DIR

# If requirements.txt exists, install; otherwise continue
!if [ -f requirements.txt ]; then pip -q install -r requirements.txt; else echo "No requirements.txt found — continuing."; fi

# Core libs you need for the rest of the notebook
!pip -q install librosa scikit-learn soundfile numpy pandas matplotlib

/content/cognisight
No requirements.txt found — continuing.


In [23]:
import sys, pathlib
sys.path.append(str(pathlib.Path.cwd()))
!ls -R | head -n 200

.:
Cognisight.ipynb
next.config.js
next-env.d.ts
notebooks
package.json
package-lock.json
public
README.md
src
tsconfig.json

./notebooks:
Cognisight_AI_Speech_Analysis.ipynb

./public:
demo

./public/demo:
healthy.wav
higher-risk.wav
README.txt

./src:
app
components
lib
types.ts

./src/app:
api
disclaimer
globals.css
layout.tsx
page.tsx

./src/app/api:
process-audio

./src/app/api/process-audio:
route.ts

./src/app/disclaimer:
page.tsx

./src/components:
AnalysisSummary.tsx
AudioPreview.tsx
FileUploader.tsx
Hero.tsx
OnboardingSteps.tsx
Recorder.tsx

./src/lib:
api.ts


In [24]:
import glob, os
AUDIO_DIR = os.path.join(PROJECT_DIR, "sample_audio")
files = glob.glob(os.path.join(AUDIO_DIR, "*.wav"))
print("Found audio files:", files[:5])

Found audio files: []


In [25]:
if len(files) == 0:
    import numpy as np, soundfile as sf
    AUDIO_DIR = "/content/demo_audio"; os.makedirs(AUDIO_DIR, exist_ok=True)
    sr = 16000
    def tone(freq, secs, noise=0.01, amp=0.2):
        t = np.linspace(0, secs, int(sr*secs), endpoint=False)
        x = (np.sin(2*np.pi*freq*t)*amp).astype("float32")
        x += np.random.normal(0, noise, x.shape).astype("float32")
        return x
    for i in range(6):
        x = np.concatenate([tone(220,0.6), tone(220,0.6), tone(220,0.6)], 0)
        sf.write(f"{AUDIO_DIR}/ctrl_{i}.wav", x, sr)
    for i in range(6):
        x = np.concatenate([tone(220,0.3,amp=0.08), tone(320,0.3), tone(220,0.3,amp=0.08), tone(320,0.3)], 0)
        sf.write(f"{AUDIO_DIR}/imp_{i}.wav", x, sr)
    files = glob.glob(os.path.join(AUDIO_DIR, "*.wav"))
print("Using audio files:", files[:5])

Using audio files: ['/content/demo_audio/imp_0.wav', '/content/demo_audio/ctrl_3.wav', '/content/demo_audio/imp_5.wav', '/content/demo_audio/ctrl_0.wav', '/content/demo_audio/imp_2.wav']


In [26]:
try:
    from app.audio.features import extract_features  # adjust if your repo path differs
except Exception as e:
    print("Fallback to minimal MFCC extractor:", e)
    import librosa, numpy as np
    def extract_features(path):
        y, sr = librosa.load(path, sr=16000)
        if len(y) < sr:
            y = librosa.util.fix_length(y, size=sr)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        return np.concatenate([mfcc.mean(axis=1), mfcc.std(axis=1)], axis=0)


Fallback to minimal MFCC extractor: No module named 'app'


In [27]:
import numpy as np, os
X, y = [], []
for f in files:
    X.append(extract_features(f))
    y.append(1 if os.path.basename(f).startswith("imp_") else 0)
X = np.vstack(X)
y = np.array(y)
X.shape, y.shape, y.mean()


((12, 26), (12,), np.float64(0.5))

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_auc_score, recall_score, precision_score
import numpy as np

Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

base = make_pipeline(StandardScaler(), LogisticRegression(max_iter=200, class_weight="balanced"))
cal = CalibratedClassifierCV(base, method="isotonic", cv=3)
cal.fit(Xtr, ytr)

probs = cal.predict_proba(Xte)[:, 1]
auc = roc_auc_score(yte, probs)

# PPV at 0.80 sensitivity
thresholds = np.linspace(0, 1, 1001)
best_ppv = 0.0
chosen = 0.5
for t in thresholds:
    preds = (probs >= t).astype(int)
    sens = recall_score(yte, preds)
    if sens >= 0.80:
        ppv = precision_score(yte, preds, zero_division=0)
        if ppv > best_ppv:
            best_ppv, chosen = ppv, t

# Simple ECE (10 bins)
bins = np.linspace(0, 1, 11)
ece = 0.0
for i in range(len(bins) - 1):
    lo, hi = bins[i], bins[i + 1]
    idx = (probs >= lo) & (probs < hi)
    if idx.sum() == 0:
        continue
    conf = probs[idx].mean()
    acc = ((probs[idx] >= chosen).astype(int) == yte[idx]).mean()
    ece += idx.mean() * abs(acc - conf)

print(f"ROC-AUC: {auc:.2f}")
print(f"PPV @ 0.80 sensitivity: {best_ppv:.2f} (threshold ~ {chosen:.2f})")
print(f"ECE: {ece:.2f}")


ROC-AUC: 1.00
PPV @ 0.80 sensitivity: 1.00 (threshold ~ 0.00)
ECE: 0.51


In [29]:
test_path = files[0]
feat = extract_features(test_path).reshape(1, -1)
p = cal.predict_proba(feat)[0, 1]
print("Test file:", os.path.basename(test_path))
print(f"Estimated risk score: {p:.2f}")
print("Not a diagnosis — demo only.")


Test file: imp_0.wav
Estimated risk score: 0.98
Not a diagnosis — demo only.


In [30]:
import sys, sklearn, librosa, numpy
print("Python:", sys.version)
print("sklearn:", sklearn.__version__)
print("librosa:", librosa.__version__)
print("numpy:", numpy.__version__)


Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
sklearn: 1.6.1
librosa: 0.11.0
numpy: 2.0.2


File → Save a copy to GitHub  
Repo: select my repo  
Path: /notebooks/Cognisight_AI_Speech_Analysis.ipynb  
Commit: “Add reproducible hackathon notebook”


![Colab](https://colab.research.google.com/assets/colab-badge.svg)
