In [None]:
#install libraries
!pip install praat-parselmouth librosa scikit-learn imbalanced-learn shap tqdm tensorflow==2.20.0 pillow
!pip install mlxtend
!pip install onnx onnx-tf
!pip install tensorflow




In [None]:
import torch
print("CUDA available:", torch.cuda.is_available())
# torch.cuda.get_device_name(0) raises when no CUDA device is present, which would
# abort "Run All" on a CPU-only machine, so only query the name when CUDA is usable.
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU:", None)

In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib
matplotlib.use("Agg")  
import matplotlib.pyplot as plt
from tqdm import tqdm

#start config
# BASE_DIR defaults to the original author's local folder but can be redirected by
# setting the PD_MODEL_BASE_DIR environment variable, so the notebook can run on
# other machines without editing this cell.
BASE_DIR = os.environ.get(
    "PD_MODEL_BASE_DIR",
    r"C:\Users\vidha\Documents\congressionalapp\model",
)
DATASET_DIR = os.path.join(BASE_DIR, "voice_dataset")          # input audio, one sub-folder per class
OUTPUT_DIR = os.path.join(BASE_DIR, "spectrograms_melfixed")   # generated PNG spectrograms
SAMPLE_RATE = 22050   # sampling rate passed to librosa.load
N_MELS = 128          # number of mel bands passed to melspectrogram
os.makedirs(OUTPUT_DIR, exist_ok=True)

#helpers
def save_spectrogram(file_path, label_prefix, index):
    """Render one audio file as a mel-spectrogram PNG inside OUTPUT_DIR.

    The audio is loaded at SAMPLE_RATE, converted to a mel spectrogram with
    N_MELS bands, scaled to dB relative to its maximum, and drawn without axes.
    The image is written as ``{label_prefix}_{index:04d}.png``.

    Args:
        file_path: Path of the audio file to load.
        label_prefix: Class tag used as the file-name prefix (e.g. "PD" or "HC").
        index: Running number embedded, zero-padded to four digits, in the file name.

    Returns:
        None. Empty audio is reported and skipped; any exception is caught and
        printed so that one bad file does not stop a batch run.
    """
    try:
        print(f"Loading {file_path}")
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
        if len(y) == 0:
            print(f"Empty audio: {file_path}")
            return

        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS)
        mel_db = librosa.power_to_db(mel_spec, ref=np.max)

        fig = plt.figure(figsize=(5, 5))
        try:
            librosa.display.specshow(mel_db, sr=sr, cmap='magma')
            plt.axis('off')
            plt.tight_layout(pad=0)

            output_name = f"{label_prefix}_{index:04d}.png"
            output_path = os.path.join(OUTPUT_DIR, output_name)
            plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure: without this, a failure while plotting or
            # saving leaves the figure open and they pile up over a long batch.
            plt.close(fig)
        print(f"Saved: {output_path}")
    except Exception as e:
        print(f"Error processing {file_path}: {e}")

#main
# Maps each dataset sub-folder to the class prefix used in the output file names.
labels = {"PD_AH": "PD", "HC_AH": "HC"}
# Number of audio files submitted per class (failed conversions are counted too).
counter = {"PD": 0, "HC": 0}
AUDIO_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac")

for subfolder, label_prefix in labels.items():
    folder_path = os.path.join(DATASET_DIR, subfolder)
    if not os.path.isdir(folder_path):
        print(f"Skipping missing folder: {folder_path}")
        continue

    print(f"\nGenerating spectrograms for {label_prefix} from {folder_path}")
    # os.listdir returns entries in arbitrary order; sorting keeps the mapping from
    # audio file to numbered PNG identical from one run to the next.
    audio_files = sorted(
        fname for fname in os.listdir(folder_path)
        if fname.lower().endswith(AUDIO_EXTENSIONS)
    )
    for fname in tqdm(audio_files, desc=f"Processing {label_prefix}"):
        file_path = os.path.join(folder_path, fname)
        counter[label_prefix] += 1
        save_spectrogram(file_path, label_prefix, counter[label_prefix])

print(f"\nall spectrograms saved in '{OUTPUT_DIR}/'")
print(f"   PD: {counter['PD']} files")
print(f"   HC: {counter['HC']} files")


In [None]:
#augmentation
import os, random
import numpy as np
from PIL import Image, ImageEnhance, ImageOps
from tqdm import tqdm

# directories
# Both paths are relative, so they resolve against the kernel's current working directory.
# NOTE(review): SPEC_DIR here is "spectrograms_from_kotlin", while the generation cell
# writes its PNGs to ".../spectrograms_melfixed" and the training cell rebinds SPEC_DIR
# to "spectrograms_melfixed". No visible cell writes into "spectrograms_from_kotlin";
# presumably it is produced outside this notebook — confirm.
SPEC_DIR = "spectrograms_from_kotlin"
AUG_DIR = "spectrograms_augmented"
# Make sure the folder for augmented images exists before anything is saved into it.
os.makedirs(AUG_DIR, exist_ok=True)

def specaugment(img, freq_mask_param=30, time_mask_param=40, num_masks=2):
    """Blank out random horizontal and vertical bands of an image (SpecAugment-style).

    For each of ``num_masks`` rounds, one band of rows (up to ``freq_mask_param``
    tall) and then one band of columns (up to ``time_mask_param`` wide) is
    overwritten with the current mean of the whole pixel array. The input is
    copied first, and the result is returned as a new uint8 PIL image.
    """
    pixels = np.array(img)
    height, width = pixels.shape[0], pixels.shape[1]

    rounds_left = num_masks
    while rounds_left > 0:
        rounds_left -= 1

        # Row band ("frequency" axis of the picture).
        band_height = random.randint(0, freq_mask_param)
        row_start = random.randint(0, max(1, height - band_height))
        pixels[row_start:row_start + band_height, :] = pixels.mean()

        # Column band ("time" axis of the picture).
        band_width = random.randint(0, time_mask_param)
        col_start = random.randint(0, max(1, width - band_width))
        pixels[:, col_start:col_start + band_width] = pixels.mean()

    masked = pixels.astype(np.uint8)
    return Image.fromarray(masked)

def random_augment(img):
    """Return a randomly perturbed version of a spectrogram image.

    Two independent steps, each applied with probability 0.9:
      1. contrast and brightness jitter, each by a factor drawn from [0.8, 1.2];
      2. additive Gaussian pixel noise (mean 0, std 5), clipped to [0, 255].

    SpecAugment masking and horizontal mirroring are intentionally not applied here.
    """
    # Step 1: colour jitter (contrast first, then brightness).
    apply_jitter = random.random() < 0.9
    if apply_jitter:
        img = ImageEnhance.Contrast(img).enhance(random.uniform(0.8,1.2))
        img = ImageEnhance.Brightness(img).enhance(random.uniform(0.8,1.2))

    # Step 2: Gaussian noise on the raw pixel values.
    apply_noise = random.random() < 0.9
    if apply_noise:
        pixels = np.array(img).astype(np.float32)
        noisy = np.clip(pixels + np.random.normal(0, 5, pixels.shape), 0, 255)
        img = Image.fromarray(noisy.astype(np.uint8))

    return img

# Number of augmented variants written for every source spectrogram.
N_AUG = 2
all_specs = [entry for entry in os.listdir(SPEC_DIR) if entry.endswith(".png")]
print(f" Generating {N_AUG} augmented copies for {len(all_specs)} spectrograms...")
print(all_specs)

for fname in tqdm(all_specs):
    img = Image.open(os.path.join(SPEC_DIR, fname)).convert("RGB")

    # The class is inferred from the file name; "PD" takes precedence over "HC".
    upper_name = fname.upper()
    for candidate in ("PD", "HC"):
        if candidate in upper_name:
            label_prefix = candidate
            break
    else:
        # Neither tag present: report the file and move on to the next one.
        print("Skipping file:", fname)
        continue

    base_name = os.path.splitext(fname)[0]

    for copy_number in range(1, N_AUG + 1):
        new_name = f"{label_prefix}_{base_name}_aug{copy_number}.png"
        random_augment(img).save(os.path.join(AUG_DIR, new_name))

print(f"augmented data saved in '{AUG_DIR}/'")


In [None]:
#ai model

import os, numpy as np, warnings, time, random
import librosa, librosa.display
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
# Silence library warnings for the rest of the cell's output.
warnings.filterwarnings("ignore")
# Fix the NumPy and PyTorch global seeds.
np.random.seed(42)
torch.manual_seed(42)

print("GPU Configuration:")
has_cuda = torch.cuda.is_available()
# Everything below trains and evaluates on this device.
device = torch.device('cuda' if has_cuda else 'cpu')
if not has_cuda:
    print("No GPU found. Running on CPU")
else:
    print(f"GPU Found: {torch.cuda.get_device_name(0)}")
    gpu_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Memory: {gpu_bytes / 1e9:.2f} GB")
    print(f"Using device: {device}")

def extract_middle_segment(audio_path, duration=1.5, sr=8000):
    """Return the central ``duration`` seconds of the longest non-silent stretch.

    At most the first 10 seconds of the file are loaded at sample rate ``sr``.
    The audio is split on silence (30 dB threshold) and the longest resulting
    piece is selected; a window of ``int(duration * sr)`` samples is then cut
    from its centre.

    Returns:
        The cut-out samples, or None when no non-silent interval exists, when the
        longest one is shorter than the requested window, or when loading or
        processing raises (the error is printed with the file's base name).
    """
    try:
        signal, _ = librosa.load(audio_path, sr=sr, duration=10)
        voiced_spans = librosa.effects.split(signal, top_db=30)
        if len(voiced_spans) == 0:
            return None

        # Longest non-silent piece; on ties the earliest one is kept.
        longest = max((signal[lo:hi] for lo, hi in voiced_spans), key=len)

        window = int(duration * sr)
        surplus = len(longest) - window
        if surplus < 0:
            return None

        offset = surplus // 2
        return longest[offset:offset + window]
    except Exception as e:
        print(f"{os.path.basename(audio_path)}: {e}")
        return None


def create_mel_spectrogram(audio, sr=8000, n_mels=256, n_fft=1024, hop_length=51, save_path=None):
    """Draw a min-max-normalised mel spectrogram of ``audio`` and optionally save it.

    Args:
        audio: Audio samples passed to ``librosa.feature.melspectrogram`` as ``y``.
        sr: Sample rate of ``audio``.
        n_mels: Number of mel bands.
        n_fft: FFT window size.
        hop_length: Hop between successive frames, in samples.
        save_path: Destination image path; when falsy the figure is drawn but not saved.

    Returns:
        ``save_path`` exactly as given (None when nothing was saved).
    """
    S = librosa.feature.melspectrogram(
        y=audio, sr=sr, n_mels=n_mels, n_fft=n_fft,
        hop_length=hop_length, window='hann'
    )
    S_db = librosa.power_to_db(S, ref=np.max)

    # Min-max scale to [0, 1]. A constant spectrogram (e.g. silent input) has zero
    # range; dividing by it would fill the image with NaN, so fall back to all zeros.
    shifted = S_db - S_db.min()
    value_range = S_db.max() - S_db.min()
    S_norm = shifted / value_range if value_range > 0 else shifted

    fig = plt.figure(figsize=(6, 6))
    try:
        librosa.display.specshow(S_norm, sr=sr, hop_length=hop_length, cmap='viridis')
        plt.axis('off')
        plt.tight_layout(pad=0)
        if save_path:
            plt.savefig(save_path, dpi=100, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even if drawing or saving fails, so figures do not accumulate.
        plt.close(fig)
    return save_path


class SpectrogramDataset(Dataset):
    """Dataset of spectrogram image files paired with their labels.

    Each item is ``(image, label)``: the image at ``image_paths[idx]`` is opened,
    converted to RGB and, when a transform was supplied, passed through it.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths, self.labels, self.transform = image_paths, labels, transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the ``idx``-th image (transformed if configured) with its label."""
        picture = Image.open(self.image_paths[idx]).convert('RGB')
        if self.transform:
            picture = self.transform(picture)
        return picture, self.labels[idx]


class InceptionV3Classifier(nn.Module):
    """Frozen ImageNet InceptionV3 feature extractor followed by a trainable MLP head.

    The backbone's own ``fc`` layers (main and auxiliary) are replaced by identity
    modules, so the backbone outputs its pooled feature vector. Only the
    ``classifier`` head has parameters with ``requires_grad=True``.

    Args:
        num_classes: Size of the output logit vector.
        dropout: Dropout probability used after each hidden layer of the head.
    """

    def __init__(self, num_classes=2, dropout=0.2):
        super().__init__()

        backbone = models.inception_v3(weights='IMAGENET1K_V1', aux_logits=True)

        # Freeze every backbone weight; gradients flow only into the head.
        for frozen in backbone.parameters():
            frozen.requires_grad = False

        num_features = backbone.fc.in_features
        backbone.fc = nn.Identity()
        backbone.AuxLogits.fc = nn.Identity()
        self.inception = backbone

        hidden = 1024
        head_layers = [
            nn.BatchNorm1d(num_features),
            nn.Linear(num_features, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for a batch of images."""
        # model.train() also flips the backbone into training mode; force it back to
        # eval mode so the frozen backbone keeps behaving as a fixed feature extractor.
        if self.training:
            self.inception.eval()

        features = self.inception(x)
        return self.classifier(features)

def train_model(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and return the mean per-batch loss.

    The model is put in training mode; for every batch the inputs and labels are
    moved to ``device`` (labels cast to long), then one optimisation step is taken.

    Returns:
        Sum of the batch losses divided by ``len(train_loader)``.
    """
    model.train()
    batch_losses = []
    for batch_inputs, batch_labels in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device).long()

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(train_loader)


def evaluate_model(model, test_loader, device):
    """Run the model over ``test_loader`` without gradients and collect predictions.

    Returns:
        A tuple of three NumPy arrays ``(y_true, y_pred, y_probs)``: the labels,
        the arg-max class per sample, and the softmax probability of class 1.
    """
    model.eval()
    truths, predictions, positive_probs = [], [], []
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device).long()

            class_probs = torch.softmax(model(batch_inputs), dim=1)
            predicted = torch.argmax(class_probs, dim=1)

            positive_probs.extend(class_probs[:, 1].cpu().numpy())
            predictions.extend(predicted.cpu().numpy())
            truths.extend(batch_labels.cpu().numpy())
    return np.array(truths), np.array(predictions), np.array(positive_probs)



# Relative folders: they resolve against the kernel's current working directory.
DATA_DIR = "voice_dataset"
SPEC_DIR = "spectrograms_melfixed"
AUG_DIR = "spectrograms_augmented"
os.makedirs(SPEC_DIR, exist_ok=True)
os.makedirs(AUG_DIR, exist_ok=True)


def _list_audio_files(data_dir, folders=("PD_AH", "HC_AH")):
    """Return (paths, labels) for the .wav files in the class folders (1 = PD, 0 = HC)."""
    paths, file_labels = [], []
    for folder in folders:
        folder_path = os.path.join(data_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        # Sorted so the listing order does not depend on the file system.
        for filename in sorted(os.listdir(folder_path)):
            if filename.lower().endswith(".wav"):
                paths.append(os.path.join(folder_path, filename))
                file_labels.append(1 if "PD" in folder else 0)
    return paths, file_labels


def _list_spectrograms(folders):
    """Return (paths, labels) for the .png files in `folders`; label 1 when the name contains "PD"."""
    paths, file_labels = [], []
    for folder in folders:
        # Sorted so the seeded train/test splits built from this list are reproducible.
        for filename in sorted(os.listdir(folder)):
            if filename.endswith(".png"):
                paths.append(os.path.join(folder, filename))
                file_labels.append(1 if "PD" in filename.upper() else 0)
    return paths, file_labels


audio_paths, labels = _list_audio_files(DATA_DIR)
print(f" Found {len(audio_paths)} audio files: {sum(labels)} PD / {len(labels)-sum(labels)} HC")

print("\n Loading precomputed spectrograms...")
spectrogram_paths, valid_labels = _list_spectrograms([SPEC_DIR])
print(f" Loaded {len(spectrogram_paths)} spectrograms from {SPEC_DIR}")


# Same listing as audio_paths/labels; kept under these names as independent copies
# instead of scanning the folders a second time.
original_paths, original_labels = list(audio_paths), list(labels)
print(f"Original audio files: {len(original_paths)} total")

combined_paths, combined_labels = _list_spectrograms([SPEC_DIR, AUG_DIR])
print(f"Combined spectrograms (orig + aug): {len(combined_paths)} total")


# Resize to 299x299 and normalise each channel with the mean/std values below.
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# No data loaders are built here: the evaluation loop creates a fresh train/test
# loader pair for every split. The loaders that used to be created at this point
# were overwritten before use, and the test one wrapped the .wav paths in the image
# dataset, which would fail as soon as it was iterated.

N_ITERATIONS = 100          # number of independent stratified 70/30 splits
MAX_EPOCHS = 10             # upper bound on training epochs per split
EARLY_STOP_PATIENCE = 3     # stop after this many epochs without a lower training loss
BATCH_SIZE = 8
MODEL_PATH = "best_model.pth"   # must match the file name loaded by the export cell


def _snapshot_state(module):
    """Return an independent copy of module.state_dict().

    state_dict() hands back tensors that share storage with the live parameters,
    so without cloning, later optimizer steps would silently overwrite the
    "best" checkpoint.
    """
    return {key: value.detach().clone() for key, value in module.state_dict().items()}


def _print_metric_summary(auc_scores, acc_scores, prec_scores, rec_scores, f1_scores):
    """Print mean ± std of every metric collected so far."""
    print(f"AUC:       {np.mean(auc_scores):.4f} ± {np.std(auc_scores):.4f}")
    print(f"Accuracy:  {np.mean(acc_scores):.4f} ± {np.std(acc_scores):.4f}")
    print(f"Precision: {np.mean(prec_scores):.4f} ± {np.std(prec_scores):.4f}")
    print(f"Recall:    {np.mean(rec_scores):.4f} ± {np.std(rec_scores):.4f}")
    print(f"F1 Score:  {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")


# NOTE(review): the split is drawn over originals and augmented images together, so
# the augmented copies of one source image (and possibly the source itself) can land
# on both sides of the split, which makes the reported metrics optimistic. Splitting
# by source recording before augmenting would avoid this — confirm intended protocol.
# NOTE(review): early stopping and checkpoint selection use the training loss; there
# is no separate validation set.
print(f"\n Running {N_ITERATIONS} iterations of 70/30 train-test split...\n")
auc_scores, acc_scores, prec_scores, rec_scores, f1_scores = [], [], [], [], []
for iteration in range(N_ITERATIONS):
    X_train, X_test, y_train, y_test = train_test_split(
        combined_paths, combined_labels, test_size=0.3,
        stratify=combined_labels, random_state=iteration
    )
    # The head starts with BatchNorm1d, which raises in training mode on a batch of
    # one sample; drop the trailing batch only when it would contain a single item.
    train_loader = DataLoader(SpectrogramDataset(X_train, y_train, transform),
                              batch_size=BATCH_SIZE, shuffle=True,
                              drop_last=(len(X_train) % BATCH_SIZE == 1))
    test_loader = DataLoader(SpectrogramDataset(X_test, y_test, transform),
                             batch_size=BATCH_SIZE, shuffle=False)
    model = InceptionV3Classifier().to(device)
    opt = optim.Adam(model.parameters(), lr=0.001)
    crit = nn.CrossEntropyLoss()

    best_loss, best_state, patience = float('inf'), None, 0
    for epoch in range(MAX_EPOCHS):
        loss = train_model(model, train_loader, crit, opt, device)
        if loss < best_loss:
            best_loss, best_state, patience = loss, _snapshot_state(model), 0
        else:
            patience += 1
            if patience >= EARLY_STOP_PATIENCE:
                break
    # best_state stays None if the loss never beat +inf (e.g. NaN); keep the current
    # weights in that case instead of crashing in load_state_dict(None).
    if best_state is None:
        best_state = _snapshot_state(model)
    model.load_state_dict(best_state)

    y_true, y_pred, y_prob = evaluate_model(model, test_loader, device)
    auc = roc_auc_score(y_true, y_prob)
    acc = accuracy_score(y_true, y_pred)
    # zero_division=0 on all three so a split with no positive predictions yields 0.
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    auc_scores.append(auc); acc_scores.append(acc)
    prec_scores.append(prec); rec_scores.append(rec); f1_scores.append(f1)
    print("we are on iteration" + str(iteration))
    if iteration % 10 == 0:
        print(str(iteration) + " results")
        _print_metric_summary(auc_scores, acc_scores, prec_scores, rec_scores, f1_scores)

print(f"\n📊 FINAL RESULTS ({N_ITERATIONS} iterations)")
_print_metric_summary(auc_scores, acc_scores, prec_scores, rec_scores, f1_scores)

# winning
# best_state is reset for every split, so what is saved here is the lowest-training-loss
# checkpoint of the LAST split, not the best-scoring model across all splits.
model.load_state_dict(best_state)
torch.save(best_state, MODEL_PATH)
print(f"Saved best model weights to {MODEL_PATH}")



In [None]:
import torch

# Rebuild the classifier and load the saved weights onto the CPU.
# Expects the checkpoint file to be named exactly "best_model.pth" in the working directory.
model = InceptionV3Classifier(num_classes=2)
model.load_state_dict(torch.load("best_model.pth", map_location="cpu"))
# Turn off the backbone's auxiliary-logits flag before tracing.
model.inception.aux_logits = False
# Put the model in inference mode before tracing.
model.eval()

# Trace the model with a random dummy batch of shape (1, 3, 299, 299) and save the
# resulting TorchScript module to "best_model.pt".
example_input = torch.randn(1, 3, 299, 299)
traced = torch.jit.trace(model, example_input)
traced.save("best_model.pt")

print("Done — TorchScript model saved!")
