In [1]:
# Install required libraries
!pip install librosa transformers



In [2]:
# Define paths to data
cha_path = '/kaggle/input/pittcombined/PittCombined/cha'
mp3_path = '/kaggle/input/pittcombined/PittCombined/mp3'

In [3]:
# Import necessary libraries
import os
import librosa
import pandas as pd
from transformers import BertTokenizer, BertModel


In [4]:
# List files in directories
cha_files = [os.path.join(cha_path, file) for file in os.listdir(cha_path) if file.endswith('.cha')]
mp3_files = [os.path.join(mp3_path, file) for file in os.listdir(mp3_path) if file.endswith('.mp3')]

print(f"Found {len(cha_files)} CHA files and {len(mp3_files)} MP3 files.")

Found 1255 CHA files and 1253 MP3 files.


In [5]:
import os
import re
import random
from collections import defaultdict, Counter

def extract_diagnosis(cha_file):
    """Extracts the diagnosis from a CHAT transcript file."""
    with open(cha_file, 'r') as file:
        content = file.read()
    match = re.search(r'@ID:\s*[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|([^|]*)\|', content)
    return match.group(1) if match else None

# Paths to your CHA and MP3 files
cha_path = '/kaggle/input/pittcombined/PittCombined/cha'
mp3_path = '/kaggle/input/pittcombined/PittCombined/mp3'

# Load all CHA files and their diagnoses
cha_files = [f for f in os.listdir(cha_path) if f.endswith('.cha')]
diagnoses = [extract_diagnosis(os.path.join(cha_path, f)) for f in cha_files]

# Group files by diagnosis, ensuring corresponding MP3 exists
files_by_diagnosis = defaultdict(list)
for cha_file, diag in zip(cha_files, diagnoses):
    mp3_file = cha_file.replace('.cha', '.mp3')
    if os.path.exists(os.path.join(mp3_path, mp3_file)):
        files_by_diagnosis[diag].append(cha_file)

# Select up to 500 files, trying to balance across diagnoses
selected_files = []
for diag, files in files_by_diagnosis.items():
    select_count = min(len(files), max(500 // len(files_by_diagnosis), 1))
    selected_files.extend(random.sample(files, select_count))

# Ensure the selection does not exceed 10 if categories were unbalanced
selected_files = random.sample(selected_files, min(500, len(selected_files)))

# Count of selected diagnoses
selected_diagnoses = [extract_diagnosis(os.path.join(cha_path, f)) for f in selected_files]
diagnosis_count = Counter(selected_diagnoses)

# Find corresponding MP3 files
selected_mp3_files = [f.replace('.cha', '.mp3') for f in selected_files]

# Collect full paths for the selected files
cha_files = [os.path.join(cha_path, f) for f in selected_files]
mp3_files = [os.path.join(mp3_path, f) for f in selected_mp3_files]

# Output results
print("Selected CHA files:", cha_files)
print("Selected MP3 files:", mp3_files)
print("Diagnosis counts:", dict(diagnosis_count))


Selected CHA files: ['/kaggle/input/pittcombined/PittCombined/cha/711-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/709-2.cha', '/kaggle/input/pittcombined/PittCombined/cha/141-1.cha', '/kaggle/input/pittcombined/PittCombined/cha/016-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/704-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/358-0s.cha', '/kaggle/input/pittcombined/PittCombined/cha/065-0s.cha', '/kaggle/input/pittcombined/PittCombined/cha/260-1s.cha', '/kaggle/input/pittcombined/PittCombined/cha/235-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/581-0r.cha', '/kaggle/input/pittcombined/PittCombined/cha/302-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/450-1.cha', '/kaggle/input/pittcombined/PittCombined/cha/295-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/705-0f.cha', '/kaggle/input/pittcombined/PittCombined/cha/714-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/339-0f.cha', '/kaggle/input/pittcombined/PittCombined/cha/209-1.cha', '/ka

In [6]:
cha_base_names = set([os.path.splitext(os.path.basename(f))[0] for f in cha_files])
mp3_base_names = set([os.path.splitext(os.path.basename(f))[0] for f in mp3_files])

unmatched_cha = cha_base_names - mp3_base_names
unmatched_mp3 = mp3_base_names - cha_base_names

print("Unmatched CHA files:", unmatched_cha)
print("Unmatched MP3 files:", unmatched_mp3)

Unmatched CHA files: set()
Unmatched MP3 files: set()


In [7]:
import librosa
from transformers import BertTokenizer, BertModel
import soundfile as sf

# Initialize Clinical BERT
tokenizer = BertTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model = BertModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

  return self.fget.__get__(instance, owner)()


In [8]:
import os
import librosa
import soundfile as sf

def preprocess_audio(mp3_file_path, output_wav_path):
    # Define output path within the writable directory
    output_wav_path = os.path.join('/kaggle/working', os.path.basename(output_wav_path))
    
    try:
        # Check if the WAV file already exists
        if not os.path.exists(output_wav_path):
            # Convert MP3 to WAV
            y, sr = librosa.load(mp3_file_path, sr=None)
            sf.write(output_wav_path, y, sr)
            print(f"Converted {mp3_file_path} to WAV.")
        else:
            print(f"WAV file already exists: {output_wav_path}")

        # Extract MFCC features
        y, sr = librosa.load(output_wav_path, sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        return mfccs.mean(axis=1)
    except Exception as e:
        print(f"An error occurred while processing {mp3_file_path}: {str(e)}")
        return None

# Replace 'mp3_files' with the actual list of mp3 file paths
audio_features = [preprocess_audio(f, f.replace('.mp3', '.wav')) for f in mp3_files if f.endswith('.mp3')]


Converted /kaggle/input/pittcombined/PittCombined/mp3/711-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/709-2.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/141-1.mp3 to WAV.
WAV file already exists: /kaggle/working/016-0.wav
Converted /kaggle/input/pittcombined/PittCombined/mp3/704-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/358-0s.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/065-0s.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/260-1s.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/235-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/581-0r.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/302-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/450-1.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/295-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/705-0f.mp3 to WAV.
Converte

In [9]:
# Define a function to preprocess text data
def preprocess_text(file_path):
    with open(file_path, 'r') as file:
        text = file.read()
    # Insert text cleaning code here as necessary
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")
    outputs = model(**inputs)
    return outputs.last_hidden_state.squeeze().detach().numpy()

In [10]:
print(cha_files)

['/kaggle/input/pittcombined/PittCombined/cha/711-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/709-2.cha', '/kaggle/input/pittcombined/PittCombined/cha/141-1.cha', '/kaggle/input/pittcombined/PittCombined/cha/016-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/704-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/358-0s.cha', '/kaggle/input/pittcombined/PittCombined/cha/065-0s.cha', '/kaggle/input/pittcombined/PittCombined/cha/260-1s.cha', '/kaggle/input/pittcombined/PittCombined/cha/235-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/581-0r.cha', '/kaggle/input/pittcombined/PittCombined/cha/302-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/450-1.cha', '/kaggle/input/pittcombined/PittCombined/cha/295-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/705-0f.cha', '/kaggle/input/pittcombined/PittCombined/cha/714-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/339-0f.cha', '/kaggle/input/pittcombined/PittCombined/cha/209-1.cha', '/kaggle/input/pittcombi

In [11]:
# Process each file
text_embeddings = [preprocess_text(f) for f in cha_files]

In [12]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Assuming text_embeddings and audio_features are lists of numpy arrays
# Convert lists to numpy arrays
text_features = np.array(text_embeddings)
audio_features = np.stack(audio_features)

In [13]:
print(text_features)

[[[-1.04000159e-01  1.51692152e-01 -6.34933054e-01 ...  3.60866070e-01
    2.24927366e-01 -8.98698717e-02]
  [-3.33925158e-01 -1.37382727e-02  5.48878275e-02 ...  2.09870767e-02
    3.99613887e-01 -5.42843163e-01]
  [ 9.68751386e-02  4.68988009e-02  3.22347313e-01 ...  8.23238254e-01
   -7.67341971e-01  5.03083706e-01]
  ...
  [-4.71645743e-02 -4.10266668e-01 -4.30504888e-01 ...  7.07796663e-02
   -4.22425985e-01 -5.22299886e-01]
  [-5.63699663e-01  3.73091310e-01 -5.75687811e-02 ... -2.36332640e-02
   -2.84061395e-02 -8.54125857e-01]
  [ 6.57536566e-01 -7.74822414e-01 -6.47103310e-01 ...  6.53192580e-01
    1.15623116e+00  4.00950044e-01]]

 [[-1.27749950e-01  1.66656330e-01 -7.25917339e-01 ...  2.28262991e-01
    3.38096946e-01  4.17821556e-02]
  [-3.74157786e-01  3.43713933e-03 -3.38526182e-02 ... -5.54507785e-02
    7.26982474e-01 -4.87023264e-01]
  [-1.59587428e-01 -1.37978951e-02  3.33154440e-01 ...  5.00441611e-01
   -6.11559987e-01  4.15665537e-01]
  ...
  [ 3.15257668e-01  5.0

In [14]:
print(audio_features)

[[-3.3774756e+02  1.4947374e+02  1.8454836e+01 ... -1.9050226e+00
  -1.7488632e+00 -1.7691449e+00]
 [-5.4988507e+02  1.0918132e+02  2.4517257e-02 ... -9.2503281e+00
   2.7900538e+00 -5.0097218e+00]
 [-5.0623428e+02  9.7629036e+01 -2.9152410e+00 ... -6.4949141e+00
   9.3574753e+00 -2.9741499e-01]
 ...
 [-4.9802316e+02  1.3753177e+02  2.5676470e+01 ...  1.2476596e+00
   4.3257384e+00  2.7246422e-01]
 [-4.7319528e+02  1.3473828e+02 -7.5560422e+00 ... -3.5670638e-01
   1.7183315e+00 -6.7503567e+00]
 [-4.1611035e+02  1.3263930e+02  3.3089481e+01 ...  4.6662607e+00
   4.5605416e+00  2.5933096e+00]]


In [15]:
text_features = np.mean(text_features, axis=1)


In [16]:
print(text_features)

[[-0.1627775  -0.35868293 -0.19880217 ...  0.14659801  0.10734987
  -0.1184838 ]
 [-0.10298862 -0.29745576 -0.23006223 ... -0.03559928  0.09332593
  -0.00941374]
 [-0.16487996 -0.17608765 -0.23189425 ...  0.01905615  0.11746965
  -0.1455462 ]
 ...
 [-0.11147115 -0.2213241  -0.24779959 ...  0.11804107 -0.07674947
  -0.09184466]
 [-0.13923773 -0.25073418 -0.15422913 ...  0.00597174  0.05294795
  -0.11269989]
 [-0.11139227 -0.3305669  -0.15961027 ... -0.00471439  0.0919053
  -0.03640968]]


In [17]:
# Ensure audio_features is 2D (it should already be if you've extracted features correctly)
if audio_features.ndim > 2:
    audio_features = np.mean(audio_features, axis=1)

In [18]:
# Combine text and audio features
combined_features = np.concatenate([text_features, audio_features], axis=1)

In [19]:
print(combined_features)

[[-0.1627775  -0.35868293 -0.19880217 ... -1.9050226  -1.7488632
  -1.7691449 ]
 [-0.10298862 -0.29745576 -0.23006223 ... -9.250328    2.7900538
  -5.0097218 ]
 [-0.16487996 -0.17608765 -0.23189425 ... -6.494914    9.357475
  -0.297415  ]
 ...
 [-0.11147115 -0.2213241  -0.24779959 ...  1.2476596   4.3257384
   0.27246422]
 [-0.13923773 -0.25073418 -0.15422913 ... -0.35670638  1.7183315
  -6.7503567 ]
 [-0.11139227 -0.3305669  -0.15961027 ...  4.6662607   4.5605416
   2.5933096 ]]


In [20]:
# Normalize the combined features
scaler = StandardScaler()
normalized_features = scaler.fit_transform(combined_features)

In [21]:
import re

def extract_diagnostic_code(cha_file):
    # Read the content of the .cha file
    with open(cha_file, 'r') as file:
        content = file.read()

    # Regex to find the diagnosis in the file content based on the updated structure
    match = re.search(r'@ID:\s*[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|([^|]*)\|\|', content)
    if match:
        return match.group(1).strip()
    return None

# Assuming cha_files is a list of paths to your .cha files
labels = [extract_diagnostic_code(f) for f in cha_files]

# Example of how you might use this
for label in labels[:10]:  # Print first 10 labels to verify
    print(label)


PossibleAD
Control
Control
MCI
Other
Probable
MCI
MCI
ProbableAD
ProbableAD


In [22]:
from sklearn.preprocessing import LabelEncoder

# Encode labels as integers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)


In [23]:
print(encoded_labels)

[5 0 0 2 4 6 2 2 7 7 0 7 0 7 5 7 0 3 2 7 2 8 8 7 5 0 0 2 7 0 3 7 2 3 5 3 5
 3 2 3 0 2 2 2 7 2 2 0 0 2 2 5 7 2 2 2 7 0 2 5 5 2 7 5 7 5 5 7 0 5 7 0 7 0
 0 5 5 5 2 2 0 0 8 8 8 0 8 2 7 5 2 8 0 7 5 0 0 7 5 5 5 7 0 5 7 2 0 3 0 5 0
 8 5 0 0 5 2 7 6 8 6 7 7 2 5 0 7 7 4 2 2 7 2 0 2 0 8 0 0 7 0 2 7 7 2 2 4 7
 7 5 5 7 7 2 7 5 3 7 5 7 7 2 8 8 6 2 5 7 0 7 5 5 0 7 0 0 2 7 0 0 5 0 5 5 5
 8 0 2 0 2 5 8 2 5 2 7 5 7 3 2 2 2 3 8 5 5 2 7 2 5 0 8 0 0 0 2 8 5 7 7 0 2
 7 2 5 3 5 5 5 2 4 2 2 5 0 0 0 3 5 0 2 2 5 8 0 7 7 5 5 5 1 5 8 7 7 0 0 7 7
 5]


In [24]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate, Conv1D, GlobalAveragePooling1D, BatchNormalization
from tensorflow.keras.optimizers import Adam

2024-08-13 23:28:58.757061: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-13 23:28:58.757385: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-13 23:28:58.939337: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [25]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler

In [26]:
# Split data into train, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(normalized_features, encoded_labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Finding the maximum label value across all datasets to ensure consistency
max_label = max(np.max(y_train), np.max(y_val), np.max(y_test)) + 1  # Plus one because classes are zero-indexed

# One-hot encode the labels with a consistent number of classes across datasets
y_train = to_categorical(y_train, num_classes=max_label)
y_val = to_categorical(y_val, num_classes=max_label)
y_test = to_categorical(y_test, num_classes=max_label)




In [27]:
print("y_test",y_test)

y_test [[0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0

In [28]:
from tensorflow.keras.regularizers import l2

# Model architecture
input_text = Input(shape=(768,))  # Text features from Clinical BERT
input_audio = Input(shape=(13,))  # Audio features, assuming MFCCs with 13 coefficients

# Text pathway
text_dense = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(input_text)
text_out = Dropout(0.2)(text_dense)


In [29]:
from tensorflow.keras.layers import Reshape

# Audio pathway
audio_reshape = Reshape((13, 1))(input_audio)
conv1 = Conv1D(64, kernel_size=3, activation='relu')(audio_reshape)
conv1_bn = BatchNormalization()(conv1)
conv1_pool = GlobalAveragePooling1D()(conv1_bn)
audio_out = Dropout(0.2)(conv1_pool)

In [30]:
# Fusion and output
concatenated = Concatenate()([text_out, audio_out])
dense_layer = Dense(64, activation='relu')(concatenated)
# Assuming y_train has already been one-hot encoded correctly
num_classes = y_train.shape[1]

# Adjust your output layer
output_layer = Dense(num_classes, activation='softmax')(dense_layer)

In [31]:
print(concatenated)

<KerasTensor shape=(None, 192), dtype=float32, sparse=False, name=keras_tensor_9>


In [32]:
from tensorflow.keras.metrics import Precision, Recall

# Compile model
model = Model(inputs=[input_text, input_audio], outputs=output_layer)
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', 
              metrics=['accuracy', Precision(), Recall()])

In [33]:
# Fit model on training data
model.fit([X_train[:, :768], X_train[:, 768:]], y_train, validation_data=([X_val[:, :768], X_val[:, 768:]], y_val), epochs=100, batch_size=32)

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 141ms/step - accuracy: 0.1079 - loss: 4.6141 - precision: 0.3803 - recall: 0.0168 - val_accuracy: 0.3462 - val_loss: 4.1667 - val_precision: 0.3333 - val_recall: 0.0577
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.4067 - loss: 3.7311 - precision: 0.6511 - recall: 0.1432 - val_accuracy: 0.4423 - val_loss: 3.7580 - val_precision: 0.5556 - val_recall: 0.1923
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4546 - loss: 3.5405 - precision: 0.6041 - recall: 0.2347 - val_accuracy: 0.5000 - val_loss: 3.5935 - val_precision: 0.6000 - val_recall: 0.2308
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.6015 - loss: 3.1964 - precision: 0.7532 - recall: 0.3044 - val_accuracy: 0.4423 - val_loss: 3.8190 - val_precision: 0.5000 - val_recall: 0.2500
Epoch 5/100
[1m5/5[0m [32m━━

<keras.src.callbacks.history.History at 0x7b4f651bbd30>

In [34]:
# Evaluate the model on the test set
evaluation = model.evaluate([X_test[:, :768], X_test[:, 768:]], y_test)
print(f'Accuracy: {evaluation[1]*100:.2f}%, Precision: {evaluation[2]*100:.2f}%, Recall: {evaluation[3]*100:.2f}%')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2989 - loss: 3.5742 - precision: 0.3257 - recall: 0.2989 
Accuracy: 30.77%, Precision: 33.33%, Recall: 30.77%
