In [1]:
# Install required libraries
!pip install librosa transformers   



In [2]:
# Define paths to data
cha_path = '/kaggle/input/pittcombined/PittCombined/cha'
mp3_path = '/kaggle/input/pittcombined/PittCombined/mp3'

In [3]:
# Import necessary libraries
import os
import librosa
import pandas as pd
from transformers import BertTokenizer, BertModel


In [4]:
# List files in directories
cha_files = [os.path.join(cha_path, file) for file in os.listdir(cha_path) if file.endswith('.cha')]
mp3_files = [os.path.join(mp3_path, file) for file in os.listdir(mp3_path) if file.endswith('.mp3')]

print(f"Found {len(cha_files)} CHA files and {len(mp3_files)} MP3 files.")

Found 1255 CHA files and 1253 MP3 files.


In [5]:
import os
import re
import random
from collections import defaultdict, Counter

def extract_diagnosis(cha_file):
    """Extracts the diagnosis from a CHAT transcript file."""
    with open(cha_file, 'r') as file:
        content = file.read()
    match = re.search(r'@ID:\s*[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|([^|]*)\|', content)
    return match.group(1) if match else None

# Paths to your CHA and MP3 files
cha_path = '/kaggle/input/pittcombined/PittCombined/cha'
mp3_path = '/kaggle/input/pittcombined/PittCombined/mp3'

# Load all CHA files and their diagnoses
cha_files = [f for f in os.listdir(cha_path) if f.endswith('.cha')]
diagnoses = [extract_diagnosis(os.path.join(cha_path, f)) for f in cha_files]

# Group files by diagnosis, ensuring corresponding MP3 exists
files_by_diagnosis = defaultdict(list)
for cha_file, diag in zip(cha_files, diagnoses):
    mp3_file = cha_file.replace('.cha', '.mp3')
    if os.path.exists(os.path.join(mp3_path, mp3_file)):
        files_by_diagnosis[diag].append(cha_file)

# Select up to 500 files, trying to balance across diagnoses
selected_files = []
for diag, files in files_by_diagnosis.items():
    select_count = min(len(files), max(500 // len(files_by_diagnosis), 1))
    selected_files.extend(random.sample(files, select_count))

# Ensure the selection does not exceed 10 if categories were unbalanced
selected_files = random.sample(selected_files, min(500, len(selected_files)))

# Count of selected diagnoses
selected_diagnoses = [extract_diagnosis(os.path.join(cha_path, f)) for f in selected_files]
diagnosis_count = Counter(selected_diagnoses)

# Find corresponding MP3 files
selected_mp3_files = [f.replace('.cha', '.mp3') for f in selected_files]

# Collect full paths for the selected files
cha_files = [os.path.join(cha_path, f) for f in selected_files]
mp3_files = [os.path.join(mp3_path, f) for f in selected_mp3_files]

# Output results
print("Selected CHA files:", cha_files)
print("Selected MP3 files:", mp3_files)
print("Diagnosis counts:", dict(diagnosis_count))


Selected CHA files: ['/kaggle/input/pittcombined/PittCombined/cha/672-0r.cha', '/kaggle/input/pittcombined/PittCombined/cha/361-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/237-1f.cha', '/kaggle/input/pittcombined/PittCombined/cha/045-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/212-0f.cha', '/kaggle/input/pittcombined/PittCombined/cha/114-1.cha', '/kaggle/input/pittcombined/PittCombined/cha/280-2.cha', '/kaggle/input/pittcombined/PittCombined/cha/033-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/030-1s.cha', '/kaggle/input/pittcombined/PittCombined/cha/450-1r.cha', '/kaggle/input/pittcombined/PittCombined/cha/005-2r.cha', '/kaggle/input/pittcombined/PittCombined/cha/175-3.cha', '/kaggle/input/pittcombined/PittCombined/cha/002-3.cha', '/kaggle/input/pittcombined/PittCombined/cha/642-0r.cha', '/kaggle/input/pittcombined/PittCombined/cha/061-1f.cha', '/kaggle/input/pittcombined/PittCombined/cha/672-0s.cha', '/kaggle/input/pittcombined/PittCombined/cha/109-3.cha', '

In [6]:
cha_base_names = set([os.path.splitext(os.path.basename(f))[0] for f in cha_files])
mp3_base_names = set([os.path.splitext(os.path.basename(f))[0] for f in mp3_files])

unmatched_cha = cha_base_names - mp3_base_names
unmatched_mp3 = mp3_base_names - cha_base_names

print("Unmatched CHA files:", unmatched_cha)
print("Unmatched MP3 files:", unmatched_mp3)

Unmatched CHA files: set()
Unmatched MP3 files: set()


In [7]:
import librosa
from transformers import BertTokenizer, BertModel
import soundfile as sf

from transformers import RobertaTokenizer, RobertaModel

# Initialize RoBERTa
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaModel.from_pretrained("roberta-base")

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
import os
import librosa
import soundfile as sf

def preprocess_audio(mp3_file_path, output_wav_path):
    # Define output path within the writable directory
    output_wav_path = os.path.join('/kaggle/working', os.path.basename(output_wav_path))
    
    try:
        # Check if the WAV file already exists
        if not os.path.exists(output_wav_path):
            # Convert MP3 to WAV
            y, sr = librosa.load(mp3_file_path, sr=None)
            sf.write(output_wav_path, y, sr)
            print(f"Converted {mp3_file_path} to WAV.")
        else:
            print(f"WAV file already exists: {output_wav_path}")

        # Extract MFCC features
        y, sr = librosa.load(output_wav_path, sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        return mfccs.mean(axis=1)
    except Exception as e:
        print(f"An error occurred while processing {mp3_file_path}: {str(e)}")
        return None

# Replace 'mp3_files' with the actual list of mp3 file paths
audio_features = [preprocess_audio(f, f.replace('.mp3', '.wav')) for f in mp3_files if f.endswith('.mp3')]


Converted /kaggle/input/pittcombined/PittCombined/mp3/672-0r.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/361-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/237-1f.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/045-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/212-0f.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/114-1.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/280-2.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/033-0.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/030-1s.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/450-1r.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/005-2r.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/175-3.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/002-3.mp3 to WAV.
Converted /kaggle/input/pittcombined/PittCombined/mp3/642-

In [9]:
# Define a function to preprocess text data
def preprocess_text(file_path):
    with open(file_path, 'r') as file:
        text = file.read()
    # Insert text cleaning code here as necessary
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")
    outputs = model(**inputs)
    return outputs.last_hidden_state.squeeze().detach().numpy()

In [10]:
print(cha_files)

['/kaggle/input/pittcombined/PittCombined/cha/672-0r.cha', '/kaggle/input/pittcombined/PittCombined/cha/361-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/237-1f.cha', '/kaggle/input/pittcombined/PittCombined/cha/045-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/212-0f.cha', '/kaggle/input/pittcombined/PittCombined/cha/114-1.cha', '/kaggle/input/pittcombined/PittCombined/cha/280-2.cha', '/kaggle/input/pittcombined/PittCombined/cha/033-0.cha', '/kaggle/input/pittcombined/PittCombined/cha/030-1s.cha', '/kaggle/input/pittcombined/PittCombined/cha/450-1r.cha', '/kaggle/input/pittcombined/PittCombined/cha/005-2r.cha', '/kaggle/input/pittcombined/PittCombined/cha/175-3.cha', '/kaggle/input/pittcombined/PittCombined/cha/002-3.cha', '/kaggle/input/pittcombined/PittCombined/cha/642-0r.cha', '/kaggle/input/pittcombined/PittCombined/cha/061-1f.cha', '/kaggle/input/pittcombined/PittCombined/cha/672-0s.cha', '/kaggle/input/pittcombined/PittCombined/cha/109-3.cha', '/kaggle/input/pittco

In [11]:
# Process each file
text_embeddings = [preprocess_text(f) for f in cha_files]

In [12]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Assuming text_embeddings and audio_features are lists of numpy arrays
# Convert lists to numpy arrays
text_features = np.array(text_embeddings)
audio_features = np.stack(audio_features)

In [13]:
print(text_features)

[[[-9.72423851e-02  2.65292767e-02 -1.16973016e-02 ... -1.62301913e-01
   -8.40255991e-02 -5.36014102e-02]
  [-2.14526907e-01 -2.97879130e-01  9.62144136e-02 ... -5.94711602e-01
   -1.68269202e-01 -8.98384154e-02]
  [ 1.21773235e-01 -1.52687028e-01  2.21801959e-02 ...  9.35916901e-02
   -4.50082794e-02 -1.74499080e-02]
  ...
  [ 1.07118469e-02 -2.52554297e-01  1.68749228e-01 ... -1.35804445e-01
    1.75554603e-02 -7.96999708e-02]
  [ 1.07118469e-02 -2.52554297e-01  1.68749228e-01 ... -1.35804445e-01
    1.75554603e-02 -7.96999708e-02]
  [ 1.07118469e-02 -2.52554297e-01  1.68749228e-01 ... -1.35804445e-01
    1.75554603e-02 -7.96999708e-02]]

 [[-4.85320464e-02  7.57615641e-03 -2.15541124e-02 ... -1.84362978e-01
   -4.46568839e-02 -1.31633785e-02]
  [-1.75419942e-01 -2.35273778e-01  1.45000443e-02 ... -4.50662255e-01
   -9.57441628e-02 -6.15313835e-02]
  [ 9.20190886e-02 -1.98194921e-01  4.49750982e-02 ...  9.37285423e-02
   -1.21140585e-03 -6.55166581e-02]
  ...
  [-1.67882591e-02 -9.8

In [14]:
print(audio_features)

[[-3.47991486e+02  1.45302032e+02  4.45041771e+01 ...  6.12083554e-01
   1.23227394e+00  1.04783356e+00]
 [-5.24387207e+02  1.05336517e+02  1.28090162e+01 ... -2.38694763e+00
   2.61250186e+00 -8.51398468e-01]
 [-5.30078796e+02  1.01879578e+02  1.27171783e+01 ...  5.68122864e-01
   3.40854812e+00  2.59326363e+00]
 ...
 [-5.29386108e+02  1.28645737e+02  1.83487225e+01 ... -8.83747768e+00
   6.54863119e+00 -1.02591574e+00]
 [-3.95146759e+02  1.15885178e+02  4.17677402e+00 ... -6.58706665e+00
   5.09216213e+00  2.18659312e-01]
 [-4.03731293e+02  1.39500687e+02  1.33597975e+01 ... -3.78502274e+00
  -2.49562716e+00 -8.61640739e+00]]


In [15]:
text_features = np.mean(text_features, axis=1)


In [16]:
print(text_features)

[[-0.02062482 -0.1828437   0.11907057 ... -0.17604293 -0.01005977
  -0.0602938 ]
 [ 0.00953281 -0.10941681  0.0154051  ... -0.33703595 -0.00828814
  -0.05988922]
 [-0.07635356 -0.09679706  0.02321012 ... -0.25502518 -0.01188354
  -0.05803617]
 ...
 [-0.06163239 -0.09603593  0.03463804 ... -0.26046416 -0.02427924
  -0.08099649]
 [-0.05107031 -0.08000761  0.04030335 ... -0.28841183 -0.01864949
  -0.06233644]
 [-0.00087727 -0.10072947  0.04577157 ... -0.30943063 -0.02483274
  -0.03032402]]


In [17]:
# Ensure audio_features is 2D (it should already be if you've extracted features correctly)
if audio_features.ndim > 2:
    audio_features = np.mean(audio_features, axis=1)

In [18]:
# Combine text and audio features
combined_features = np.concatenate([text_features, audio_features], axis=1)

In [19]:
print(combined_features)

[[-2.06248220e-02 -1.82843700e-01  1.19070575e-01 ...  6.12083554e-01
   1.23227394e+00  1.04783356e+00]
 [ 9.53280553e-03 -1.09416805e-01  1.54051008e-02 ... -2.38694763e+00
   2.61250186e+00 -8.51398468e-01]
 [-7.63535649e-02 -9.67970639e-02  2.32101232e-02 ...  5.68122864e-01
   3.40854812e+00  2.59326363e+00]
 ...
 [-6.16323873e-02 -9.60359275e-02  3.46380398e-02 ... -8.83747768e+00
   6.54863119e+00 -1.02591574e+00]
 [-5.10703065e-02 -8.00076127e-02  4.03033495e-02 ... -6.58706665e+00
   5.09216213e+00  2.18659312e-01]
 [-8.77273094e-04 -1.00729465e-01  4.57715727e-02 ... -3.78502274e+00
  -2.49562716e+00 -8.61640739e+00]]


In [20]:
# Normalize the combined features
scaler = StandardScaler()
normalized_features = scaler.fit_transform(combined_features)

In [21]:
import re

def extract_diagnostic_code(cha_file):
    # Read the content of the .cha file
    with open(cha_file, 'r') as file:
        content = file.read()

    # Regex to find the diagnosis in the file content based on the updated structure
    match = re.search(r'@ID:\s*[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|([^|]*)\|\|', content)
    if match:
        return match.group(1).strip()
    return None

labels = [extract_diagnostic_code(f) for f in cha_files]

for label in labels[:10]:  # Print first 10 labels to verify
    print(label)


PossibleAD
PossibleAD
MCI
Control
MCI
Control
Control
MCI
MCI
ProbableAD


In [22]:
from sklearn.preprocessing import LabelEncoder

# Encode labels as integers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)


In [23]:
print(encoded_labels)

[5 5 2 0 2 0 0 2 2 7 7 0 0 7 2 5 0 8 7 2 4 7 3 0 7 5 5 3 7 0 2 7 2 2 8 7 2
 2 0 3 7 5 2 3 6 7 0 0 7 3 2 7 0 7 0 7 7 8 7 7 7 0 2 5 2 5 2 0 2 7 2 5 7 2
 6 7 3 5 2 5 5 2 0 0 0 0 3 0 8 0 2 5 5 3 0 5 7 0 5 8 8 5 2 5 7 2 7 0 0 7 5
 8 7 5 0 5 2 7 2 5 5 7 2 3 2 5 2 2 2 2 5 8 0 0 7 8 3 2 7 5 0 5 2 7 2 5 5 5
 0 7 5 2 0 7 0 0 8 5 5 5 0 5 0 5 2 0 4 2 2 7 7 7 0 0 0 5 2 8 5 0 7 0 2 7 0
 8 2 5 7 5 7 2 2 7 0 5 5 5 0 7 2 2 4 2 3 5 7 0 0 5 2 0 8 0 5 8 8 6 7 7 7 2
 2 2 0 8 5 0 6 7 5 4 7 5 7 0 2 7 5 7 8 5 0 1 5 7 5 0 3 2 2 2 5 0 5 7 8 7 8
 0]


In [24]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate, Conv1D, GlobalAveragePooling1D, BatchNormalization
from tensorflow.keras.optimizers import Adam

2024-08-15 09:27:10.014748: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-15 09:27:10.014952: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-15 09:27:10.184627: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [25]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler

In [26]:
# Split data into train, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(normalized_features, encoded_labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Finding the maximum label value across all datasets to ensure consistency
max_label = max(np.max(y_train), np.max(y_val), np.max(y_test)) + 1  # Plus one because classes are zero-indexed

# One-hot encode the labels with a consistent number of classes across datasets
y_train = to_categorical(y_train, num_classes=max_label)
y_val = to_categorical(y_val, num_classes=max_label)
y_test = to_categorical(y_test, num_classes=max_label)




In [27]:
print("y_test",y_test)

y_test [[0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0

In [28]:
from tensorflow.keras.regularizers import l2

# Model architecture
input_text = Input(shape=(768,))  # Text features from Clinical BERT
input_audio = Input(shape=(13,))  # Audio features, assuming MFCCs with 13 coefficients

# Text pathway
text_dense = Dense(128, activation='relu', kernel_regularizer=l2(0.1))(input_text)
text_bn = BatchNormalization()(text_dense)  # Add batch normalization
text_out = Dropout(0.5)(text_bn)  # Increase dropout rate


In [29]:
from tensorflow.keras.layers import Reshape

# Audio pathway
audio_reshape = Reshape((13, 1))(input_audio)
conv1 = Conv1D(64, kernel_size=3, activation='relu', kernel_regularizer=l2(0.1))(audio_reshape)
conv1_bn = BatchNormalization()(conv1)
conv1_pool = GlobalAveragePooling1D()(conv1_bn)
audio_out = Dropout(0.5)(conv1_pool)  # Increase dropout rate

In [30]:
# Fusion and output
concatenated = Concatenate()([text_out, audio_out])
dense_layer = Dense(64, activation='relu', kernel_regularizer=l2(0.1))(concatenated)  # Regularize dense layer

# Assuming y_train has already been one-hot encoded correctly
num_classes = y_train.shape[1]

# Adjust your output layer
output_layer = Dense(num_classes, activation='softmax')(dense_layer)


In [31]:
print(concatenated)

<KerasTensor shape=(None, 192), dtype=float32, sparse=False, name=keras_tensor_10>


In [32]:
from tensorflow.keras.metrics import Precision, Recall

# Compile model
model = Model(inputs=[input_text, input_audio], outputs=output_layer)
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', 
              metrics=['accuracy', Precision(), Recall()])


In [33]:
# Fit model on training data
model.fit([X_train[:, :768], X_train[:, 768:]], y_train, validation_data=([X_val[:, :768], X_val[:, 768:]], y_val), epochs=100, batch_size=32)


Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 148ms/step - accuracy: 0.1132 - loss: 33.9507 - precision: 0.0174 - recall: 0.0034 - val_accuracy: 0.2692 - val_loss: 31.1006 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1350 - loss: 30.7585 - precision: 0.1979 - recall: 0.0370 - val_accuracy: 0.3077 - val_loss: 28.5119 - val_precision: 0.2500 - val_recall: 0.0192
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.3313 - loss: 27.8379 - precision: 0.5936 - recall: 0.0871 - val_accuracy: 0.3462 - val_loss: 26.0637 - val_precision: 0.4167 - val_recall: 0.0962
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3478 - loss: 25.2597 - precision: 0.5362 - recall: 0.1341 - val_accuracy: 0.3846 - val_loss: 23.7040 - val_precision: 0.3846 - val_recall: 0.0962
Epoch 5/100
[1

<keras.src.callbacks.history.History at 0x7c648edab070>

In [34]:
# Evaluate the model on the test set
evaluation = model.evaluate([X_test[:, :768], X_test[:, 768:]], y_test)
print(f'Accuracy: {evaluation[1]*100:.2f}%, Precision: {evaluation[2]*100:.2f}%, Recall: {evaluation[3]*100:.2f}%')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5080 - loss: 2.0931 - precision: 0.5741 - recall: 0.3894 
Accuracy: 48.08%, Precision: 52.78%, Recall: 36.54%
