In [None]:
# Install library requests jika belum terinstal
%pip install requests



In [None]:
# Mount Google Drive at /content/drive so data can be saved there
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import library yang diperlukan
import os
import zipfile
import pandas as pd
import shutil
import numpy as np
import requests
from io import BytesIO

# Fungsi untuk Membaca Participant_ID

In [None]:
# Fungsi untuk membaca Participant_ID dari CSV TRAIN
def read_participantID_train_from_csv():
    """Return the participant IDs listed in the DAIC-WOZ TRAIN split CSV.

    Downloads the train split file and returns its ``Participant_ID`` column.

    Returns:
        list: The participant IDs as a plain Python list, the same return
        type as the TEST and DEV readers.
    """
    df = pd.read_csv('https://dcapswoz.ict.usc.edu/wwwdaicwoz/train_split_Depression_AVEC2017.csv')
    # Return a list rather than a pandas Series so all three split readers agree.
    participant_ID = df['Participant_ID'].tolist()
    return participant_ID

# Fungsi untuk membaca Participant_ID dari CSV TEST
def read_participantID_test_from_csv():
    """Fetch the DAIC-WOZ TEST split CSV and return its participant IDs as a list."""
    # NOTE(review): assumes the test split uses the same 'Participant_ID' header
    # as the train/dev files - confirm against the actual CSV.
    split_url = 'https://dcapswoz.ict.usc.edu/wwwdaicwoz/test_split_Depression_AVEC2017.csv'
    return pd.read_csv(split_url)['Participant_ID'].tolist()

# Fungsi untuk membaca Participant_ID dari CSV DEV
def read_participantID_dev_from_csv():
    """Download the DAIC-WOZ DEV split CSV and list its participant IDs.

    Returns:
        list: Values of the ``Participant_ID`` column, in file order.
    """
    dev_split = pd.read_csv('https://dcapswoz.ict.usc.edu/wwwdaicwoz/dev_split_Depression_AVEC2017.csv')
    id_column = dev_split['Participant_ID']
    return id_column.tolist()

# Fungsi untuk Membaca PHQ8_Binary

In [None]:
# Fungsi untuk membaca PHQ8_Binary dari CSV TRAIN
def read_PHQ8Train_from_csv(participant_ID):
    """Look up one participant's PHQ8_Binary value in the TRAIN split CSV.

    Args:
        participant_ID: ID compared against the ``Participant_ID`` column.

    Returns:
        The ``PHQ8_Binary`` value of the first matching row.

    Raises:
        IndexError: If no row matches ``participant_ID``.
    """
    train_split = pd.read_csv('https://dcapswoz.ict.usc.edu/wwwdaicwoz/train_split_Depression_AVEC2017.csv')
    is_participant = train_split['Participant_ID'] == participant_ID
    matching_labels = train_split.loc[is_participant, 'PHQ8_Binary']
    # The CSV is downloaded again on every call; position 0 is the first match.
    return matching_labels.values[0]

# Fungsi untuk membaca PHQ8_Binary dari CSV DEV
def read_PHQ8Dev_from_csv(participant_ID):
    """Return the PHQ8_Binary value recorded for a participant in the DEV split.

    Args:
        participant_ID: ID to match in the ``Participant_ID`` column.

    Returns:
        The ``PHQ8_Binary`` entry of the first row whose ID matches.

    Raises:
        IndexError: If the ID does not appear in the DEV split.
    """
    dev_url = 'https://dcapswoz.ict.usc.edu/wwwdaicwoz/dev_split_Depression_AVEC2017.csv'
    dev_split = pd.read_csv(dev_url)
    row_mask = dev_split['Participant_ID'] == participant_ID
    return dev_split.loc[row_mask, 'PHQ8_Binary'].to_numpy()[0]

# Fungsi untuk Memproses Data ZIP

In [None]:
# Fungsi untuk mendownload dan mengolah data dari file ZIP berdasarkan participant_ID dan kategori
def process_data(participant_ids, base_url, destination_base_folder, read_PHQ8_func):
    """Download each participant's ZIP archive and file its WAV audio by PHQ-8 label.

    Args:
        participant_ids: Iterable of participant IDs; each archive is fetched
            from ``f"{base_url}{participant_id}_P.zip"``.
        base_url: URL prefix the archive file name is appended to.
        destination_base_folder: Folder under which the ``depressed`` and
            ``normal`` sub-folders are created.
        read_PHQ8_func: Callable taking a participant ID and returning its
            PHQ8_Binary value (1 -> 'depressed', anything else -> 'normal').

    Raises:
        requests.HTTPError: If a download answers with an error status.
        zipfile.BadZipFile: If the downloaded payload is not a ZIP archive.
    """
    for participantID in participant_ids:
        zip_url = f"{base_url}{participantID}_P.zip"
        response = requests.get(zip_url)
        # Fail on 4xx/5xx here instead of trying to unzip an error page.
        response.raise_for_status()

        df_PHQ8 = read_PHQ8_func(participantID)

        # Pick the category from PHQ-8 (1 = 'depressed', 0 = 'normal')
        category = 'depressed' if df_PHQ8 == 1 else 'normal'
        print(df_PHQ8)

        # Create the category folder if it does not exist yet
        destination_folder = os.path.join(destination_base_folder, category)
        os.makedirs(destination_folder, exist_ok=True)

        # Extract only the audio files; the context manager closes the archive.
        with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
            for file_info in zip_file.infolist():
                if not file_info.filename.endswith('.wav'):
                    continue
                # Drop any directory part stored in the archive member name.
                target_path = os.path.join(destination_folder, os.path.basename(file_info.filename))
                with zip_file.open(file_info) as file, open(target_path, 'wb') as output_file:
                    shutil.copyfileobj(file, output_file)

# Proses Data TRAIN

In [None]:
# Download the TRAIN split and file each recording under train/<category>
train_output_dir = os.path.join('drive', 'MyDrive', 'DAIC-WOZ Dataset', 'train')
participant_ids_train = read_participantID_train_from_csv()
process_data(
    participant_ids_train,
    'https://dcapswoz.ict.usc.edu/wwwdaicwoz/',
    train_output_dir,
    read_PHQ8Train_from_csv,
)

0
0
0
0
0
0
0
0
0
0
1
1
1
0
0
1
0
0
0
1
0
0
1
1
0
0
0
1
1
1
1
1
1
1
1
1
1
0
0
0
1
0
0
0
0
0
0
0
1
0
0
1
0
1
0
0
1
0
0
0
0
0
0
1
0
1
1
0
0
0
0
0
1
0
0
0
0
1
0
0
1
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0


# Proses Data DEV

In [None]:
# Download the DEV split and file each recording under dev/<category>
participant_ids_dev = read_participantID_dev_from_csv()
dev_output_dir = os.path.join('drive', 'MyDrive', 'DAIC-WOZ Dataset', 'dev')
process_data(
    participant_ids=participant_ids_dev,
    base_url='https://dcapswoz.ict.usc.edu/wwwdaicwoz/',
    destination_base_folder=dev_output_dir,
    read_PHQ8_func=read_PHQ8Dev_from_csv,
)


0
0
0
1
1
1
1
1
0
1
1
0
0
0
0
0
1
0
1
0
1
0
0
1
0
0
0
0
0
0
1
0
0
0
0


# Fungsi Preproses dan Ekstraksi Fitur Audio

In [None]:
import librosa
import soundfile as sf
from sklearn.model_selection import train_test_split

# Fungsi untuk preprocess audio segments

In [None]:
def preprocess_audio_segments(file_path, target_sr, segment_duration, skip_duration):
    """Load an audio file and keep only its opening and closing segments.

    The clip is loaded at its native rate, resampled to ``target_sr`` when
    needed, trimmed by ``skip_duration`` seconds at both ends, and then the
    first and last ``segment_duration`` seconds of what remains are
    concatenated.

    Args:
        file_path: Path of the audio file to load.
        target_sr: Sampling rate (Hz) of the returned audio.
        segment_duration: Seconds to keep from the start and from the end.
        skip_duration: Seconds to discard at both ends before segmenting.

    Returns:
        tuple: ``(processed_audio, target_sr)``. When the trimmed clip is
        shorter than two segments, the two slices overlap and the result is
        shorter than ``2 * segment_duration`` seconds.
    """
    audio, sr = librosa.load(file_path, sr=None)
    if sr != target_sr:
        audio = librosa.resample(y=audio, orig_sr=sr, target_sr=target_sr)
    # Sample counts must be integers to be usable as slice bounds.
    segment_length = int(target_sr * segment_duration)
    skip_length = int(target_sr * skip_duration)
    # Explicit end index: ``audio[0:-0]`` would be empty when nothing is skipped.
    trimmed_end = max(len(audio) - skip_length, skip_length)
    audio = audio[skip_length:trimmed_end]
    start_segment = audio[:segment_length]
    # Explicit start index: ``audio[-0:]`` would return the whole clip.
    end_segment = audio[max(len(audio) - segment_length, 0):]
    processed_audio = np.concatenate((start_segment, end_segment))
    return processed_audio, target_sr

# Fungsi untuk ekstraksi fitur MFCC dari audio

In [None]:
def extract_mfcc_features(audio, sr, n_mfcc=13):
    """Compute MFCCs for an audio signal via ``librosa.feature.mfcc``.

    Args:
        audio: Audio samples, passed to librosa as ``y``.
        sr: Sampling rate of ``audio``.
        n_mfcc: Number of coefficients requested (default 13).

    Returns:
        The value returned by ``librosa.feature.mfcc``, unchanged.
    """
    return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

# Section: preprocessing parameters and folder paths
target_sr = 22050  # sampling rate (Hz) every clip is resampled to
segment_duration = 3 * 60  # seconds kept from the start and from the end of each clip
skip_duration = 30  # seconds discarded at both ends before segmenting

# Folder paths
normal_folder = os.path.join('drive', 'MyDrive', 'DAIC-WOZ Dataset', 'train', 'normal')
depressed_folder = os.path.join('drive', 'MyDrive', 'DAIC-WOZ Dataset', 'train', 'depressed')

# os.listdir returns entries in arbitrary order; sort them so the seeded split
# below selects the same files on every run.
normal_files = sorted(os.path.join(normal_folder, file) for file in os.listdir(normal_folder) if file.endswith('.wav'))
depressed_files = sorted(os.path.join(depressed_folder, file) for file in os.listdir(depressed_folder) if file.endswith('.wav'))

# Combine all files. The file order MUST match the label order: normal files
# first ('0'), then depressed files ('1'); otherwise labels are attached to
# the wrong recordings.
all_files = normal_files + depressed_files
labels = ['0'] * len(normal_files) + ['1'] * len(depressed_files)

# Split into train and validation sets, keeping the class ratio in both
train_files, val_files, train_labels, val_labels = train_test_split(all_files, labels, test_size=0.2, random_state=42, stratify=labels)

# Lists holding the preprocessed (resampled and segmented) audio
train_audios = []
val_audios = []

# Lists holding the extracted audio features
train_mfcc_features = []
val_mfcc_features = []

# Preprocessing Audio dan Ekstraksi Fitur MFCC

In [None]:
def _preprocess_and_extract(files):
    """Preprocess every file in ``files``; return ``(audios, mfcc_features)`` lists."""
    audios, features = [], []
    for file in files:
        audio, sr = preprocess_audio_segments(file, target_sr, segment_duration, skip_duration)
        audios.append(audio)
        features.append(extract_mfcc_features(audio, sr))
    return audios, features

# Build fresh lists on every run so that re-executing this cell neither
# duplicates earlier results nor fails because the feature lists were already
# converted to arrays.
train_audios, train_mfcc_list = _preprocess_and_extract(train_files)
val_audios, val_mfcc_list = _preprocess_and_extract(val_files)

# Convert the labels to numpy arrays
train_labels = np.array(train_labels)
val_labels = np.array(val_labels)

# Convert the features to numpy arrays and append a channel axis so they can
# be fed to the CNN
train_mfcc_features = np.array(train_mfcc_list)[..., np.newaxis]
val_mfcc_features = np.array(val_mfcc_list)[..., np.newaxis]

# Pembangunan Model CNN

In [None]:
# Import library TensorFlow dan Keras untuk membangun model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [None]:
# Input shape for the CNN: the two MFCC feature axes plus a single channel
cnn_input_shape = (train_mfcc_features.shape[1], train_mfcc_features.shape[2], 1)

# Build the CNN: three conv/max-pool stages followed by a dense classifier
# that ends in a single sigmoid unit
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=cnn_input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Convert the labels to integers
train_labels = np.asarray(train_labels).astype(int)
val_labels = np.asarray(val_labels).astype(int)

# Compile the model for binary classification
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 13, 15504, 32)     320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 6, 7752, 32)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 6, 7752, 64)       18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 3, 3876, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3876, 128)      73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 1, 1938, 128)      0

# Training dan Evaluasi Model

In [None]:
# Train the model, reporting validation metrics after every epoch
history = model.fit(
    x=train_mfcc_features,
    y=train_labels,
    validation_data=(val_mfcc_features, val_labels),
    epochs=50,
    batch_size=10,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Evaluate the model on the training split
train_metrics = model.evaluate(train_mfcc_features, train_labels)
loss, accuracy = train_metrics
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

# Evaluate the model on the validation split
val_metrics = model.evaluate(val_mfcc_features, val_labels)
val_loss, val_accuracy = val_metrics
print(f'Validation Loss: {val_loss}')
print(f'Validation Accuracy: {val_accuracy}')

Loss: 0.5413769483566284
Accuracy: 0.7529411911964417
Validation Loss: 0.5287379622459412
Validation Accuracy: 0.7272727489471436


# Menyimpan model dan ubah ke tf lite


In [None]:
# Save the trained Keras model to the working directory as an HDF5 (.h5) file
model.save('thrive_model.h5')
print("Model has been saved as 'thrive_model.h5'.")

  saving_api.save_model(


Model has been saved as 'thrive_model.h5'.


In [None]:
# Convert the in-memory Keras model to TFLite format; the converted model is
# kept in `tflite_model` and written to disk in a later cell
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

In [None]:
# Write the converted TFLite model to the working directory in binary mode
with open('thrive_model.tflite', 'wb') as f:
    f.write(tflite_model)
print("Model has been converted to TFLite and saved as 'thrive_model.tflite'.")

Model has been converted to TFLite and saved as 'thrive_model.tflite'.


In [None]:
# Copy the .h5 model to Google Drive
!cp thrive_model.h5 /content/drive/MyDrive/

In [None]:
# Copy the .tflite model to Google Drive
!cp thrive_model.tflite /content/drive/MyDrive/

# Prediksi Menggunakan Model HDF5

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('thrive_model.h5')
print("Model loaded from 'thrive_model.h5'.")

# Prepare the data for prediction: a single validation example, batch axis kept
sample_data = val_mfcc_features[0:1]

# Run the prediction
predictions = model.predict(sample_data)
print(f'Prediction: {predictions}')

# Classification threshold. The model ends in a sigmoid unit, so its output is
# a score between 0 and 1; a threshold of 1 would almost never be reached and
# every sample would be reported as "normal". Use the midpoint instead.
threshold = 0.5

# Interpret the prediction
if predictions[0][0] >= threshold:
    result = "depresi"
    label = 1
else:
    result = "normal"
    label = 0

print(f'Result: {result}')
print(f'Label: {label}')

Model loaded from 'thrive_model.h5'.
Prediction: [[0.17054704]]
Result: normal
Label: 0


# Prediksi Menggunakan Model TFLite

In [None]:
import numpy as np
import tensorflow as tf

# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path='thrive_model.tflite')
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Prepare the data for prediction: a single validation example, batch axis kept
sample_data = val_mfcc_features[0:1]

# Make sure the data is float32 before handing it to the interpreter
sample_data = np.array(sample_data, dtype=np.float32)

# Set input tensor
interpreter.set_tensor(input_details[0]['index'], sample_data)

# Run the interpreter
interpreter.invoke()

# Get output tensor
predictions = interpreter.get_tensor(output_details[0]['index'])

# Interpret the prediction. The converted model ends in a sigmoid unit, so its
# output is a score between 0 and 1; a threshold of 1 would almost never be
# reached and every sample would be reported as "normal". Use the midpoint.
threshold = 0.5
if predictions[0][0] >= threshold:
    result = "depresi"
    label = 1
else:
    result = "normal"
    label = 0

print(f'Result: {result}')
print(f'Label: {label}')
print(f'Prediction: {predictions}')

Result: normal
Label: 0
Prediction: [[0.17054707]]
