In [16]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import preprocessing, model_selection
import csv
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import json
import random

### Data Preparation

In [17]:
def get_dataset(directory):
    data = pd.DataFrame(columns=['data', 'label'])
    for foldername in os.listdir(directory):
        folder = os.path.join(directory, foldername)
        if os.path.isdir(folder):
            files = os.listdir(folder)
            for filename in files:
                rel_path = os.path.join(directory, foldername, filename)
                temp_label = filename.split('.')[0].split('_')[0]
                if 'a' in temp_label:
                    label ='alcoholic'
                else:
                    label = 'control'

                temp_data = pd.DataFrame(columns=['data', 'label'], index=[0])

                with open(rel_path, 'r') as file:
                    
                    rwb = list(csv.reader(file, delimiter=","))[0]
                    rwb = np.asarray(rwb).astype(np.float32)
                    rwb = [round(num, 1) for num in rwb]
                                
                temp_data['data'][0] = rwb
                temp_data['label'] = label
                
                # decomp = np.arange(0, 366)
                # plt.plot(decomp, df_data)
                # plt.xlabel('Dimension Number')
                # plt.ylabel('Wavelet Bispectrum Energy')
                # plt.show()
                data = pd.concat([data, temp_data], ignore_index=True)
    label_map = {"alcoholic": 1, "control": 0}
    data['label_map'] = data['label'].map(label_map)      
    return data

In [18]:
train = get_dataset('../smni_cmi_test_feature_256')
train

Unnamed: 0,data,label,label_map
0,"[1967.1, 3.2, 109.9, 235.6, 96.9, 234.9, 1402....",alcoholic,1
1,"[815192.4, 9.2, 313.7, 518.6, 5207.2, 90536.8,...",alcoholic,1
2,"[2089.5, 1.5, 38.8, 74.0, 219.5, 321.5, 3344.4...",alcoholic,1
3,"[141.4, 2.2, 63.3, 118.8, 125.4, 134.6, 397.4,...",alcoholic,1
4,"[718895.2, 8.3, 275.3, 1211.7, 4226.8, 74752.7...",alcoholic,1
...,...,...,...
595,"[4204.4, 4.5, 126.4, 254.6, 477.9, 1428.1, 124...",control,0
596,"[10893.6, 11.4, 423.5, 2419.1, 2049.1, 3873.9,...",control,0
597,"[23382.0, 47.3, 1099.3, 1720.6, 1464.7, 2600.7...",control,0
598,"[13675.1, 14.6, 481.1, 1632.0, 673.5, 1239.6, ...",control,0


In [31]:
def get_batch(path):
    # loading extracted feature & label
    x = get_dataset(path)

    scaler = preprocessing.MinMaxScaler()

    series_list = [
        i for i in x["data"]
    ]

    # series_list = series_list.reshape(-1, 366, 1)

    labels_list = [i for i in x["label_map"]]
        
    # y = keras.utils.to_categorical(y[0])
    dataset = tf.data.Dataset.from_tensor_slices((series_list,labels_list))
    dataset = dataset.shuffle(len(labels_list)).batch(32)

    return dataset

### Model Definition

In [20]:
def create_model():
    model = keras.models.Sequential()

    model.add(layers.Input(shape=(366,)))
    model.add(layers.Reshape((366, 1)))

    model.add(layers.Conv1D(filters=16, kernel_size=4, activation="relu"))
    model.add(layers.MaxPooling1D(pool_size=4))
    model.add(layers.BatchNormalization())

    model.add(layers.Conv1D(filters=8, kernel_size=2, activation="relu"))
    model.add(layers.MaxPooling1D(pool_size=4))
    model.add(layers.BatchNormalization())

    model.add(layers.Flatten())

    model.add(layers.Dense(512, activation="relu"))

    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(256, activation="relu"))

    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(1, activation="sigmoid"))

    return model

### Main Program

In [32]:
train = get_batch('../smni_cmi_test_feature_256')

train_size = int(len(list(train.as_numpy_iterator()))*0.8)

train_ds = train.take(train_size)
val_ds = train.skip(train_size)


In [33]:
for x in train_ds:
    print(x)

(<tf.Tensor: shape=(32, 366), dtype=float32, numpy=
array([[3.753773e+05, 2.253000e+03, 6.855600e+03, ..., 2.830000e+01,
        2.865000e+02, 7.520000e+01],
       [1.122410e+04, 8.000000e-01, 1.810000e+01, ..., 6.000000e-01,
        1.300000e+00, 3.900000e+00],
       [3.249440e+05, 1.280000e+01, 5.687000e+02, ..., 3.380000e+01,
        1.574000e+02, 4.130000e+01],
       ...,
       [1.237266e+05, 4.000000e-01, 1.450000e+01, ..., 1.000000e+00,
        4.400000e+00, 1.070000e+01],
       [3.830080e+04, 4.100000e+00, 1.195000e+02, ..., 2.100000e+00,
        9.400000e+00, 3.070000e+01],
       [4.618430e+04, 6.000000e-01, 3.590000e+01, ..., 3.000000e-01,
        2.200000e+00, 4.700000e+00]], dtype=float32)>, <tf.Tensor: shape=(32,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1])>)
(<tf.Tensor: shape=(32, 366), dtype=float32, numpy=
array([[5.1075301e+04, 6.5000000e+00, 1.8339999e+02, ..., 4.0000001e-01,

In [34]:
model = create_model()
model.summary()

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

# Train the model
history = model.fit(train_ds, epochs=1000, validation_data=(val_ds))

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_2 (Reshape)         (None, 366, 1)            0         
                                                                 
 conv1d_4 (Conv1D)           (None, 363, 16)           80        
                                                                 
 max_pooling1d_4 (MaxPooling  (None, 90, 16)           0         
 1D)                                                             
                                                                 
 batch_normalization_4 (Batc  (None, 90, 16)           64        
 hNormalization)                                                 
                                                                 
 conv1d_5 (Conv1D)           (None, 89, 8)             264       
                                                                 
 max_pooling1d_5 (MaxPooling  (None, 22, 8)           