In [48]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import csv
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import json
import random

### Data Preparation

In [77]:
def get_dataset(directory):
    """Load one feature vector per file from the sub-folders of ``directory``.

    Every immediate sub-directory of ``directory`` is scanned; entries at the
    top level that are not directories are ignored. For each file inside a
    sub-directory only the first CSV row is read and kept as that sample's
    feature vector.

    The label is derived from the file name: the text before the first ``.``
    and then before the first ``_`` is inspected, and the sample is tagged
    ``'alcoholic'`` when that prefix contains the letter ``a`` and
    ``'control'`` otherwise.

    Parameters
    ----------
    directory : str
        Root folder whose sub-folders hold the comma-separated feature files.

    Returns
    -------
    pandas.DataFrame
        One row per file with columns ``'data'`` (the list of string fields
        exactly as produced by ``csv.reader``; no numeric conversion happens
        here) and ``'label'``. The index runs 0..n-1. When no file is found
        an empty frame with the same two columns is returned.

    Raises
    ------
    IndexError
        If a feature file contains no rows at all.
    """
    # Collect plain records and build the frame once at the end: growing a
    # DataFrame with pd.concat inside the loop copies it on every iteration,
    # and filling a cell through chained indexing (frame[col][row] = value)
    # does not write through when pandas Copy-on-Write is active.
    records = []
    for foldername in os.listdir(directory):
        folder = os.path.join(directory, foldername)
        if not os.path.isdir(folder):
            continue

        for filename in os.listdir(folder):
            file_path = os.path.join(folder, filename)

            # e.g. "abc_1.csv" -> "abc"; an 'a' anywhere in it means alcoholic.
            prefix = filename.split('.')[0].split('_')[0]
            label = 'alcoholic' if 'a' in prefix else 'control'

            # Only the first row is needed, so stop reading after it.
            with open(file_path, 'r', newline='') as file:
                first_row = next(csv.reader(file, delimiter=","), None)
            if first_row is None:
                raise IndexError(f"feature file has no rows: {file_path}")

            records.append({'data': first_row, 'label': label})

    return pd.DataFrame(records, columns=['data', 'label'])

In [78]:
# Load every sample found under the feature directory into a DataFrame and
# display it as the cell output.
# NOTE(review): the path points at a *test* feature folder while the result is
# bound to the name `train`, and a later cell rebinds `train` to the output of
# get_batch() - confirm which split is intended here.
train = get_dataset('../smni_cmi_test_feature_256')
train

Unnamed: 0,data,label
0,"[1967.1470566101825, 3.23941747459607, 109.876...",alcoholic
1,"[815192.3973362085, 9.180344543918327, 313.735...",alcoholic
2,"[2089.4532860652866, 1.5419364533356181, 38.81...",alcoholic
3,"[141.40776502451726, 2.222840856468907, 63.286...",alcoholic
4,"[718895.2088581024, 8.311584488329613, 275.295...",alcoholic
...,...,...
595,"[4204.407595394246, 4.547377209482676, 126.426...",control
596,"[10893.629796959656, 11.365963313005485, 423.4...",control
597,"[23381.95429643911, 47.25945992840813, 1099.26...",control
598,"[13675.119183023591, 14.647877034680041, 481.1...",control


In [11]:
def get_batch(path, batch_size=32, seed=None):
    """Build a shuffled, batched ``tf.data.Dataset`` from a feature directory.

    Parameters
    ----------
    path : str
        Directory handed to ``get_dataset``.
    batch_size : int, optional
        Number of samples per batch (default 32).
    seed : int or None, optional
        Seed forwarded to ``Dataset.shuffle`` for a reproducible order.

    Returns
    -------
    tf.data.Dataset
        Yields ``(features, labels)`` batches: features as float64 rows,
        labels as int64 with ``alcoholic`` -> 1 and ``control`` -> 0.

    Raises
    ------
    ValueError
        If ``get_dataset`` finds no samples under ``path``.
    """
    # get_dataset returns a single DataFrame with 'data' and 'label' columns.
    # Tuple-unpacking it would bind the two column *names*, not the values.
    samples = get_dataset(path)
    if len(samples) == 0:
        raise ValueError(f"no feature files found under {path!r}")

    # Each 'data' cell is a list of CSV fields; stack them into one numeric
    # matrix (numpy parses the string fields while casting to float64).
    x = np.array(samples['data'].tolist(), dtype=np.float64)

    # Encode the labels
    label_map = {"alcoholic": 1, "control": 0}
    y = samples['label'].map(label_map).to_numpy(dtype=np.int64)

    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    # Shuffle once with a buffer covering the whole set. The order is kept
    # fixed across iterations so that a take()/skip() split applied by the
    # caller selects the same, disjoint samples on every pass.
    dataset = dataset.shuffle(
        len(y), seed=seed, reshuffle_each_iteration=False
    ).batch(batch_size)

    return dataset

### Model Definition

In [12]:
def create_model(input_dim=366):
    """Build the (uncompiled) 1-D convolutional binary classifier.

    The flat feature vector is reshaped to ``(input_dim, 1)`` and passed
    through two Conv1D -> MaxPooling1D -> BatchNormalization stages, then
    flattened and fed to two dense layers with dropout. The final layer is a
    single sigmoid unit.

    Parameters
    ----------
    input_dim : int, optional
        Length of one feature vector (default 366).

    Returns
    -------
    keras.models.Sequential
        The assembled model; the caller is responsible for compiling it.
    """
    return keras.models.Sequential([
        # Conv1D expects (steps, channels): add a single-channel axis.
        layers.Input(shape=(input_dim,)),
        layers.Reshape((input_dim, 1)),

        # Convolutional stage 1
        layers.Conv1D(filters=16, kernel_size=4, activation="relu"),
        layers.MaxPooling1D(pool_size=2),
        layers.BatchNormalization(),

        # Convolutional stage 2
        layers.Conv1D(filters=8, kernel_size=2, activation="relu"),
        layers.MaxPooling1D(pool_size=2),
        layers.BatchNormalization(),

        layers.Flatten(),

        # Dense head with dropout after each hidden layer
        layers.Dense(512, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(256, activation="relu"),
        layers.Dropout(0.5),

        layers.Dense(1, activation="sigmoid"),
    ])

### Main Program

In [29]:
# Load the shuffled, batched training set. `.cache()` pins the elements that
# the first complete pass produces, so every later iteration replays exactly
# the same batches. Without it, a pipeline that reshuffles on each iteration
# hands take() and skip() a different permutation every time, and validation
# samples end up in the training split from one epoch to the next.
train = get_batch('../Features/smni_cmi_train_feature_256').cache()

# One complete pass counts the batches and, as a side effect, fills the cache.
num_batches = sum(1 for _ in train)
train_size = int(num_batches*0.8)

# First 80% of the batches train the model, the remainder validates it.
# Only the order of the training batches is reshuffled between epochs; which
# samples belong to which split never changes.
train_ds = train.take(train_size).shuffle(max(train_size, 1))
val_ds = train.skip(train_size)


[5.29638874e+03 2.17645241e+01 4.72748622e+02 6.57318991e+02
 1.80124505e+02 4.95840833e+02 5.05783248e+03 5.52559065e+01
 1.63587857e+03 2.19382358e+03 5.62882150e+02 6.62510980e+02
 3.26333015e+03 2.19261254e+02 4.36457097e+03 4.94797987e+03
 1.01937328e+03 7.03149075e+02 3.84744723e+03 2.20349702e+02
 5.46015372e+03 9.30246819e+03 2.43493156e+03 8.58593213e+02
 1.43811062e+03 3.42284260e+00 7.83790599e+01 7.89789993e+01
 5.28887672e+01 1.22785313e+02 1.27342302e+03 5.21820052e+00
 1.13412417e+02 1.04057837e+02 4.86492914e+01 1.26299338e+02
 5.90094550e+02 2.98244548e-01 5.82626181e+00 9.06918388e+00
 1.14141799e+01 5.55421299e+01 5.78565416e+02 8.11983147e+00
 1.80328512e+02 2.29613985e+02 4.14677593e+01 6.27728503e+01
 5.10022220e+02 7.24098938e+00 1.40129755e+02 1.25856171e+02
 3.73181591e+01 3.63783217e+01 2.68023295e+02 1.09345140e+01
 2.60376430e+02 4.85217669e+02 1.02263923e+02 5.60732773e+01
 8.40169857e+02 8.23027764e+01 1.38198421e+03 1.24208316e+03
 3.50484181e+02 1.381917

In [14]:
# Sanity-check the pipeline by printing a single (features, labels) batch.
# Looping over the whole split would dump every tensor into the cell output.
for x in train_ds.take(1):
    print(x)

(<tf.Tensor: shape=(32, 366), dtype=float64, numpy=
array([[9.98259657e+04, 1.32719119e+01, 2.33840398e+02, ...,
        9.07874280e-01, 6.69552821e+00, 8.46784040e+00],
       [8.02906046e+05, 3.68865309e+00, 1.51593314e+02, ...,
        3.37594530e-01, 1.49294109e+00, 1.28206609e+01],
       [8.87036102e+05, 5.37274203e+05, 3.46380662e+06, ...,
        2.66609618e+00, 7.93502763e+00, 6.14072400e+00],
       ...,
       [2.76511616e+04, 4.14331914e+00, 1.38006021e+02, ...,
        7.18125385e+00, 2.87394185e+01, 1.93134487e+01],
       [3.34348023e+03, 2.28978477e+00, 6.60791973e+01, ...,
        8.72669078e+00, 2.49705670e+01, 5.50607973e+01],
       [1.54015996e+04, 1.27718703e+00, 3.48005743e+01, ...,
        1.75027836e+00, 5.65519558e+00, 8.48722983e+00]])>, <tf.Tensor: shape=(32,), dtype=int64, numpy=
array([0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype=int64)>)
(<tf.Tensor: shape=(32, 366), dtype=float64, numpy=
ar

In [15]:
# Hyper-parameters for this run, named so they are easy to find and tune.
# NOTE(review): 0.2 is far above the 0.001 that Adam is usually run with -
# confirm this value is intentional.
LEARNING_RATE = 0.2
EPOCHS = 1000

model = create_model()
model.summary()

# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train the model
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_1 (Reshape)         (None, 366, 1)            0         
                                                                 
 conv1d_2 (Conv1D)           (None, 363, 16)           80        
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 181, 16)          0         
 1D)                                                             
                                                                 
 batch_normalization (BatchN  (None, 181, 16)          64        
 ormalization)                                                   
                                                                 
 conv1d_3 (Conv1D)           (None, 180, 8)            264       
                                                                 
 max_pooling1d_3 (MaxPooling  (None, 90, 8)           