# Bachelor's thesis. Conveyor belt state classification using deep neural networks and data augmentation methods.

Vilnius University \\
Software Engineering \\
Student Armantas Pikšrys

## Mount data

In [None]:
!pip install tensorflow

In [None]:
# Import TensorFlow and print the installed version.
import tensorflow as tf
print(tf.__version__)

In [None]:
# Detect the available GPUs and enable on-demand memory allocation on them.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print(gpu_devices)
if gpu_devices:
    print('Using GPU')
    # TensorFlow requires the memory-growth setting to be identical on all
    # visible GPUs, so configure every device rather than only the first one
    # (configuring just gpu_devices[0] fails on multi-GPU machines).
    for gpu_device in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu_device, True)
else:
    print('Using CPU')

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras import backend as K
from keras.models import Sequential
from keras.layers import TimeDistributed, Conv1D, BatchNormalization, AveragePooling1D, Dropout, Flatten
from keras.layers import LSTM, Dense
from keras import layers
from keras.optimizers import Adam
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from pathlib import Path
from tqdm import tqdm

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import GRU, Dense, RNN, GRUCell, Input
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import plot_model

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
import six
from timeit import default_timer as timer

In [None]:
# Number of consecutive rows that make up one sliding-window sample.
window_size = 320

# Directory that is scanned for the input CSV files.
data_dir = "/kaggle/input/weight-data-kilos"  # Replace with the actual directory path

In [None]:
# Initialize empty lists for signals and labels
signals = []
labels = []

In [None]:
# Iterate over files in the directory
for filename in os.listdir(data_dir):
    if filename.endswith(".csv"):
        file_path = os.path.join(data_dir, filename)
        if float(filename.split("_")[0]) == 5:

            # Load CSV data
            data = pd.read_csv(file_path)

            # Create signals using a sliding window
            num_rows = len(data)
            step_size = 1

            for i in range(0, num_rows - window_size + 1, step_size):
                window_data = data[i:i + window_size]
                signals.append(window_data)

In [None]:
# Convert the lists to NumPy arrays
# Stack the collected windows into a single float32 array.
signals_array = np.array(signals, dtype=np.float32)

In [None]:
# Global extrema over the whole array (no axis is given); they are used for
# the min-max scaling and later for undoing it.
signal_max = signals_array.max()
signal_min = signals_array.min()

In [None]:
# Min-max normalize with the global extrema. This rebinds `data`, which
# previously held the last loaded CSV.
# NOTE(review): divides by zero when signal_max == signal_min — confirm the
# input signal is never constant.
data = (signals_array - signal_min) / (signal_max - signal_min)

In [None]:
# Sanity check of the normalization: display the extrema of the scaled data.
data_max = data.max()
data_min = data.min()

data_max, data_min

In [None]:
# Display the shape of the stacked window array.
signals_array.shape

In [None]:
!git clone https://github.com/abudesai/timeVAE.git

In [None]:
cd timeVAE

In [None]:
import os, warnings
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
# warnings.filterwarnings('ignore') 

# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # disabling gpu usage because my cuda is corrupted, needs to be fixed. 

import sys
import numpy as np , pandas as pd
import time
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from vae_dense_model import VariationalAutoencoderDense as VAE_Dense
from vae_conv_model import VariationalAutoencoderConv as VAE_Conv
from vae_conv_I_model import VariationalAutoencoderConvInterpretable as TimeVAE
import utils

In [None]:
cd ..

In [None]:
# Timestamp taken before model setup (not read again in the visible cells).
start = time.time()

# Selects which model class is instantiated further down:
# 'vae_dense', 'vae_conv' or 'timeVAE'.
vae_type = 'timeVAE' 

# NOTE: this is an alias, not a copy — in-place operations on
# `full_train_data` also modify `data`.
full_train_data = data
# Unpack the three array dimensions; T and D are later passed to the model
# constructor as seq_len and feat_dim.
N, T, D = full_train_data.shape   
print('data shape:', N, T, D) 

In [None]:
# further split the training data into train and validation set - same thing done in forecasting task
# Split the windows into a training part and a validation part.
perc_of_train_used = 20     # 5, 10, 20, 100 (only referenced by the commented-out save cell)
valid_perc = 0.1            # fraction of the windows held out for validation
N_train = int(N * (1 - valid_perc))
N_valid = N - N_train

# Shuffle along the first axis, in place, with a fixed seed so that the
# train/validation split is reproducible between runs.
split_seed = 42
np.random.default_rng(split_seed).shuffle(full_train_data)

# NOTE(review): the windows were cut with a stride of 1 row, so neighbouring
# windows overlap almost completely; a random split therefore places
# near-duplicates of training windows into the validation part.
train_data = full_train_data[:N_train]
valid_data = full_train_data[N_train:]
print("train/valid shapes: ", train_data.shape, valid_data.shape)

In [None]:
# No further scaling is applied here: both names are plain aliases of the
# train/validation splits.
scaled_train_data = train_data

scaled_valid_data = valid_data

In [None]:
import matplotlib.pyplot as plt

# Plot the first three channels of one training window (index 2) in a
# single figure.
time_axis = np.arange(window_size)
sample_window = scaled_train_data[2]

plt.figure(figsize=(12, 6))
for channel_idx, line_color in enumerate(('blue', 'green', 'red')):
    plt.plot(
        time_axis,
        sample_window[:, channel_idx],
        label=f'Channel {channel_idx + 1}',
        color=line_color,
    )

plt.title('Signal Channels')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.legend()

plt.show()

In [None]:
# ----------------------------------------------------------------------------------
# Instantiate, compile and train the model selected by `vae_type`.

latent_dim = 20

if vae_type == 'vae_dense':
    vae = VAE_Dense(seq_len=T, feat_dim=D, latent_dim=latent_dim, hidden_layer_sizes=[200, 100])
elif vae_type == 'vae_conv':
    vae = VAE_Conv(seq_len=T, feat_dim=D, latent_dim=latent_dim, hidden_layer_sizes=[100, 200])
elif vae_type == 'timeVAE':
    vae = TimeVAE(
        seq_len=T,
        feat_dim=D,
        latent_dim=latent_dim,
        hidden_layer_sizes=[50, 100, 200],  # alternative that was tried: [80, 200, 250]
        reconstruction_wt=15.0,
        # ---------------------
        # Per the original author's note: with the three arguments below
        # left out the model is used as TimeVAE_Base; passing them converts
        # it to the interpretable version.
        #     trend_poly=2,
        #     custom_seas=[(6, 1), (7, 1), (8, 1), (9, 1)],  # (num_of_seasons, len_per_season)
        #     use_scaler=True,
        # Also set use_residual_conn=False to keep only the interpretable
        # components and no residual (non-interpretable) component.
        # ---------------------
        use_residual_conn=True,
    )
else:
    # ValueError is still an Exception subclass, so existing handlers keep
    # working; the message now names the offending value and the valid ones.
    raise ValueError(
        f"Unknown vae_type {vae_type!r}; expected 'vae_dense', 'vae_conv' or 'timeVAE'"
    )


vae.compile(optimizer=Adam())

# Both callbacks watch the training loss: no validation data is passed to
# fit() below.
early_stop_loss = 'loss'
early_stop_callback = EarlyStopping(monitor=early_stop_loss, min_delta=1e-1, patience=10)
reduceLR = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5)

vae.fit(
    scaled_train_data,
    batch_size=32,
    epochs=200,
    shuffle=True,
    callbacks=[early_stop_callback, reduceLR],
    verbose=1,
)

# ----------------------------------------------------------------------------------

In [None]:
# # ----------------------------------------------------------------------------------    
# # save model 
# model_dir = './model/'
# file_pref = f'vae_{vae_type}_sine_perc_{perc_of_train_used}_iter_{0}_'
# vae.save(model_dir, file_pref)

# # ----------------------------------------------------------------------------------

In [None]:
def draw_orig_and_post_pred_sample_custom(orig, reconst, n):
    """Show `n` randomly picked originals next to their reconstructions.

    Every row of the figure holds one pair: the original sample on the left
    and the reconstruction with the same index on the right, both rendered
    with ``imshow``. Indices are drawn independently for each row, so the
    same sample may appear more than once.

    Args:
        orig: Indexable collection of original samples; must support ``len``.
        reconst: Reconstructions, indexed the same way as `orig`.
        n: Number of rows (sample pairs) to draw.
    """
    fig, _ = plt.subplots(n, 2, figsize=(10, 6))

    for row in range(n):
        pick = np.random.choice(len(orig))
        pair = (orig[pick], reconst[pick])
        # Subplot slots are numbered row by row starting at 1, so the left
        # cell of this row is 2 * row + 1 and the right cell follows it.
        left_slot = 2 * row + 1
        for offset, image in enumerate(pair):
            plt.subplot(n, 2, left_slot + offset)
            plt.imshow(image, aspect='auto')

    fig.suptitle("Originalūs ir Rekonstruoti duomenys")
    fig.tight_layout()
    plt.show()

In [None]:
# ----------------------------------------------------------------------------------
# visually check reconstruction 
# Visual check of the reconstruction quality on the training windows.
X = scaled_train_data

# Run the training windows through the trained model.
x_decoded = vae.predict(X)
print('x_decoded.shape', x_decoded.shape)

# Compare original and posterior predictive (reconstructed) samples.
draw_orig_and_post_pred_sample_custom(X, x_decoded, n=5)

# The latent-space plot is only produced for a two-dimensional latent space.
if latent_dim == 2:
    utils.plot_latent_space_timeseries(vae, n=8, figsize=(20, 10))

# # ----------------------------------------------------------------------------------

In [None]:
import matplotlib.pyplot as plt

# Draw the first training window and its reconstruction as two separate
# figures with an identical layout (three channels per figure).
channel_styles = (
    ('Kanalas 1', 'blue'),
    ('Kanalas 2', 'green'),
    ('Kanalas 3', 'red'),
)
figure_specs = (
    (scaled_train_data[0], 'Originalas'),
    (x_decoded[0], 'Rekonstrukcija'),
)

for window, figure_title in figure_specs:
    time_axis = np.arange(window_size)
    plt.figure(figsize=(12, 6))

    for channel_idx, (channel_label, channel_color) in enumerate(channel_styles):
        plt.plot(time_axis, window[:, channel_idx], label=channel_label, color=channel_color)

    plt.title(figure_title)
    plt.xlabel('Laikas')
    plt.ylabel('Amplitudė')
    plt.legend()

    plt.show()

In [None]:
# Draw a few random prior samples from the trained model and, for visual
# comparison, the same number of randomly chosen real training windows.
num_samples = N_train
n_shown = 3  # number of example series per figure

samples = vae.get_prior_samples(num_samples=n_shown)

fig, axs = plt.subplots(n_shown, 1, figsize=(6, 8))
for i in range(n_shown):
    axs[i].plot(samples[i])

fig.suptitle("Generated Samples (Scaled)")
fig.tight_layout()
plt.show()

fig, axs = plt.subplots(n_shown, 1, figsize=(6, 8))
for i in range(n_shown):
    # Pick from the whole training set. Drawing the index from
    # len(samples) would restrict the choice to the first `n_shown`
    # training windows.
    rnd_idx = np.random.choice(len(scaled_train_data))
    axs[i].plot(scaled_train_data[rnd_idx])

fig.suptitle("Real Samples (Scaled)")
fig.tight_layout()
plt.show()

# Both figures show values in the normalized range; no inverse scaling is
# applied in this cell.

In [None]:
# draw random prior samples
# NOTE(review): `num_samples` is assigned here, but the call below uses a
# hard-coded sample count instead.
num_samples = N_train
# print("num_samples: ", num_samples)

# Alternative that ties the number of generated samples to the training-set size:
#metrics_samples = vae.get_prior_samples(num_samples=N_train)
metrics_samples = vae.get_prior_samples(num_samples=40000)

In [None]:
# Display both shapes side by side.
scaled_train_data.shape, metrics_samples.shape

In [None]:
from sklearn.metrics import mean_squared_error

# Element-wise comparison of real and generated windows. The two arrays may
# hold a different number of windows (the generated count is hard-coded),
# which would make the subtraction fail, so compare only the common prefix.
# NOTE(review): prior samples are not tied to specific training windows, so
# the index-wise pairing is arbitrary — confirm this metric is intended.
n_compared = min(len(scaled_train_data), len(metrics_samples))
squared_error = (scaled_train_data[:n_compared] - metrics_samples[:n_compared]) ** 2

# Average over windows and time steps (axes 0 and 1), which leaves one MSE
# value per feature dimension, as the label below states.
mse_per_dimension = np.mean(squared_error, axis=(0, 1))
print(f'Mean Squared Error per Dimension: {mse_per_dimension}')
# Single overall figure (the mean over all elements).
print(f'Mean Squared Error (overall): {np.mean(mse_per_dimension)}')

In [None]:
# Undo the min-max normalization so the generated samples are back in the
# range of the raw signal; this rebinds `samples`.
samples = metrics_samples * (signal_max - signal_min) + signal_min

In [None]:
# Plot the first three generated samples.
fig, axs = plt.subplots(3, 1, figsize=(6, 8))
for ax, sample in zip(axs, samples[:3]):
    ax.plot(sample)

# `samples` was mapped back to the raw signal range in the previous cell, so
# the figure must not be labelled "Scaled".
fig.suptitle("Generated Samples (Original Scale)")
fig.tight_layout()
plt.show()