In [4]:
import numpy as np
import pandas as pd
import glob
import os

def load_emothaw_data(directory_path):
    all_data = []
    all_labels = []

    # Use glob to find all SVC files in the specified directory
    file_paths = glob.glob(os.path.join(directory_path, "*.svc"))
    if not file_paths:
        raise ValueError("No files found in the specified directory")

    for file_path in file_paths:
        print(f"Processing file: {file_path}")
        try:
            # Extract the label from the filename
            label = int(os.path.basename(file_path).split('_')[0])  # Assuming the label is the part before the first '_'

            # Load the file and remove trailing spaces
            with open(file_path, 'r') as f:
                lines = [line.strip() for line in f.readlines() if line.strip()]  # Strip whitespace and ignore empty lines

            # Process lines into a DataFrame by splitting by whitespace
            df = pd.DataFrame([line.split() for line in lines])  # Split by whitespace

            if df.empty:
                print(f"Warning: {file_path} is empty after processing. Skipping file.")
                continue

            # The first row gives the number of rows starting from the second row
            total_rows = int(df.iloc[0, 0])  # Number of sequences

            # Extract the feature data from the rows starting from the second row
            data = df.iloc[1:, :].values  # All columns are features

            if data.shape[0] < total_rows:
                raise ValueError(f"Insufficient data: expected {total_rows}, but found {data.shape[0]}")

            # Reshape the data to ensure each row has 7 values
            reshaped_data = []
            for row in data:
                # Ensure each row has exactly 7 columns
                if len(row) == 7:
                    reshaped_data.append(row)
                else:
                    print(f"Warning: Row does not have 7 values, skipping: {row}")

            # Convert the list of rows to a NumPy array only if not empty
            if reshaped_data:
                reshaped_data = np.array(reshaped_data, dtype=float)  # Ensure float type

                # Check that reshaped_data has enough rows
                if reshaped_data.shape[0] < total_rows:
                    raise ValueError(f"Insufficient data: expected {total_rows}, but found {reshaped_data.shape[0]}")

                # Append the reshaped data and labels to the lists
                all_data.append(reshaped_data)
                all_labels.append([label] * reshaped_data.shape[0])

                # Print the shape of reshaped data for debugging
                print(f"Shape of data from file {file_path}: {reshaped_data.shape}")

        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
            continue

    if not all_data or not all_labels:
        raise ValueError("No valid data found in the directory")

    # Display shapes of individual entries in all_data
    print("Shapes of all_data before concatenation:")
    for i, data_array in enumerate(all_data):
        print(f"Data array {i} shape: {data_array.shape}")

    # Attempt to concatenate all data and labels from the files
    try:
        all_data = np.concatenate(all_data, axis=0)  # Concatenate along the sample axis
    except Exception as e:
        print(f"Error during concatenation: {e}")

    all_labels = np.concatenate(all_labels, axis=0)  # Concatenate labels

    return all_data, all_labels

# Example usage
data, labels = load_emothaw_data(r'../test/samplefew/')  # Use raw string for paths


Processing file: ../test/samplefew\0_hw00001(2).svc
Shape of data from file ../test/samplefew\0_hw00001(2).svc: (3444, 7)
Processing file: ../test/samplefew\0_hw00001(2)21.svc
Shape of data from file ../test/samplefew\0_hw00001(2)21.svc: (2086, 7)
Processing file: ../test/samplefew\0_hw00003(2).svc
Shape of data from file ../test/samplefew\0_hw00003(2).svc: (3959, 7)
Processing file: ../test/samplefew\0_hw00003.svc
Shape of data from file ../test/samplefew\0_hw00003.svc: (3102, 7)
Processing file: ../test/samplefew\0_hw00005(2).svc
Shape of data from file ../test/samplefew\0_hw00005(2).svc: (1531, 7)
Processing file: ../test/samplefew\0_hw00006(2).svc
Shape of data from file ../test/samplefew\0_hw00006(2).svc: (4588, 7)
Processing file: ../test/samplefew\0_hw00006(3).svc
Shape of data from file ../test/samplefew\0_hw00006(3).svc: (4094, 7)
Processing file: ../test/samplefew\0_hw00007(2).svc
Shape of data from file ../test/samplefew\0_hw00007(2).svc: (3457, 7)
Processing file: ../test/s

In [5]:
import numpy as np
import logging
from tqdm import tqdm
from datetime import datetime

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger()

# Function to extract time-domain features with progress bar and logging
def extract_time_domain_features(data):
    logger.info("Starting time-domain feature extraction")
    start_time = datetime.now()

    time_features = []
    
    # Use tqdm to create a progress bar
    for sample in tqdm(data, desc="Extracting features", unit="sample"):
        sample_features = []
        for feature in sample.T:  # Assuming the features are along the last axis
            feature_stats = [
                np.mean(feature),
                np.std(feature),
                np.min(feature),
                np.max(feature),
                np.percentile(feature, 25),
                np.percentile(feature, 50),
                np.percentile(feature, 75),
            ]
            sample_features.extend(feature_stats)  # Append the computed stats to sample_features
        time_features.append(sample_features)  # Append the features for the sample

    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info(f"Feature extraction completed in {elapsed_time}")

    return np.array(time_features)

# Assuming 'data' is your input 3D array (samples, sequence length, features)
time_domain_features = extract_time_domain_features(data)



2024-10-13 14:36:00,457 - Starting time-domain feature extraction
Extracting features: 100%|██████████| 78784/78784 [05:42<00:00, 229.89sample/s]
2024-10-13 14:41:43,368 - Feature extraction completed in 0:05:42.910339


In [8]:
print(time_domain_features)

[[5.0962e+04 0.0000e+00 5.0962e+04 ... 1.5000e+01 1.5000e+01 1.5000e+01]
 [5.0962e+04 0.0000e+00 5.0962e+04 ... 4.5000e+01 4.5000e+01 4.5000e+01]
 [5.0962e+04 0.0000e+00 5.0962e+04 ... 7.5000e+01 7.5000e+01 7.5000e+01]
 ...
 [1.2224e+04 0.0000e+00 1.2224e+04 ... 8.0600e+02 8.0600e+02 8.0600e+02]
 [1.2236e+04 0.0000e+00 1.2236e+04 ... 5.6800e+02 5.6800e+02 5.6800e+02]
 [1.2252e+04 0.0000e+00 1.2252e+04 ... 8.1000e+01 8.1000e+01 8.1000e+01]]


In [6]:
import numpy as np
import logging
from tqdm import tqdm
from datetime import datetime

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger()

# Extracting frequency domain features using FFT with progress bar and logging
def extract_frequency_domain_features(data):
    logger.info("Starting frequency-domain feature extraction")
    start_time = datetime.now()
    
    freq_features = []
    
    # Ensure the data has the correct dimensions
    if len(data.shape) != 2:
        raise ValueError(f"Expected data with 2 dimensions (samples, features), but got {data.shape}")
    
    # Use tqdm to create a progress bar
    for sample in tqdm(data, desc="Extracting frequency features", unit="sample"):
        sample_features = []
        
        # Check if the sample is a 1D array
        if len(sample.shape) != 1:
            raise ValueError(f"Expected each sample to be 1D, but got {sample.shape}")

        # Apply FFT to the sample (which is already 1D, hence no need for .T)
        freq_feature = np.fft.fft(sample)
        freq_magnitude = np.abs(freq_feature)
        dominant_freq = np.argmax(freq_magnitude)
        freq_energy = np.sum(freq_magnitude)
        
        sample_features.extend([dominant_freq, freq_energy])
        freq_features.append(sample_features)
    
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info(f"Frequency-domain feature extraction completed in {elapsed_time}")

    return np.array(freq_features)

# Example usage: Assuming `data` is a 2D array (samples, features)
# data = np.random.rand(100, 50)  # Example data
frequency_domain_features = extract_frequency_domain_features(data)

# Log the output
logger.info(f"Data: {data}")
logger.info(f"Frequency Domain Features: {frequency_domain_features}")


2024-10-13 14:41:43,827 - Starting frequency-domain feature extraction
Extracting frequency features: 100%|██████████| 78784/78784 [00:02<00:00, 35451.51sample/s]
2024-10-13 14:41:46,055 - Frequency-domain feature extraction completed in 0:00:02.226384
2024-10-13 14:41:46,119 - Data: [[5.0962000e+04 3.4188000e+04 1.6718871e+07 ... 1.9500000e+03
  6.2000000e+02 1.5000000e+01]
 [5.0962000e+04 3.4188000e+04 1.6718878e+07 ... 1.9500000e+03
  6.2000000e+02 4.5000000e+01]
 [5.0962000e+04 3.4188000e+04 1.6718886e+07 ... 1.9500000e+03
  6.2000000e+02 7.5000000e+01]
 ...
 [1.2224000e+04 2.9560000e+04 1.7049425e+07 ... 1.1900000e+03
  8.0000000e+02 8.0600000e+02]
 [1.2236000e+04 2.9560000e+04 1.7049433e+07 ... 1.2200000e+03
  8.1000000e+02 5.6800000e+02]
 [1.2252000e+04 2.9560000e+04 1.7049440e+07 ... 1.2200000e+03
  8.1000000e+02 8.1000000e+01]]
2024-10-13 14:41:46,121 - Frequency Domain Features: [[0.00000000e+00 1.17032471e+08]
 [0.00000000e+00 1.17032520e+08]
 [0.00000000e+00 1.17032576e+08]

In [10]:
print(frequency_domain_features)

[[0.00000000e+00 1.17032471e+08]
 [0.00000000e+00 1.17032520e+08]
 [0.00000000e+00 1.17032576e+08]
 ...
 [0.00000000e+00 1.19346077e+08]
 [0.00000000e+00 1.19346133e+08]
 [0.00000000e+00 1.19346182e+08]]


In [36]:
import numpy as np
import logging
from tqdm import tqdm
from scipy.stats import skew, kurtosis
from datetime import datetime

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger()

# Function to extract statistical features with progress bar and logging
def extract_statistical_features(data):
    logger.info("Starting statistical feature extraction")
    start_time = datetime.now()
    
    statistical_features = []
    
    for sample in tqdm(data, desc="Extracting statistical features", unit="sample"):
        sample_features = []
        skew_value = skew(sample.T)
        kurtosis_value = kurtosis(sample.T)
        for feature in sample.T:
            mean = np.mean(feature)
            median = np.median(feature)
            variance = np.var(feature)
            feature_stats = [mean, median, variance, skew_value, kurtosis_value]
            sample_features.extend(feature_stats)
        statistical_features.append(sample_features)
    
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info(f"Statistical feature extraction completed in {elapsed_time}")
    
    return np.array(statistical_features)

# Example usage: Assuming 'data' is a 3D array (samples, sequence length, features)
statistical_features = extract_statistical_features(data)

# Log the output
logger.info(f"Data: {data}")
logger.info(f"Statistical Features: {statistical_features}")

2024-10-13 15:43:19,543 - Starting statistical feature extraction
Extracting statistical features: 100%|██████████| 78784/78784 [04:01<00:00, 326.83sample/s]
2024-10-13 15:47:20,600 - Statistical feature extraction completed in 0:04:01.056327
2024-10-13 15:47:20,888 - Data: [[5.0962000e+04 3.4188000e+04 1.6718871e+07 ... 1.9500000e+03
  6.2000000e+02 1.5000000e+01]
 [5.0962000e+04 3.4188000e+04 1.6718878e+07 ... 1.9500000e+03
  6.2000000e+02 4.5000000e+01]
 [5.0962000e+04 3.4188000e+04 1.6718886e+07 ... 1.9500000e+03
  6.2000000e+02 7.5000000e+01]
 ...
 [1.2224000e+04 2.9560000e+04 1.7049425e+07 ... 1.1900000e+03
  8.0000000e+02 8.0600000e+02]
 [1.2236000e+04 2.9560000e+04 1.7049433e+07 ... 1.2200000e+03
  8.1000000e+02 5.6800000e+02]
 [1.2252000e+04 2.9560000e+04 1.7049440e+07 ... 1.2200000e+03
  8.1000000e+02 8.1000000e+01]]
2024-10-13 15:47:20,891 - Statistical Features: [[5.09620000e+04 5.09620000e+04 0.00000000e+00 ... 0.00000000e+00
  2.04119691e+00 2.16656956e+00]
 [5.09620000e+

In [37]:
print(time_domain_features)
print(frequency_domain_features)
print(statistical_features)

[[5.0962e+04 0.0000e+00 5.0962e+04 ... 1.5000e+01 1.5000e+01 1.5000e+01]
 [5.0962e+04 0.0000e+00 5.0962e+04 ... 4.5000e+01 4.5000e+01 4.5000e+01]
 [5.0962e+04 0.0000e+00 5.0962e+04 ... 7.5000e+01 7.5000e+01 7.5000e+01]
 ...
 [1.2224e+04 0.0000e+00 1.2224e+04 ... 8.0600e+02 8.0600e+02 8.0600e+02]
 [1.2236e+04 0.0000e+00 1.2236e+04 ... 5.6800e+02 5.6800e+02 5.6800e+02]
 [1.2252e+04 0.0000e+00 1.2252e+04 ... 8.1000e+01 8.1000e+01 8.1000e+01]]
[[0.00000000e+00 1.17032471e+08]
 [0.00000000e+00 1.17032520e+08]
 [0.00000000e+00 1.17032576e+08]
 ...
 [0.00000000e+00 1.19346077e+08]
 [0.00000000e+00 1.19346133e+08]
 [0.00000000e+00 1.19346182e+08]]
[[5.09620000e+04 5.09620000e+04 0.00000000e+00 ... 0.00000000e+00
  2.04119691e+00 2.16656956e+00]
 [5.09620000e+04 5.09620000e+04 0.00000000e+00 ... 0.00000000e+00
  2.04119692e+00 2.16656959e+00]
 [5.09620000e+04 5.09620000e+04 0.00000000e+00 ... 0.00000000e+00
  2.04119694e+00 2.16656962e+00]
 ...
 [1.22240000e+04 1.22240000e+04 0.00000000e+00 ...

In [38]:
#Applying Attention-based Transformer Model
import tensorflow as tf
from tensorflow.keras import layers, Model

# Applying Attention-based Transformer Model
class MultiHeadSelfAttention(layers.Layer):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}")
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)
        return output

In [39]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [41]:
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, Model

class TransformerClassifier(Model):
    def __init__(self, num_classes, embed_dim, num_heads, ff_dim, num_layers):
        super(TransformerClassifier, self).__init__()
        self.dense_input = layers.Dense(embed_dim)  # Add this layer to project input features
        self.transformer_blocks = [
            TransformerBlock(embed_dim, num_heads, ff_dim) for _ in range(num_layers)
        ]
        self.global_average_pooling = layers.GlobalAveragePooling1D()
        self.dense = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):  # Add training parameter
        x = self.dense_input(inputs)  # Project inputs to the embedding dimension
        for transformer in self.transformer_blocks:
            x = transformer(x, training=training)  # Pass training to the transformer
        x = self.global_average_pooling(x)
        return self.dense(x)

# Splitting the data into training and testing sets
features = np.concatenate((time_domain_features, frequency_domain_features, statistical_features), axis=1)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Defining the model parameters
num_classes = 3
embed_dim = 128
num_heads = 4
ff_dim = 128
num_layers = 2

# Creating the Transformer model
model = TransformerClassifier(num_classes, embed_dim, num_heads, ff_dim, num_layers)

# Compiling the model
model.compile(optimizer=Adam(clipnorm=1.0), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluating the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


Epoch 1/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 47ms/step - accuracy: 0.4757 - loss: 1.0303 - val_accuracy: 0.5965 - val_loss: 0.9220
Epoch 2/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 47ms/step - accuracy: 0.5989 - loss: 0.8011 - val_accuracy: 0.6081 - val_loss: 0.7540
Epoch 3/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 42ms/step - accuracy: 0.6815 - loss: 0.6424 - val_accuracy: 0.7194 - val_loss: 0.4767
Epoch 4/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 40ms/step - accuracy: 0.7405 - loss: 0.5223 - val_accuracy: 0.8141 - val_loss: 0.3650
Epoch 5/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 39ms/step - accuracy: 0.7702 - loss: 0.4533 - val_accuracy: 0.6593 - val_loss: 0.6718
Epoch 6/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 36ms/step - accuracy: 0.7748 - loss: 0.4593 - val_accuracy: 0.8350 - val_loss: 0.3351
Epoc

In [47]:
#Dataset splitting in Training/testing
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

# EMOTHAW dataset
def generate_sample_data(num_samples, seq_len, num_features, num_classes):
    data = np.random.rand(num_samples, seq_len, num_features)
    labels = np.random.randint(0, num_classes, num_samples)
    return data, labels

# Sample of EMOTHAW dataset\n"
num_samples = 6
seq_length = 800
num_features = 56
num_classes = 3  # Depression, Anxiety and Stress\n",

data, labels = generate_sample_data(num_samples, seq_length, num_features, num_classes)

# Split the data into training and testing sets\n
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=1, random_state=42)
"\n",
# Build and compile the model\n
num_blocks = 3
embed_dim = 64
num_heads = 4
ff_dim = 128
learning_rate = 0.001

transformer_model = TransformerClassifier(num_classes, embed_dim, num_heads, ff_dim, num_blocks)
transformer_model.compile(optimizer=Adam(learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training the model
epochs = 25
batch_size = 32

transformer_model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

# Evaluating the model
loss, accuracy = transformer_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}")

Epoch 1/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step - accuracy: 0.0000e+00 - loss: 2.5161 - val_accuracy: 1.0000 - val_loss: 0.5597
Epoch 2/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 595ms/step - accuracy: 0.6000 - loss: 0.7050 - val_accuracy: 1.0000 - val_loss: 0.3448
Epoch 3/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 604ms/step - accuracy: 0.6000 - loss: 0.7004 - val_accuracy: 1.0000 - val_loss: 0.5051
Epoch 4/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 633ms/step - accuracy: 0.6000 - loss: 0.6652 - val_accuracy: 0.0000e+00 - val_loss: 0.7056
Epoch 5/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 590ms/step - accuracy: 0.6000 - loss: 0.6873 - val_accuracy: 1.0000 - val_loss: 0.6145
Epoch 6/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 595ms/step - accuracy: 0.6000 - loss: 0.6763 - val_accuracy: 1.0000 - val_loss: 0.4432
Epoch 7/25
[1m1/1[0m [32m━━━━━

In [52]:
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
directory = './models/'
model_path = os.path.join(directory, 'Emotion-detection.model.keras')
# Create the directory if it doesn't exist
os.makedirs(directory, exist_ok=True)
# Modify the model to work with sequence data
model = tf.keras.Sequential()

# LSTM layer expects 3D input (batch_size, timesteps, features)
model.add(LSTM(128, input_shape=(800, 56), return_sequences=True))
model.add(LSTM(128))
model.add(Dense(64, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=3)
# Save the model
model.save(model_path)

# Compile the model\n",
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Epoch 1/3


2024-10-13 16:12:00,570 - 5 out of the last 35 calls to <function TensorFlowTrainer.make_train_function.<locals>.one_step_on_iterator at 0x00000252DA003A60> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.4000 - loss: 1.1108
Epoch 2/3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 505ms/step - accuracy: 0.6000 - loss: 0.9247
Epoch 3/3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 492ms/step - accuracy: 0.6000 - loss: 0.7889


In [54]:
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, Model

class TransformerClassifier(Model):
    def __init__(self, num_classes, embed_dim, num_heads, ff_dim, num_layers):
        super(TransformerClassifier, self).__init__()
        self.dense_input = layers.Dense(embed_dim)  # Add this layer to project input features
        self.transformer_blocks = [
            TransformerBlock(embed_dim, num_heads, ff_dim) for _ in range(num_layers)
        ]
        self.global_average_pooling = layers.GlobalAveragePooling1D()
        self.dense = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):  # Add training parameter
        x = self.dense_input(inputs)  # Project inputs to the embedding dimension
        for transformer in self.transformer_blocks:
            x = transformer(x, training=training)  # Pass training to the transformer
        x = self.global_average_pooling(x)
        return self.dense(x)

# Splitting the data into training and testing sets
features = np.concatenate((time_domain_features, frequency_domain_features, statistical_features), axis=1)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Defining the model parameters
num_classes = 3
embed_dim = 128
num_heads = 4
ff_dim = 128
num_layers = 2

# Creating the Transformer model
model = TransformerClassifier(num_classes, embed_dim, num_heads, ff_dim, num_layers)

# Compiling the model
model.compile(optimizer=Adam(clipnorm=1.0), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluating the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


ValueError: Found input variables with inconsistent numbers of samples: [78784, 6]

In [74]:
import numpy as np
import tensorflow as tf
import pandas as pd
import os

# Function to load and preprocess a single .svc file for prediction
def load_single_svc_file(file_path, expected_timesteps=800, num_features=56):
    try:
        # Load the file and process the lines
        with open(file_path, 'r') as f:
            lines = [line.strip() for line in f.readlines() if line.strip()]  # Strip whitespace and ignore empty lines

        # Process lines into a DataFrame by splitting by whitespace
        df = pd.DataFrame([line.split() for line in lines])  # Split by whitespace

        if df.empty:
            raise ValueError(f"{file_path} is empty after processing.")

        # Extract the feature data from the rows starting from the second row
        data = df.iloc[1:, :].values  # All columns are features

        # Convert to a float NumPy array
        data = np.array(data, dtype=float)

        # Check if we need to pad or truncate data to match (800 timesteps, 56 features)
        if data.shape[0] > expected_timesteps:
            # If more timesteps, truncate the excess
            data = data[:expected_timesteps, :]
        elif data.shape[0] < expected_timesteps:
            # If fewer timesteps, pad with zeros
            padding = np.zeros((expected_timesteps - data.shape[0], num_features))
            data = np.vstack((data, padding))

        # Ensure that the number of features matches the expected number (56)
        if data.shape[1] > num_features:
            # Truncate excess features
            data = data[:, :num_features]
        elif data.shape[1] < num_features:
            # Pad missing features with zeros
            padding = np.zeros((expected_timesteps, num_features - data.shape[1]))
            data = np.hstack((data, padding))

        # Return the reshaped data
        return data

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

# Path to the .svc file to predict
file_path = '../test/samplefew/2_hw00004 (2).svc'

# Load the saved model
model = tf.keras.models.load_model('./models/Emotion-detection.model.keras')

# Load and preprocess the .svc file
data = load_single_svc_file(file_path)

# Reshape the data for prediction (1 sample, 800 timesteps, 56 features)
if data is not None:
    data = data.reshape((1, 800, 56))  # Reshape to match the input shape expected by the model

    # Predict the class probabilities
    predictions = model.predict(data)

    # Get the predicted class (index of the highest probability)
    predicted_class = np.argmax(predictions, axis=1)

    # Output the predicted class
    class_labels = ['Depression', 'Anxiety', 'Stress']
    print(f"Predicted emotion: {class_labels[predicted_class[0]]}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 396ms/step
Predicted emotion: Depression


In [None]:
print(labels)

[2 1 2 2 2 1]


In [97]:
import numpy as np
import tensorflow as tf
import pandas as pd
import os

# Function to load and preprocess a single .svc file for prediction
def load_single_svc_file(file_path, expected_timesteps=800, num_features=56):
    try:
        # Load the file and process the lines
        with open(file_path, 'r') as f:
            lines = [line.strip() for line in f.readlines() if line.strip()]  # Strip whitespace and ignore empty lines

        # Process lines into a DataFrame by splitting by whitespace
        df = pd.DataFrame([line.split() for line in lines])  # Split by whitespace

        if df.empty:
            raise ValueError(f"{file_path} is empty after processing.")

        # Extract the feature data from the rows starting from the second row
        data = df.iloc[1:, :].values  # All columns are features

        # Convert to a float NumPy array
        data = np.array(data, dtype=float)

        # Check if we need to pad or truncate data to match (800 timesteps, 56 features)
        if data.shape[0] > expected_timesteps:
            # If more timesteps, truncate the excess
            data = data[:expected_timesteps, :]
        elif data.shape[0] < expected_timesteps:
            # If fewer timesteps, pad with zeros
            padding = np.zeros((expected_timesteps - data.shape[0], num_features))
            data = np.vstack((data, padding))

        # Ensure that the number of features matches the expected number (56)
        if data.shape[1] > num_features:
            # Truncate excess features
            data = data[:, :num_features]
        elif data.shape[1] < num_features:
            # Pad missing features with zeros
            padding = np.zeros((expected_timesteps, num_features - data.shape[1]))
            data = np.hstack((data, padding))

        # Return the reshaped data
        return data

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

# Path to the .svc file to predict
file_path = '../test/samplefew/0_hw00001(2).svc'

# Load the saved model
model = tf.keras.models.load_model('./models/Emotion-detection.model.keras')

# Load and preprocess the .svc file
data = load_single_svc_file(file_path)

# Reshape the data for prediction (1 sample, 800 timesteps, 56 features)
if data is not None:
    data = data.reshape((1, 800, 56))  # Reshape to match the input shape expected by the model

    # Predict the class probabilities
    predictions = model.predict(data)

    # Get the predicted class (index of the highest probability)
    predicted_class = np.argmax(predictions, axis=1)

    # Output the predicted class
    class_labels = ['Depression', 'Anxiety', 'Stress']
    print(f"Predicted emotion: {class_labels[predicted_class[0]]}")


TypeError: load_single_svc_file() missing 2 required positional arguments: 'expected_timesteps' and 'num_features'