## Library Imports

In [226]:
import numpy as np
import pandas as pd
import glob
import os
import logging
import tensorflow as tf
from tqdm import tqdm
from datetime import datetime
from scipy.stats import skew, kurtosis
from tensorflow.keras import layers, Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LSTM, Dense

# Pre-processing of files 

In [227]:
def load_emothaw_data(directory_path):
    all_data = []
    all_labels = []

    # Use glob to find all .svc files in the specified directory
    file_paths = glob.glob(os.path.join(directory_path, "*.svc"))
    if not file_paths:
        # Raise an error if no files are found
        raise ValueError("No files found in the specified directory")

    # Iterate over each file path found
    for file_path in file_paths:
        print(f"Processing file: {file_path}")
        try:
            # Extract the label from the filename
            # Assuming the label is the part before the first underscore '_'
            label = int(os.path.basename(file_path).split('_')[0])

            # Load the file and strip trailing spaces
            with open(file_path, 'r') as f:
                lines = [line.strip() for line in f.readlines() if line.strip()]  # Ignore empty lines

            # Convert the lines into a DataFrame by splitting each line by whitespace
            df = pd.DataFrame([line.split() for line in lines])  # Split by whitespace

            # Skip empty files
            if df.empty:
                print(f"Warning: {file_path} is empty after processing. Skipping file.")
                continue

            # First row contains the total number of data rows (excluding the first row)
            total_rows = int(df.iloc[0, 0])  # Number of expected sequences

            # Extract the feature data from the subsequent rows
            data = df.iloc[1:, :].values  # All columns are treated as features

            # Check if enough data rows exist as per the first row's instruction
            if data.shape[0] < total_rows:
                raise ValueError(f"Insufficient data: expected {total_rows}, but found {data.shape[0]}")

            # Ensure each row has exactly 7 feature values
            reshaped_data = []
            for row in data:
                if len(row) == 7:
                    reshaped_data.append(row)
                else:
                    print(f"Warning: Row does not have 7 values, skipping: {row}")

            # If valid reshaped data exists, convert it into a NumPy array
            if reshaped_data:
                reshaped_data = np.array(reshaped_data, dtype=float)  # Convert to float type

                # Check that reshaped data has enough rows as expected
                if reshaped_data.shape[0] < total_rows:
                    raise ValueError(f"Insufficient data: expected {total_rows}, but found {reshaped_data.shape[0]}")

                # Append the reshaped data and labels
                all_data.append(reshaped_data)
                all_labels.append([label] * reshaped_data.shape[0])  # Append the label for each data row

                # Print the shape of reshaped data for debugging
                print(f"Shape of data from file {file_path}: {reshaped_data.shape}")

        except Exception as e:
            # If any error occurs, display the error and continue processing the next file
            print(f"Error processing file {file_path}: {e}")
            continue

    # Raise an error if no valid data was processed
    if not all_data or not all_labels:
        raise ValueError("No valid data found in the directory")

    # Print the shapes of individual data entries for debugging
    print("Shapes of all_data before concatenation:")
    for i, data_array in enumerate(all_data):
        print(f"Data array {i} shape: {data_array.shape}")

    # Concatenate all data arrays along the sample axis (rows)
    try:
        all_data = np.concatenate(all_data, axis=0)  # Concatenate data arrays
    except Exception as e:
        print(f"Error during concatenation: {e}")

    # Concatenate labels
    all_labels = np.concatenate(all_labels, axis=0)

    # Return the processed data and corresponding labels
    return all_data, all_labels

# Importing all files on the Directory
data, labels = load_emothaw_data(r'../test/sampleLabeled/')  # Directory path


Processing file: ../test/sampleLabeled\0_hw00001(2).svc
Shape of data from file ../test/sampleLabeled\0_hw00001(2).svc: (1774, 7)
Processing file: ../test/sampleLabeled\0_hw00001(3).svc
Shape of data from file ../test/sampleLabeled\0_hw00001(3).svc: (3444, 7)
Processing file: ../test/sampleLabeled\0_hw00002.svc
Shape of data from file ../test/sampleLabeled\0_hw00002.svc: (3800, 7)
Processing file: ../test/sampleLabeled\0_hw00003 (2).svc
Shape of data from file ../test/sampleLabeled\0_hw00003 (2).svc: (4276, 7)
Processing file: ../test/sampleLabeled\0_hw00003(2).svc
Shape of data from file ../test/sampleLabeled\0_hw00003(2).svc: (3959, 7)
Processing file: ../test/sampleLabeled\0_hw00003(3).svc
Shape of data from file ../test/sampleLabeled\0_hw00003(3).svc: (4401, 7)
Processing file: ../test/sampleLabeled\0_hw00003.svc
Shape of data from file ../test/sampleLabeled\0_hw00003.svc: (3102, 7)
Processing file: ../test/sampleLabeled\0_hw00004.svc
Shape of data from file ../test/sampleLabeled\0

## FEATURE EXTRACTION

In [228]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger()

# Function to extract time-domain features with progress bar and logging
def extract_time_domain_features(data):
    logger.info("Starting time-domain feature extraction")
    start_time = datetime.now()

    time_features = []
    
    # Use tqdm to create a progress bar
    for sample in tqdm(data, desc="Extracting features", unit="sample"):
        sample_features = []
        for feature in sample.T:  # Assuming the features are along the last axis
            feature_stats = [
                np.mean(feature),
                np.std(feature),
                np.min(feature),
                np.max(feature),
                np.percentile(feature, 25),
                np.percentile(feature, 50),
                np.percentile(feature, 75),
            ]
            sample_features.extend(feature_stats)  # Append the computed stats to sample_features
        time_features.append(sample_features)  # Append the features for the sample

    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info(f"Feature extraction completed in {elapsed_time}")

    return np.array(time_features)

# Assuming 'data' is your input 3D array (samples, sequence length, features)
time_domain_features = extract_time_domain_features(data)



2024-10-15 16:32:23,882 - Starting time-domain feature extraction
Extracting features: 100%|██████████| 86355/86355 [05:31<00:00, 260.63sample/s]
2024-10-15 16:37:55,258 - Feature extraction completed in 0:05:31.374800


In [229]:
# Extracting frequency domain features using FFT with progress bar and logging
def extract_frequency_domain_features(data):
    logger.info("Starting frequency-domain feature extraction")
    start_time = datetime.now()
    
    freq_features = []
    
    # Ensure the data has the correct dimensions
    if len(data.shape) != 2:
        raise ValueError(f"Expected data with 2 dimensions (samples, features), but got {data.shape}")
    
    # Use tqdm to create a progress bar
    for sample in tqdm(data, desc="Extracting frequency features", unit="sample"):
        sample_features = []
        
        # Check if the sample is a 1D array
        if len(sample.shape) != 1:
            raise ValueError(f"Expected each sample to be 1D, but got {sample.shape}")

        # Apply FFT to the sample (which is already 1D, hence no need for .T)
        freq_feature = np.fft.fft(sample)
        freq_magnitude = np.abs(freq_feature)
        dominant_freq = np.argmax(freq_magnitude)
        freq_energy = np.sum(freq_magnitude)
        
        sample_features.extend([dominant_freq, freq_energy])
        freq_features.append(sample_features)
    
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info(f"Frequency-domain feature extraction completed in {elapsed_time}")

    return np.array(freq_features)

# Example usage: Assuming `data` is a 2D array (samples, features)
# data = np.random.rand(100, 50)  # Example data
frequency_domain_features = extract_frequency_domain_features(data)

# Log the output
logger.info(f"Data: {data}")
logger.info(f"Frequency Domain Features: {frequency_domain_features}")


2024-10-15 16:37:55,703 - Starting frequency-domain feature extraction
Extracting frequency features: 100%|██████████| 86355/86355 [00:01<00:00, 44616.77sample/s]
2024-10-15 16:37:57,642 - Frequency-domain feature extraction completed in 0:00:01.937481
2024-10-15 16:37:57,698 - Data: [[5.142800e+04 3.443100e+04 7.354731e+06 ... 1.850000e+03 6.000000e+02
  1.270000e+02]
 [5.142800e+04 3.443100e+04 7.354738e+06 ... 1.850000e+03 6.000000e+02
  2.210000e+02]
 [5.142800e+04 3.443100e+04 7.354746e+06 ... 1.850000e+03 6.000000e+02
  2.680000e+02]
 ...
 [8.880000e+03 2.852000e+03 1.662474e+06 ... 2.550000e+03 7.500000e+02
  1.023000e+03]
 [8.903000e+03 2.838000e+03 1.662482e+06 ... 2.550000e+03 7.500000e+02
  9.210000e+02]
 [8.939000e+03 2.829000e+03 1.662489e+06 ... 2.560000e+03 7.400000e+02
  1.180000e+02]]
2024-10-15 16:37:57,699 - Frequency Domain Features: [[       0.         51483983.15708157]
 [       0.         51484032.14330792]
 [       0.         51484088.13747965]
 ...
 [       0. 

In [230]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger()

# Function to extract statistical features with progress bar and logging
def extract_statistical_features(data):
    logger.info("Starting statistical feature extraction")
    start_time = datetime.now()
    
    statistical_features = []
    
    for sample in tqdm(data, desc="Extracting statistical features", unit="sample"):
        sample_features = []
        skew_value = skew(sample.T)
        kurtosis_value = kurtosis(sample.T)
        for feature in sample.T:
            mean = np.mean(feature)
            median = np.median(feature)
            variance = np.var(feature)
            feature_stats = [mean, median, variance, skew_value, kurtosis_value]
            sample_features.extend(feature_stats)
        statistical_features.append(sample_features)
    
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info(f"Statistical feature extraction completed in {elapsed_time}")
    
    return np.array(statistical_features)

# Example usage: Assuming 'data' is a 3D array (samples, sequence length, features)
statistical_features = extract_statistical_features(data)

# Log the output
logger.info(f"Data: {data}")
logger.info(f"Statistical Features: {statistical_features}")

2024-10-15 16:37:57,765 - Starting statistical feature extraction
Extracting statistical features: 100%|██████████| 86355/86355 [03:05<00:00, 466.77sample/s]
2024-10-15 16:41:02,777 - Statistical feature extraction completed in 0:03:05.010685
2024-10-15 16:41:03,049 - Data: [[5.142800e+04 3.443100e+04 7.354731e+06 ... 1.850000e+03 6.000000e+02
  1.270000e+02]
 [5.142800e+04 3.443100e+04 7.354738e+06 ... 1.850000e+03 6.000000e+02
  2.210000e+02]
 [5.142800e+04 3.443100e+04 7.354746e+06 ... 1.850000e+03 6.000000e+02
  2.680000e+02]
 ...
 [8.880000e+03 2.852000e+03 1.662474e+06 ... 2.550000e+03 7.500000e+02
  1.023000e+03]
 [8.903000e+03 2.838000e+03 1.662482e+06 ... 2.550000e+03 7.500000e+02
  9.210000e+02]
 [8.939000e+03 2.829000e+03 1.662489e+06 ... 2.560000e+03 7.400000e+02
  1.180000e+02]]
2024-10-15 16:41:03,060 - Statistical Features: [[5.14280000e+04 5.14280000e+04 0.00000000e+00 ... 0.00000000e+00
  2.04100695e+00 2.16615462e+00]
 [5.14280000e+04 5.14280000e+04 0.00000000e+00 ...

In [198]:
## MultiHeadSelf Attention Layer

In [231]:
class MultiHeadSelfAttention(layers.Layer):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim  # Total embedding dimension
        self.num_heads = num_heads  # Number of attention heads
        
        # Ensure that the embedding dimension is divisible by the number of heads
        if embed_dim % num_heads != 0:
            raise ValueError(f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}")
        
        # Calculate the dimension per head
        self.projection_dim = embed_dim // num_heads
        
        # Dense layers for query, key, and value projections
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        
        # Dense layer to combine the heads' outputs
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        # Calculate the attention scores
        score = tf.matmul(query, key, transpose_b=True)
        
        # Scale the scores by the square root of the key dimension
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        
        # Apply softmax to get the attention weights
        weights = tf.nn.softmax(scaled_score, axis=-1)
        
        # Compute the output as a weighted sum of the values
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        # Reshape the input tensor to separate the heads
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])  # Transpose for compatibility with attention computation

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]  # Get the batch size
        
        # Apply dense layers to inputs to create query, key, and value tensors
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        
        # Separate the heads for query, key, and value tensors
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        
        # Calculate the attention output and weights
        attention, weights = self.attention(query, key, value)
        
        # Transpose the attention output back to the original shape
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        
        # Combine the heads' outputs
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)  # Final dense layer to project the output
        return output


## Transformer Block

In [232]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        
        # Initialize the multi-head self-attention layer
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        
        # Feedforward network: A sequential model with a ReLU activation function
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        
        # Layer normalization for residual connections
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        
        # Dropout layers for regularization
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        # Apply multi-head self-attention to the input
        attn_output = self.att(inputs)
        
        # Apply dropout to the attention output (only during training)
        attn_output = self.dropout1(attn_output, training=training)
        
        # Add the original input (residual connection) to the attention output and apply layer normalization
        out1 = self.layernorm1(inputs + attn_output)
        
        # Pass through the feedforward network
        ffn_output = self.ffn(out1)
        
        # Apply dropout to the feedforward output (only during training)
        ffn_output = self.dropout2(ffn_output, training=training)
        
        # Add the output from the feedforward network to the previous output (residual connection) and apply layer normalization
        return self.layernorm2(out1 + ffn_output)


Transformer Classifier

In [233]:
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, Model

class TransformerClassifier(Model):
    def __init__(self, num_classes, embed_dim, num_heads, ff_dim, num_layers):
        super(TransformerClassifier, self).__init__()
        
        # Dense layer to project input features into the embedding dimension
        self.dense_input = layers.Dense(embed_dim)
        
        # Create multiple transformer blocks as specified by num_layers
        self.transformer_blocks = [
            TransformerBlock(embed_dim, num_heads, ff_dim) for _ in range(num_layers)
        ]
        
        # Global average pooling layer to reduce dimensionality before classification
        self.global_average_pooling = layers.GlobalAveragePooling1D()
        
        # Final dense layer for classification with softmax activation
        self.dense = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):  # Include training parameter
        # Project inputs to the embedding dimension
        x = self.dense_input(inputs)
        
        # Pass the projected inputs through each transformer block
        for transformer in self.transformer_blocks:
            x = transformer(x, training=training)  # Forward the training parameter to transformer blocks
            
        # Apply global average pooling to reduce dimensionality
        x = self.global_average_pooling(x)
        
        # Pass the pooled output through the final dense layer
        return self.dense(x)

# Splitting the data into training and testing sets
features = np.concatenate((time_domain_features, frequency_domain_features, statistical_features), axis=1)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Defining the model parameters
num_classes = 3  # Number of output classes for classification
embed_dim = 128  # Embedding dimension for the transformer
num_heads = 4    # Number of attention heads in each transformer block
ff_dim = 128     # Dimension of the feedforward network
num_layers = 2   # Number of transformer blocks in the model

# Creating the Transformer model
model = TransformerClassifier(num_classes, embed_dim, num_heads, ff_dim, num_layers)

# Compiling the model with Adam optimizer and sparse categorical crossentropy loss
model.compile(optimizer=Adam(clipnorm=1.0), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training the model on the training data
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluating the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


Epoch 1/10
[1m1728/1728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 34ms/step - accuracy: 0.4069 - loss: 1.1061 - val_accuracy: 0.4880 - val_loss: 0.9949
Epoch 2/10
[1m1728/1728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 34ms/step - accuracy: 0.5392 - loss: 0.9284 - val_accuracy: 0.6873 - val_loss: 0.6426
Epoch 3/10
[1m1728/1728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 35ms/step - accuracy: 0.7294 - loss: 0.5812 - val_accuracy: 0.8380 - val_loss: 0.3195
Epoch 4/10
[1m1728/1728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 38ms/step - accuracy: 0.8371 - loss: 0.3803 - val_accuracy: 0.9501 - val_loss: 0.1557
Epoch 5/10
[1m1728/1728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 35ms/step - accuracy: 0.8799 - loss: 0.2905 - val_accuracy: 0.9600 - val_loss: 0.1502
Epoch 6/10
[1m1728/1728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 38ms/step - accuracy: 0.8974 - loss: 0.2553 - val_accuracy: 0.8707 - val_loss: 0.2942
Epo

In [None]:
print(features)

## REQUIRED

In [234]:
# Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

# Split the data into training and testing sets
# Use the train_test_split function from scikit-learn to create training and testing datasets
# Adjust the test_size parameter to control the proportion of data in the test set
# In this case, it is set to 1, which means the entire dataset will be used for testing
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Define model parameters for the transformer classifier
num_blocks = 3       # Number of transformer blocks to be used in the model
embed_dim = 64       # Dimension of the embedding space
num_heads = 4        # Number of attention heads in each transformer block
ff_dim = 128         # Dimension of the feedforward network within each transformer block
learning_rate = 0.001 # Learning rate for the optimizer

# Instantiate the TransformerClassifier model with the defined parameters
transformer_model = TransformerClassifier(num_classes, embed_dim, num_heads, ff_dim, num_blocks)

# Compile the model with the Adam optimizer and the specified loss function
# Metrics set to 'accuracy' to evaluate the performance during training
transformer_model.compile(optimizer=Adam(learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Set training parameters
epochs = 5           # Number of epochs to train the model
batch_size = 32      # Number of samples per gradient update

# Train the model on the training data
# The validation_data parameter is set to evaluate the model on the test set during training
transformer_model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

# Evaluate the model on the test set
# This will return the loss and accuracy of the model on the unseen test data
loss, accuracy = transformer_model.evaluate(X_test, y_test)

# Print the test accuracy as a percentage
print(f"Test Accuracy: {accuracy * 100:.2f}")


Epoch 1/5
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 29ms/step - accuracy: 0.4229 - loss: 1.0720 - val_accuracy: 0.5674 - val_loss: 0.9246
Epoch 2/5
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 32ms/step - accuracy: 0.5737 - loss: 0.8743 - val_accuracy: 0.7307 - val_loss: 0.5667
Epoch 3/5
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 33ms/step - accuracy: 0.6852 - loss: 0.6505 - val_accuracy: 0.7702 - val_loss: 0.4999
Epoch 4/5
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 38ms/step - accuracy: 0.7288 - loss: 0.5664 - val_accuracy: 0.8133 - val_loss: 0.3915
Epoch 5/5
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 35ms/step - accuracy: 0.8336 - loss: 0.3732 - val_accuracy: 0.9074 - val_loss: 0.2669
[1m540/540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.9080 - loss: 0.2669
Test Accuracy: 90.74


## Just test code below

In [None]:

# Define the directory and model path for saving the trained model
directory = './newmodels/'
model_path = os.path.join(directory, 'testmodel.model.keras')

# Create the directory if it doesn't exist to ensure the model can be saved
os.makedirs(directory, exist_ok=True)

# Define the number of classes for the classification task
# Adjust this according to your dataset (for example, emotions)
num_classes = 3  # Example number of classes

# Create a sequential model for LSTM
model = tf.keras.Sequential()

# Reshape the training data to add a time steps dimension
# Assuming X_train originally has shape (num_samples, features)
new_train = X_train.reshape((X_train.shape[0], 1, 7))  # Add a timesteps dimension

# Add LSTM layers to the model
model.add(LSTM(128, input_shape=(None, 7), return_sequences=True))  # First LSTM layer
model.add(LSTM(128))  # Second LSTM layer (returns only the last output)

# Add a dense layer for feature extraction
model.add(Dense(64, activation='relu'))  # Fully connected layer with ReLU activation

# Add the final dense layer for classification with softmax activation
model.add(Dense(num_classes, activation='softmax'))  # Output layer for class probabilities

# Compile the model with the Adam optimizer and specify the loss function and metrics
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model using the training data
# Ensure that X_train has the appropriate shape: (batch_size, timesteps, features)
model.fit(new_train, y_train, epochs=10)  # Train for 10 epochs

# Save the trained model to the specified path
model.save(model_path)

# Optionally, recompile the model (not necessary right after training)
# This step can be useful if you want to change the optimizer or loss function later
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


## REQUIRED

In [246]:
# Update Sequential model to fit the 7-feature data
directory = './newmodels/'
model_path = os.path.join(directory, 'new_model.model.keras')
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(7,)))  # Input shape updated to (7,)
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))  # Adjusting num_classes

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model with your training data
model.fit(X_train, y_train, epochs=20)

# Save the model
model.save(model_path)

# Recompile the model for further use if needed
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Epoch 1/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - accuracy: 0.3532 - loss: 55225.8125
Epoch 2/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.3664 - loss: 15547.7441
Epoch 3/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.4489 - loss: 4983.7852
Epoch 4/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5351 - loss: 2332.7700
Epoch 5/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6039 - loss: 1174.6277
Epoch 6/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.6151 - loss: 809.9397
Epoch 7/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6599 - loss: 246.2161
Epoch 8/20
[1m2159/2159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.5172 - loss: 0.8354
Epo

In [247]:
import numpy as np
import tensorflow as tf
import pandas as pd
import os

# Function to load and preprocess a single .svc file for prediction
def load_single_svc_file(file_path, expected_features=7):
    try:
        # Open the .svc file and read its lines
        with open(file_path, 'r') as f:
            # Strip whitespace and filter out empty lines
            lines = [line.strip() for line in f.readlines() if line.strip()]

        # Create a DataFrame from the lines
        df = pd.DataFrame([line.split() for line in lines])

        # Raise an error if the DataFrame is empty after processing
        if df.empty:
            raise ValueError(f"{file_path} is empty after processing.")

        # Extract data from the DataFrame, ignoring the first row (header)
        data = df.iloc[1:, :].values
        data = np.array(data, dtype=float)

        # Debugging print to show the initial data loaded
        print(f"Initial data loaded from {file_path}:\n{data}")

        # Ensure the data has the correct number of features
        if data.shape[1] > expected_features:
            data = data[:, :expected_features]  # Truncate to expected_features
            print(f"Truncated data to {expected_features} features:\n{data}")
        elif data.shape[1] < expected_features:
            # Pad with zeros if there are too few features
            padding = np.zeros((data.shape[0], expected_features - data.shape[1]))
            data = np.hstack((data, padding))
            print(f"Padded data to {expected_features} features:\n{data}")

        return data

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

# Path to the .svc file for prediction
file_path = '../test/sampleLabeled/1_hw00002(2).svc'

# Load the custom model for prediction
custom_objects = {'TransformerClassifier': TransformerClassifier}  # Adjust as necessary
model = tf.keras.models.load_model('./newmodels/2Emotion-detectiontest.model.keras')

# Load and preprocess the .svc file
data = load_single_svc_file(file_path)

if data is not None:
    expected_features = 7  # The number of features your model expects

    # Debugging prints for data shapes
    print(f"Original data shape: {data.shape}")
    total_elements = data.size
    print(f"Total elements in data: {total_elements}")

    # Calculate complete samples
    complete_samples = total_elements // expected_features
    print(f"Complete samples possible: {complete_samples}")

    # Adjust data to match complete samples
    data = data[:complete_samples * expected_features]
    print(f"Adjusted data shape for reshaping: {data.shape}")

    # Ensure that the data can be reshaped correctly
    if data.size % expected_features != 0:
        raise ValueError("The data cannot be reshaped into the required shape.")

    # Reshape the data
    data = data.reshape((complete_samples, expected_features))
    print(f"Reshaped data shape: {data.shape}")

    # Add timesteps dimension
    data = np.expand_dims(data, axis=1)  # New shape: (samples, timesteps, features)
    print(f"Data shape after adding timesteps dimension: {data.shape}")

    # Predict the class probabilities
    predictions = model.predict(data)
    print(f"Predictions shape: {predictions.shape}")
    print(f"Predictions: {predictions}")

    # Get the predicted class
    predicted_class = np.argmax(predictions, axis=1)
    print(f"Predicted class indices: {predicted_class}")

    # Output the predicted class with emotion labels
    class_labels = ['Depression', 'Anxiety', 'Stress']  # Adjust as necessary
    print(f"Predicted emotion: {class_labels[predicted_class[0]]}")


ValueError: File not found: filepath=./newmodels.model.keras. Please ensure the file is an accessible `.keras` zip file.

In [None]:
print("Model summary:")
model.summary()

In [None]:
print(predictions)

In [None]:
print(data)

In [None]:
for i in predictions:
    print(i)