In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import json
import librosa
import numpy as np

pose_root = "/content/drive/MyDrive/Trial_db/pro_json"
audio_root = "/content/drive/MyDrive/Trial_db/pro_audio"
data = []
labels = []

label_mapping = {
    'salsa': 0,
    'tap': 1,
    'ballet': 2,
    'contemporary': 3,
    'hip_hop': 4
}

for label_folder in os.listdir(pose_root):
    label_folder_path = os.path.join(pose_root, label_folder)
    for filename in os.listdir(label_folder_path):
        if filename.endswith('.json'):
            # Load the pose data from the JSON file
            with open(os.path.join(label_folder_path, filename), "r") as f:
                pose_data = json.load(f)
            pose_data = np.array(pose_data)

            # Extract features from pose data
            distances = np.sqrt(np.sum((pose_data[:, 0, :] - pose_data[:, 1, :])**2, axis=-1))
            v1 = pose_data[:, 1, :] - pose_data[:, 0, :]
            v2 = pose_data[:, 1, :] - pose_data[:, 2, :]
            angles = np.arccos(np.sum(v1*v2, axis=-1) / (np.linalg.norm(v1, axis=-1) * np.linalg.norm(v2, axis=-1)))
            pose_features = np.stack([distances, angles], axis=-1)
            pose_features_mean = np.mean(pose_features, axis=0)

            # Load the corresponding audio file
            audio_filename = filename.replace('_data.json', '_processed.wav')

            audio, sr = librosa.load(os.path.join(audio_root, label_folder, audio_filename))

            # Compute the spectrogram of the audio signal and convert it to logarithmic scale
            spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr)
            log_spectrogram = librosa.power_to_db(spectrogram)
            log_spectrogram_mean = np.mean(log_spectrogram, axis=1)

            # Concatenate pose and audio features
            features = np.concatenate([pose_features_mean, log_spectrogram_mean])

            data.append(features)

            # The label is determined by the folder name
            label = label_mapping[label_folder.lower()]
            labels.append(label)

# Convert data and labels to numpy arrays
data = np.array(data)
labels = np.array(labels)


In [3]:
data = data.reshape((data.shape[0], data.shape[1], 1))
labels = labels.reshape((labels.shape[0], 1))
data, label

(array([[[ 2.15085123e-02],
         [ 1.75697221e+00],
         [-3.37842979e+01],
         ...,
         [-6.21707535e+01],
         [-6.25044746e+01],
         [-6.31710281e+01]],
 
        [[ 1.72706956e-02],
         [ 1.82099307e+00],
         [-1.87621670e+01],
         ...,
         [-5.05472107e+01],
         [-5.05552444e+01],
         [-5.11638298e+01]],
 
        [[ 1.23731910e-02],
         [ 1.78958025e+00],
         [-1.77190857e+01],
         ...,
         [-5.13020325e+01],
         [-5.20816040e+01],
         [-5.31114731e+01]],
 
        ...,
 
        [[ 1.72736278e-02],
         [ 1.83838585e+00],
         [-8.91963673e+00],
         ...,
         [-4.21048393e+01],
         [-4.46740417e+01],
         [-5.34418106e+01]],
 
        [[ 2.02399118e-02],
         [ 1.86143073e+00],
         [-3.85895424e+01],
         ...,
         [-4.44532890e+01],
         [-4.69616814e+01],
         [-5.45545959e+01]],
 
        [[ 1.84674105e-02],
         [ 1.86962248e+00],
    

In [4]:
from sklearn.model_selection import train_test_split

indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]




In [5]:
from sklearn.model_selection import train_test_split

# Split data into 80% training and 20% remaining data (validation + test)
X_train, X_remaining, y_train, y_remaining = train_test_split(data, labels, test_size=0.2, random_state=42)

# Split remaining data (20%) into 50% validation and 50% test
X_validation, X_test, y_validation, y_test = train_test_split(X_remaining, y_remaining, test_size=0.5, random_state=42)


In [15]:
# Reshape to 2D
X_train_2D = X_train.reshape((X_train.shape[0] * X_train.shape[1], X_train.shape[2]))

# Fit the scaler
scaler.fit(X_train_2D)

# Transform the data
X_train_2D_norm = scaler.transform(X_train_2D)

# Reshape back to 3D
X_train_norm = X_train_2D_norm.reshape(X_train.shape)


In [16]:
# Transform validation data
X_validation_2D = X_validation.reshape((X_validation.shape[0] * X_validation.shape[1], X_validation.shape[2]))
X_validation_2D_norm = scaler.transform(X_validation_2D)
X_validation_norm = X_validation_2D_norm.reshape(X_validation.shape)

# Transform test data
X_test_2D = X_test.reshape((X_test.shape[0] * X_test.shape[1], X_test.shape[2]))
X_test_2D_norm = scaler.transform(X_test_2D)
X_test_norm = X_test_2D_norm.reshape(X_test.shape)


In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed

# Assuming you have already split the data into X_train, X_validation, X_test, y_train, y_validation, y_test

# Build the LSTM autoencoder model
encoder = Sequential([
    LSTM(128, activation='relu', input_shape=(SEQUENCE_LENGTH, 1), return_sequences=False),
    RepeatVector(SEQUENCE_LENGTH)
])

decoder = Sequential([
    LSTM(64, activation='relu', return_sequences=True, input_shape=(SEQUENCE_LENGTH, 128)),
    LSTM(128, activation='relu', return_sequences=True),
    TimeDistributed(Dense(1))
])

# Compile the model
model = Sequential([encoder, decoder])
model.compile(optimizer = tf.keras.optimizers.Adam(clipvalue=1.0), loss='mse')

# Train the model on the training set
history = model.fit(X_train, X_train, epochs=50, batch_size=32, validation_data=(X_validation, X_validation))

# Evaluate the model on the validation set
val_loss = model.evaluate(X_validation, X_validation)
print("Validation Loss:", val_loss)

# Evaluate the model on the test set
test_loss = model.evaluate(X_test, X_test)
print("Test Loss:", test_loss)




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Validation Loss: 28031653888.0
Test Loss: 37718863872.0
