In [2]:
# Convert audio file to features (log mel mean, log mel max, log mel min, log mel delta mean, log mel delta max, log mel delta min )
num_loaded = 0
target_frames = 86  # ≈ 2 seconds with librosa defaults (sr=22050, hop_length=512)
printProgressBar(0, len(audio_files), prefix='Progress:', suffix='Complete', length=50)
input_features = np.empty((len(audio_files), 2), dtype=object)

for afile in audio_files:
    # Extract y = (the raw data), and sr = (integer value of sample rate)
    y, sr = librosa.load(afile)
    # Apply STFT
    D = librosa.stft(y)
    # Retreive Mel
    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=128 * 2,)
    S_decible_mel = librosa.amplitude_to_db(S, ref=np.max)
    # Extract Log Mel spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    # Extract Delta Mel spectrogram
    delta_log_mel_spectrogram = librosa.feature.delta(log_mel_spectrogram)
    input_features[num_loaded, 0] = log_mel_spectrogram
    input_features[num_loaded, 1] = delta_log_mel_spectrogram
    # sample_features = np.stack([(log_mel_spectrogram.T, delta_log_mel_spectrogram.T)], axis=-1)
    # input_features.append(sample_features)
    num_loaded += 1
    printProgressBar(
        num_loaded,
        len(audio_files),
        prefix='Progress:',
        suffix=f'  [{num_loaded}/{len(audio_files)}]',
        length=50
    )

Progress: |██████████████████████████████████████████████████| 100.0%   [24577/24577]


In [25]:
# Convert audio file to features (log mel spectogram and log mel delta)
num_loaded = 0
target_frames = 86  # ≈ 2 seconds with librosa defaults (sr=22050, hop_length=512)
printProgressBar(0, len(audio_files), prefix='Progress:', suffix='Complete', length=50)
input_features_list = []

for afile in audio_files:
    # Extract y = (the raw data), and sr = (integer value of sample rate)
    y, sr = librosa.load(afile)
    
    # Apply STFT
    D = librosa.stft(y)
    
    # Retreive Mel
    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=128 * 2,)
    S_decible_mel = librosa.amplitude_to_db(S, ref=np.max)
    
    # Extract Log Mel spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Standardize the length using padding/truncation
    # The shape is (n_mels, time_frames). We fix the second dimension.
    fixed_log_mel = librosa.util.fix_length(log_mel_spectrogram, 
                                            size=target_frames, 
                                            axis=1, 
                                            constant_values=0 # Pad with zeros if shorter
                                            )
    
    # Extract Delta Mel spectrogram
    delta_log_mel_spectrogram = librosa.feature.delta(log_mel_spectrogram)
    
    # Standardize the delta spectrogram length too
    fixed_delta_log_mel = librosa.util.fix_length(delta_log_mel_spectrogram, 
                                                  size=target_frames, 
                                                  axis=1,
                                                  constant_values=0
                                                 )

    sample_features = np.stack([fixed_log_mel, fixed_delta_log_mel], axis=0)
    input_features_list.append(sample_features)
    num_loaded += 1
    printProgressBar(
        num_loaded,
        len(audio_files),
        prefix='Progress:',
        suffix=f'  [{num_loaded}/{len(audio_files)}]',
        length=50
    )
input_tensor = torch.tensor(np.array(input_features_list), dtype=torch.float32)
print(f"Final tensor shape: {input_tensor.shape}")

Progress: |██████████████████████████████████████████████████| 100.0%   [24577/24577]


NameError: name 'torch' is not defined

In [26]:
import torch
input_tensor = torch.tensor(np.array(input_features_list), dtype=torch.float32)
print(f"Final tensor shape: {input_tensor.shape}")

Final tensor shape: torch.Size([24577, 2, 128, 86])


In [27]:
torch.save(input_tensor, 'input.pt')

In [53]:
from progressbar import printProgressBar

import os
from glob import glob
from dataclasses import dataclass
import torch
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

import librosa
import librosa.display
import IPython.display as ipd
from itertools import cycle

sns.set_theme(style="white", palette=None)
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])


# File Paths

# Get the directory of the current script
script_dir = os.getcwd()
# Other Project Paths
root_dir = os.path.join(script_dir, '..', '..')
training_data_dir = os.path.join(root_dir, 'training data')
categories_dir = os.path.join(training_data_dir, 'categories')
cremad_dir = os.path.join(training_data_dir, 'files', 'CREMA-D')
emogator_dir = os.path.join(training_data_dir, 'files', 'Emogator', 'data', 'mp3')
datasheeet_path = os.path.join(categories_dir, 'data.xlsx')


# Read Data
audio_files = []
data_raw_df = pd.read_excel(datasheeet_path)
headers = data_raw_df.columns.values.tolist()
data_raw_noheaders_df = data_raw_df.values
data_df = pd.DataFrame(data_raw_noheaders_df)

# Extract Targets
emotion_target_categories = headers[3:11]
intensity_target_categories = headers[12:]
selected_emotion_targets_df = data_df.iloc[:, [i for i in range(3, 11)]]
selected_intensity_targets_df = data_df.iloc[:, [i for i in range(12, 15)]]
emotion_targets = selected_emotion_targets_df.to_numpy()
intensity_targets = selected_intensity_targets_df.to_numpy()

# Load the audio files
datasets = data_raw_df['Dataset'].values
files = data_raw_df['File'].values
num_loaded = 0
for dataset, file in zip(datasets, files):
    if dataset == 'CREMA-D':
        file_path = os.path.join(cremad_dir, file)
        audio_files.append(file_path)
        num_loaded += 1
    elif dataset == 'EmoGator':
        file_path = os.path.join(emogator_dir, file)
        audio_files.append(file_path)
        num_loaded += 1
print(f'Loaded {num_loaded} files from dataset.')

# Convert audio file to features (log mel spectogram and log mel delta)
num_loaded = 0
printProgressBar(0, len(audio_files), prefix='Progress:', suffix='Complete', length=50)
input_features_list = []

for afile in audio_files:
    # Extract y = (the raw data), and sr = (integer value of sample rate)
    y, sr = librosa.load(afile)
    
    
    # Apply STFT
    D = librosa.stft(y)
    
    # Retreive Mel
    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=128 * 2,)
    S_decible_mel = librosa.amplitude_to_db(S, ref=np.max)
    
    # Extract Log Mel spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    mean_mel_log = float(np.mean(log_mel_spectrogram))
    min_mel_log = float(np.min(log_mel_spectrogram))
    max_mel_log = float(np.max(log_mel_spectrogram))
    
    # Extract Delta Mel spectrogram
    delta_log_mel_spectrogram = librosa.feature.delta(log_mel_spectrogram)
    mean_delta_mel_log = float(np.mean(delta_log_mel_spectrogram))
    min_delta_mel_log = float(np.min(delta_log_mel_spectrogram))
    max_delta_mel_log = float(np.max(delta_log_mel_spectrogram))
    
    sample_features = np.array(
        [
            mean_mel_log,
            mean_delta_mel_log,
            max_mel_log,
            max_delta_mel_log,
            min_mel_log,
            min_delta_mel_log,
        ],
        dtype=np.float32,
    )
    input_features_list.append(sample_features)
    num_loaded += 1
    printProgressBar(
        num_loaded,
        len(audio_files),
        prefix='Progress:',
        suffix=f'  [{num_loaded}/{len(audio_files)}]',
        length=50
    )

# Save file to avoid preprocessing more
# np.savetxt('input_features.json', input_features, delimiter=',', fmt='%d', comments='')
np.savetxt('emotion_targets.csv', emotion_targets, delimiter=',', fmt='%d', comments='')
np.savetxt('intensity_targets.csv', intensity_targets, delimiter=',', fmt='%d', comments='')
np.savetxt('intensity_targets.csv', np.array(input_features_list), delimiter=',', fmt='%d', comments='')
# input_tensor = torch.tensor(np.array(input_features_list), dtype=torch.float32)
# torch.save(input_tensor, 'input.pt')
print(f"Final tensor shape: {input_tensor.shape}")
print(f'Wrote numpy arrays to file for training in {script_dir}')
print('Completed preprocessing!')

Loaded 24577 files from dataset.
Progress: |██████████████████████████████████████████████████| 100.0%   [24577/24577]
Final tensor shape: torch.Size([24577, 6])
Wrote numpy arrays to file for training in /Users/clinvil/Programs/ToneAI/labs/DNN
Completed preprocessing!


In [50]:
input_features_list[0]

array([-5.5341328e+01,  6.1176196e-02,  0.0000000e+00,  9.6657677e+00,
       -8.0000000e+01, -6.0179114e+00], dtype=float32)

In [49]:
torch.save(input_tensor, 'input.pt')
np.savetxt('intensity_targets.csv', intensity_targets, delimiter=',', fmt='%d', comments='')
np.savetxt('input.csv', np.array(input_features_list), delimiter=',', fmt='%d', comments='')

In [45]:
import os
import numpy as np
from torch.utils.data import TensorDataset, random_split, DataLoader
import torch.nn as nn
import torch

# Variables
train_slice = .75
validation_slice = .15
batch_size = 32


# Datasets
script_dir = os.getcwd()
input_features_path = os.path.join(script_dir, 'input.pt')
input_features = torch.load(input_features_path).float()
emotion_targets_np = np.loadtxt(os.path.join(script_dir, 'emotion_targets.csv'), delimiter=',') 
emotion_targets = torch.from_numpy(emotion_targets_np)
dataset = TensorDataset(input_features, emotion_targets)

# Sanity check: same number of samples
assert input_features.shape[0] == emotion_targets.shape[0], \
    f"Input/target size mismatch: {input_features.shape[0]} vs {emotion_targets.shape[0]}"

# Split model sets
train_size = int(train_slice * len(dataset))
val_size = int(validation_slice * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# --------------------
# Model
# --------------------
input_dim = input_features.shape[1]      # e.g. 6 if you used [mean, max, min, ...]
output_dim = emotion_targets.shape[1]    # number of emotion targets

class SimpleDNN(nn.Module):
    def __init__(self, in_dim, out_dim):
        super(SimpleDNN, self).__init__()
        self.fc1 = nn.Linear(in_dim, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, out_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

model = SimpleDNN(input_dim, output_dim)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Optional: use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# --------------------
# Training loop
# --------------------
num_epochs = 10
for epoch in range(num_epochs):
    # ---- Train ----
    model.train()
    running_train_loss = 0.0

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_train_loss += loss.item()

    avg_train_loss = running_train_loss / len(train_loader)

    # ---- Validation ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            val_loss += criterion(outputs, targets).item()

    avg_val_loss = val_loss / len(val_loader)
    print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

# --------------------
# Test evaluation
# --------------------
model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        test_loss += criterion(outputs, targets).item()

avg_test_loss = test_loss / len(test_loader)
print(f'Test Loss: {avg_test_loss:.4f}')

RuntimeError: Found dtype Double but expected Float

In [18]:
input_features[1,0].shape

(128, 101)

In [19]:
input_features[1,1].shape

(128, 101)

In [4]:
# Save file to avoid preprocessing more
# np.savetxt('input_features.json', input_features, delimiter=',', fmt='%d', comments='')
np.savetxt('emotion_targets.csv', emotion_targets, delimiter=',', fmt='%d', comments='')
np.savetxt('intensity_targets.csv', intensity_targets, delimiter=',', fmt='%d', comments='')
np.save('input_features.npy', input_features)
print(f'Wrote numpy arrays to file for training in {script_dir}')
print('Completed preprocessing!')

Wrote numpy arrays to file for training in /Users/clinvil/Programs/ToneAI/labs/DNN
Completed preprocessing!


In [6]:
t_inputs = np.load(os.path.join(script_dir, 'input_features.npy'), allow_pickle=True)

In [7]:
t_inputs.shape

(24577, 2)

In [37]:
emotion_targets = np.loadtxt(os.path.join(script_dir, 'emotion_targets.csv'), delimiter=',') 

In [43]:
    emotion_targets.shape

(24577, 8)

In [17]:
t_inputs[0][1]

array([[ 2.1764698e+00,  2.1764698e+00,  2.1764698e+00, ...,
        -7.8147256e-01, -7.8147256e-01, -7.8147256e-01],
       [ 3.3123832e+00,  3.3123832e+00,  3.3123832e+00, ...,
        -2.8086445e-01, -2.8086445e-01, -2.8086445e-01],
       [ 3.4211106e+00,  3.4211106e+00,  3.4211106e+00, ...,
        -4.6701780e-01, -4.6701780e-01, -4.6701780e-01],
       ...,
       [ 3.8968581e-15,  3.8968581e-15,  3.8968581e-15, ...,
         3.8968581e-15,  3.8968581e-15,  3.8968581e-15],
       [ 3.8968581e-15,  3.8968581e-15,  3.8968581e-15, ...,
         3.8968581e-15,  3.8968581e-15,  3.8968581e-15],
       [ 3.8968581e-15,  3.8968581e-15,  3.8968581e-15, ...,
         3.8968581e-15,  3.8968581e-15,  3.8968581e-15]],
      shape=(128, 99), dtype=float32)

In [19]:
import tensorflow as tf
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [27]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])
predictions = model(x_train[:1]).numpy()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [28]:
model.fit(x_train, y_train, epochs=5)

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9146 - loss: 0.2969
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9572 - loss: 0.1477
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9670 - loss: 0.1105
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9730 - loss: 0.0900
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9755 - loss: 0.0787


<keras.src.callbacks.history.History at 0x1772e0f50>

In [29]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - 801us/step - accuracy: 0.9762 - loss: 0.0778


[0.0778232142329216, 0.9761999845504761]

In [32]:
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[1.4479136e-06, 1.3826105e-08, 4.1731579e-05, 7.1064045e-04,
        1.7275116e-10, 4.9821705e-07, 3.8129198e-11, 9.9921727e-01,
        6.4445530e-06, 2.2055814e-05],
       [2.3529320e-07, 1.9199195e-04, 9.9979466e-01, 6.9858806e-06,
        2.1186967e-16, 2.4818557e-06, 1.3316050e-08, 1.1141943e-16,
        3.6092849e-06, 9.0926878e-15],
       [6.2030398e-07, 9.9941486e-01, 1.2275181e-04, 1.3997315e-05,
        6.5566099e-05, 1.1564008e-06, 1.6209198e-05, 7.2746698e-05,
        2.9184890e-04, 1.9371008e-07],
       [9.9989861e-01, 6.3130554e-09, 1.0407096e-05, 1.6868885e-08,
        3.0503568e-08, 3.4601075e-07, 2.6689897e-05, 1.0980951e-06,
        5.1752711e-08, 6.2811443e-05],
       [8.1732342e-06, 6.6827849e-11, 9.6839040e-06, 1.3209448e-08,
        9.9593085e-01, 1.0363929e-07, 4.7587181e-07, 1.8974736e-04,
        1.0322190e-06, 3.8598701e-03]], dtype=float32)>

In [34]:
len(emotion_target_categories)

8

In [56]:
from progressbar import printProgressBar

import os
from glob import glob
from dataclasses import dataclass
import torch
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

import librosa
import librosa.display
import IPython.display as ipd
from itertools import cycle

sns.set_theme(style="white", palette=None)
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])


# File Paths

# Get the directory of the current script
script_dir = os.getcwd()
# Other Project Paths
root_dir = os.path.join(script_dir, '..', '..')
training_data_dir = os.path.join(root_dir, 'training data')
categories_dir = os.path.join(training_data_dir, 'categories')
cremad_dir = os.path.join(training_data_dir, 'files', 'CREMA-D')
emogator_dir = os.path.join(training_data_dir, 'files', 'Emogator', 'data', 'mp3')
datasheeet_path = os.path.join(categories_dir, 'data.xlsx')


# Read Data
audio_files = []
data_raw_df = pd.read_excel(datasheeet_path)
headers = data_raw_df.columns.values.tolist()
data_raw_noheaders_df = data_raw_df.values
data_df = pd.DataFrame(data_raw_noheaders_df)

# Extract Targets
emotion_target_categories = headers[3:11]
intensity_target_categories = headers[12:]
selected_emotion_targets_df = data_df.iloc[:, [i for i in range(3, 11)]]
selected_intensity_targets_df = data_df.iloc[:, [i for i in range(12, 15)]]
emotion_targets = selected_emotion_targets_df.to_numpy()
intensity_targets = selected_intensity_targets_df.to_numpy()

# Load the audio files
datasets = data_raw_df['Dataset'].values
files = data_raw_df['File'].values
num_loaded = 0
for dataset, file in zip(datasets, files):
    if dataset == 'CREMA-D':
        file_path = os.path.join(cremad_dir, file)
        audio_files.append(file_path)
        num_loaded += 1
    elif dataset == 'EmoGator':
        file_path = os.path.join(emogator_dir, file)
        audio_files.append(file_path)
        num_loaded += 1
print(f'Loaded {num_loaded} files from dataset.')

# Convert audio file to features (log mel spectogram and log mel delta)
num_loaded = 0
target_frames = 86  # ≈ 2 seconds with librosa defaults (sr=22050, hop_length=512)
printProgressBar(0, len(audio_files), prefix='Progress:', suffix='Complete', length=50)
emotion_input_features_list = []
intensity_input_features_list = []

for afile in audio_files:
    # Extract y = (the raw data), and sr = (integer value of sample rate)
    y, sr = librosa.load(afile)
    
    # Apply STFT
    D = librosa.stft(y)
    
    # Retreive Mel
    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=128 * 2,)
    S_decible_mel = librosa.amplitude_to_db(S, ref=np.max)
    
    # Extract Log Mel spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Standardize the length using padding/truncation #? The shape is (n_mels, time_frames) #?
    fixed_log_mel = librosa.util.fix_length(log_mel_spectrogram, 
                                            size=target_frames, 
                                            axis=1, 
                                            constant_values=0)
    
    # Build input for emotion model
    emotion_sample_features = fixed_log_mel.flatten(order='C')
    emotion_input_features_list.append(emotion_sample_features)

    
    # Extract Delta Mel spectrogram
    delta_log_mel_spectrogram = librosa.feature.delta(log_mel_spectrogram)
    
    # Standardize the delta spectrogram length too
    fixed_delta_log_mel = librosa.util.fix_length(delta_log_mel_spectrogram, 
                                                  size=target_frames, 
                                                  axis=1,
                                                  constant_values=0)

    # Build input for intensity model
    intensity_sample_features = fixed_delta_log_mel.flatten(order='C')
    intensity_input_features_list.append(intensity_sample_features)
    
    num_loaded += 1
    printProgressBar(
        num_loaded,
        len(audio_files),
        prefix='Progress:',
        suffix=f'  [{num_loaded}/{len(audio_files)}]',
        length=50
    )

# Save file to avoid preprocessing more
# np.savetxt('input_features.json', input_features, delimiter=',', fmt='%d', comments='')
np.savetxt('emotion_targets.csv', emotion_targets, delimiter=',', fmt='%d', comments='')
np.savetxt('intensity_targets.csv', intensity_targets, delimiter=',', fmt='%d', comments='')
emotion_input_tensor = torch.tensor(np.array(emotion_input_features_list), dtype=torch.double)
intensity_input_tensor = torch.tensor(np.array(intensity_input_features_list), dtype=torch.double)
print(f"Final EMOTION tensor shape: {emotion_input_tensor.shape}")
print(f"Final INTENSITY tensor shape: {intensity_input_tensor.shape}")
print(f'Wrote numpy arrays to file for training in {script_dir}')
print('Completed preprocessing!')

Loaded 24577 files from dataset.
Progress: |██████████████████████████████████████████████████| 100.0%   [24577/24577]
Final EMOTION tensor shape: torch.Size([24577, 11008])
Final INTENSITY tensor shape: torch.Size([24577, 11008])
Wrote numpy arrays to file for training in /Users/clinvil/Programs/ToneAI/labs/DNN
Completed preprocessing!


In [57]:
emotion_input_tensor.shape

torch.Size([24577, 11008])

In [58]:
intensity_input_tensor.shape

torch.Size([24577, 11008])

In [59]:
torch.save(emotion_input_tensor, 'model1_input.pt')
torch.save(intensity_input_tensor, 'model2_input.pt')