In [18]:
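# DQN  (NOTE(review): this cell trains a CNN classifier with cross-entropy loss; it contains no Q-learning update)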

import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torchvision.transforms import ToTensor

# Specify the base directory
base_dir = 'Desktop/Saved/'

# Specify the dimensions of your spectrogram images
height, width, channels = 64, 64, 3  # Adjust these dimensions based on your actual spectrogram size

# Function to load and preprocess an image
def load_and_preprocess_image(file_path):
    """Load an image file and return it as a normalised, channel-first array.

    The image is read with OpenCV, resized to the module-level ``width`` x
    ``height``, divided by 255 and transposed from (height, width, channels)
    to (channels, height, width).

    Args:
        file_path: Path of the image file to read.

    Returns:
        A floating-point NumPy array laid out as (channels, height, width).

    Raises:
        OSError: If OpenCV could not read or decode ``file_path``.
    """
    img = cv2.imread(file_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if img is None:
        raise OSError(f'Could not read image: {file_path}')
    # NOTE(review): OpenCV documents BGR channel order for colour images,
    # while the PIL-based loader elsewhere in this notebook converts to RGB --
    # confirm whether the channel order matters for your comparison.
    img = cv2.resize(img, (width, height))  # cv2 takes the size as (width, height)
    img_array = img / 255.0  # Normalize pixel values to [0, 1]
    return np.transpose(img_array, (2, 0, 1))  # HWC -> CHW for PyTorch

# DQN model class (a small CNN) built with PyTorch
class DQNModel(nn.Module):
    """Small two-stage convolutional network emitting one score per class.

    Layout: (3x3 conv -> ReLU -> 2x2 max-pool) twice, flatten, a 64-unit
    hidden layer with ReLU, then a linear layer with ``num_classes`` outputs.
    No softmax is applied, so ``forward`` returns raw scores.

    Despite the name, nothing in this class is specific to Q-learning.

    Args:
        input_shape: ``(channels, height, width)`` of a single input image.
        num_classes: Number of output units.
    """

    def __init__(self, input_shape, num_classes):
        super().__init__()
        # Size the layers from the argument rather than from module-level
        # globals, so the network always matches the shape the caller declares.
        in_channels, in_height, in_width = input_shape
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        # The convolutions keep the spatial size (3x3, stride 1, padding 1) and
        # each pool halves it (rounding down), hence the two // 4 factors.
        self.fc1 = nn.Linear(64 * (in_height // 4) * (in_width // 4), 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return per-class scores for a batch ``x`` of channel-first images."""
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Walk every (actor, emotion, intensity, statement, repetition) combination
# and load whichever spectrogram images exist on disk.
from itertools import product

all_data = []
all_labels = []

# product() yields the combinations in the same order as the equivalent
# nested loops (rightmost range varies fastest).
for actor, emotion, intensity, statement, repetition in product(
    range(1, 25), range(1, 9), range(1, 3), range(1, 3), range(1, 3)
):
    # Recording folder name, built from the filename identifiers
    file_name = f'03-01-{emotion:02d}-{intensity:02d}-{statement:02d}-{repetition:02d}-{actor:02d}'
    file_path = os.path.join(base_dir, f'Actor_{actor:02d}', file_name, 'mono_data-channel-frequency.png')

    # Not every combination has a file; report the gap and move on.
    if not os.path.exists(file_path):
        print(f'File not found: {file_path}')
        continue

    # Load and preprocess the image
    spectrogram = load_and_preprocess_image(file_path)

    # The class label combines emotion and intensity, e.g. '03-02'
    all_data.append(spectrogram)
    all_labels.append(f'{emotion:02d}-{intensity:02d}')

    print(f'Processed: {file_path}')

# Fail early with an actionable message when nothing was loaded (for example
# when the relative base directory does not resolve from the current working
# directory); otherwise the error only appears later inside train_test_split.
# len() is used rather than truthiness so the check also works if all_data has
# already been converted to an array or tensor by a previous run of this cell.
if len(all_data) == 0:
    raise ValueError(f'No spectrograms were loaded from {base_dir!r}; check the base directory.')

# Stack the per-image arrays and the label strings into NumPy arrays
all_data = np.array(all_data)
all_labels = np.array(all_labels)

# Map each distinct 'emotion-intensity' string to an integer class index
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(all_labels)
num_classes = len(label_encoder.classes_)

# Convert to PyTorch tensors: integer class indices and float32 images
encoded_labels = torch.tensor(encoded_labels, dtype=torch.long)
all_data = torch.tensor(all_data, dtype=torch.float32)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is random over all samples -- it is neither
# stratified by label nor grouped by actor. Confirm this is intended.
train_data, test_data, train_labels, test_labels = train_test_split(
    all_data, encoded_labels, test_size=0.2, random_state=42
)

# Wrap the split tensors as datasets and batch them 32 at a time; only the
# training loader shuffles its samples.
train_dataset, test_dataset = (
    TensorDataset(train_data, train_labels),
    TensorDataset(test_data, test_labels),
)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Network, cross-entropy loss and Adam optimiser used by the training loop
model = DQNModel(num_classes=num_classes, input_shape=(channels, height, width))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Train the model, printing the mean batch loss after every epoch and the
# accuracy on the held-out test loader after every tenth epoch.
epochs = 100
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for data, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate as a plain Python number
        total_loss += loss.item()

    print(f'Epoch {epoch + 1}, Average Loss: {total_loss / len(train_loader)}')

    if (epoch + 1) % 10 == 0:
        model.eval()
        correct = 0
        total = 0

        # No gradients are needed while scoring the test set
        with torch.no_grad():
            for data, labels in test_loader:
                outputs = model(data)
                # Predicted class = index of the highest score in each row
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = correct / total
        print(f'Epoch {epoch + 1}, Test accuracy: {accuracy * 100}%')

Processed: Desktop/Saved/Actor_01\03-01-01-01-01-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-01-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-02-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-02-02-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-01-01-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-01-02-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-02-01-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-02-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-01-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-01-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-02-01-01\mono_data-channel-frequency.png
Processed: Desktop/Save

In [19]:
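# Dueling DQN  (NOTE(review): the model below has a single fc output head -- no separate value/advantage streams -- and is trained as a cross-entropy classifier)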

import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Specify the base directory
base_dir = 'Desktop/Saved/'

# Specify the dimensions of your spectrogram images
height, width, channels = 64, 64, 3  # Adjust these dimensions based on your actual spectrogram size

# Function to load and preprocess an image
def load_and_preprocess_image(file_path):
    """Load an image file and return it as a normalised, channel-first array.

    The image is read with OpenCV, resized to the module-level ``width`` x
    ``height``, divided by 255 and transposed from (height, width, channels)
    to (channels, height, width).

    Args:
        file_path: Path of the image file to read.

    Returns:
        A floating-point NumPy array laid out as (channels, height, width).

    Raises:
        OSError: If OpenCV could not read or decode ``file_path``.
    """
    img = cv2.imread(file_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if img is None:
        raise OSError(f'Could not read image: {file_path}')
    # NOTE(review): OpenCV documents BGR channel order for colour images,
    # while the PIL-based loader elsewhere in this notebook converts to RGB --
    # confirm whether the channel order matters for your comparison.
    img = cv2.resize(img, (width, height))  # cv2 takes the size as (width, height)
    img_array = img / 255.0  # Normalize pixel values to [0, 1]
    return np.transpose(img_array, (2, 0, 1))  # HWC -> CHW for PyTorch

# DQN model class (a small CNN) built with PyTorch
class DQNModel(nn.Module):
    """Small two-stage convolutional network emitting one score per class.

    Layout: (3x3 conv -> ReLU -> 2x2 max-pool) twice, flatten, a 64-unit
    hidden layer with ReLU, then a linear layer with ``num_classes`` outputs.
    No softmax is applied, so ``forward`` returns raw scores.

    There is a single output head: this class does not split into separate
    value and advantage streams.

    Args:
        input_shape: ``(channels, height, width)`` of a single input image.
        num_classes: Number of output units.
    """

    def __init__(self, input_shape, num_classes):
        super().__init__()
        # Size the layers from the argument rather than from module-level
        # globals, so the network always matches the shape the caller declares.
        in_channels, in_height, in_width = input_shape
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        # The convolutions keep the spatial size (3x3, stride 1, padding 1) and
        # each pool halves it (rounding down), hence the two // 4 factors.
        self.fc1 = nn.Linear(64 * (in_height // 4) * (in_width // 4), 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return per-class scores for a batch ``x`` of channel-first images."""
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Walk every (actor, emotion, intensity, statement, repetition) combination
# and load whichever spectrogram images exist on disk.
from itertools import product

all_data = []
all_labels = []

# product() yields the combinations in the same order as the equivalent
# nested loops (rightmost range varies fastest).
for actor, emotion, intensity, statement, repetition in product(
    range(1, 25), range(1, 9), range(1, 3), range(1, 3), range(1, 3)
):
    # Recording folder name, built from the filename identifiers
    file_name = f'03-01-{emotion:02d}-{intensity:02d}-{statement:02d}-{repetition:02d}-{actor:02d}'
    file_path = os.path.join(base_dir, f'Actor_{actor:02d}', file_name, 'mono_data-channel-frequency.png')

    # Not every combination has a file; report the gap and move on.
    if not os.path.exists(file_path):
        print(f'File not found: {file_path}')
        continue

    # Load and preprocess the image
    spectrogram = load_and_preprocess_image(file_path)

    # The class label combines emotion and intensity, e.g. '03-02'
    all_data.append(spectrogram)
    all_labels.append(f'{emotion:02d}-{intensity:02d}')

    print(f'Processed: {file_path}')

# Fail early with an actionable message when nothing was loaded (for example
# when the relative base directory does not resolve from the current working
# directory); otherwise the error only appears later inside train_test_split.
# len() is used rather than truthiness so the check also works if all_data has
# already been converted to an array or tensor by a previous run of this cell.
if len(all_data) == 0:
    raise ValueError(f'No spectrograms were loaded from {base_dir!r}; check the base directory.')

# Stack the per-image arrays and the label strings into NumPy arrays
all_data = np.array(all_data)
all_labels = np.array(all_labels)

# Map each distinct 'emotion-intensity' string to an integer class index
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(all_labels)
num_classes = len(label_encoder.classes_)

# Convert to PyTorch tensors: integer class indices and float32 images
encoded_labels = torch.tensor(encoded_labels, dtype=torch.long)
all_data = torch.tensor(all_data, dtype=torch.float32)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is random over all samples -- it is neither
# stratified by label nor grouped by actor. Confirm this is intended.
train_data, test_data, train_labels, test_labels = train_test_split(
    all_data, encoded_labels, test_size=0.2, random_state=42
)

# Wrap the split tensors as datasets and batch them 32 at a time; only the
# training loader shuffles its samples.
train_dataset, test_dataset = (
    TensorDataset(train_data, train_labels),
    TensorDataset(test_data, test_labels),
)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Network, cross-entropy loss and Adam optimiser used by the training loop
model = DQNModel(num_classes=num_classes, input_shape=(channels, height, width))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Train the model, printing the mean batch loss after every epoch and the
# accuracy on the held-out test loader after every tenth epoch.
epochs = 100
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for data, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate as a plain Python number
        total_loss += loss.item()

    print(f'Epoch {epoch + 1}, Average Loss: {total_loss / len(train_loader)}')

    if (epoch + 1) % 10 == 0:
        model.eval()
        correct = 0
        total = 0

        # No gradients are needed while scoring the test set
        with torch.no_grad():
            for data, labels in test_loader:
                outputs = model(data)
                # Predicted class = index of the highest score in each row
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = correct / total
        print(f'Epoch {epoch + 1}, Test accuracy: {accuracy * 100}%')

Processed: Desktop/Saved/Actor_01\03-01-01-01-01-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-01-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-02-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-02-02-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-01-01-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-01-02-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-02-01-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-02-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-01-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-01-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-02-01-01\mono_data-channel-frequency.png
Processed: Desktop/Save

In [3]:
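# DDQN  (NOTE(review): no target network or double-Q update appears below; this cell trains a CNN classifier with cross-entropy loss)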

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image

# Specify the base directory
base_dir = 'Desktop/Saved/'

# Specify the dimensions of your spectrogram images
height, width, channels = 64, 64, 3  # Adjust these dimensions based on your actual spectrogram size

# Function to load and preprocess an image
def load_and_preprocess_image(file_path):
    """Load an image file and return it as a normalised, channel-first array.

    The image is opened with PIL, converted to RGB, resized to the
    module-level ``width`` x ``height``, divided by 255 and transposed from
    (height, width, channels) to (channels, height, width).

    Args:
        file_path: Path of the image file to read.

    Returns:
        A floating-point NumPy array laid out as (channels, height, width).
    """
    # Open inside a with-block so the file handle is released as soon as the
    # RGB copy exists, instead of lingering until garbage collection.
    with Image.open(file_path) as source:
        img = source.convert('RGB')  # Convert to 3 channels
    img = img.resize((width, height))  # PIL takes the size as (width, height)
    img_array = np.array(img) / 255.0  # Normalize pixel values to [0, 1]
    return np.transpose(img_array, (2, 0, 1))  # HWC -> CHW for PyTorch

# DQN model class (a small CNN) built with PyTorch
class DQNModel(nn.Module):
    """Small two-stage convolutional network emitting one score per class.

    Layout: (3x3 conv -> ReLU -> 2x2 max-pool) twice, flatten, a 64-unit
    hidden layer with ReLU, then a linear layer with ``num_classes`` outputs.
    No softmax is applied, so ``forward`` returns raw scores.

    Despite the name, nothing in this class is specific to Q-learning.

    Args:
        input_shape: ``(channels, height, width)`` of a single input image.
        num_classes: Number of output units.
    """

    def __init__(self, input_shape, num_classes):
        super().__init__()
        # Size the layers from the argument rather than from module-level
        # globals, so the network always matches the shape the caller declares.
        in_channels, in_height, in_width = input_shape
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        # The convolutions keep the spatial size (3x3, stride 1, padding 1) and
        # each pool halves it (rounding down), hence the two // 4 factors.
        self.fc1 = nn.Linear(64 * (in_height // 4) * (in_width // 4), 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return per-class scores for a batch ``x`` of channel-first images."""
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Walk every (actor, emotion, intensity, statement, repetition) combination
# and load whichever spectrogram images exist on disk.
from itertools import product

all_data = []
all_labels = []

# product() yields the combinations in the same order as the equivalent
# nested loops (rightmost range varies fastest).
for actor, emotion, intensity, statement, repetition in product(
    range(1, 25), range(1, 9), range(1, 3), range(1, 3), range(1, 3)
):
    # Recording folder name, built from the filename identifiers
    file_name = f'03-01-{emotion:02d}-{intensity:02d}-{statement:02d}-{repetition:02d}-{actor:02d}'
    file_path = os.path.join(base_dir, f'Actor_{actor:02d}', file_name, 'mono_data-channel-frequency.png')

    # Not every combination has a file; report the gap and move on.
    if not os.path.exists(file_path):
        print(f'File not found: {file_path}')
        continue

    # Load and preprocess the image
    spectrogram = load_and_preprocess_image(file_path)

    # The class label combines emotion and intensity, e.g. '03-02'
    all_data.append(spectrogram)
    all_labels.append(f'{emotion:02d}-{intensity:02d}')

    print(f'Processed: {file_path}')

# Fail early with an actionable message when nothing was loaded (for example
# when the relative base directory does not resolve from the current working
# directory); otherwise the error only appears later inside train_test_split.
# len() is used rather than truthiness so the check also works if all_data has
# already been converted to an array or tensor by a previous run of this cell.
if len(all_data) == 0:
    raise ValueError(f'No spectrograms were loaded from {base_dir!r}; check the base directory.')

# Stack the per-image arrays and the label strings into NumPy arrays
all_data = np.array(all_data)
all_labels = np.array(all_labels)

# Map each distinct 'emotion-intensity' string to an integer class index
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(all_labels)
num_classes = len(label_encoder.classes_)

# Convert to PyTorch tensors: integer class indices and float32 images
encoded_labels = torch.tensor(encoded_labels, dtype=torch.long)
all_data = torch.tensor(all_data, dtype=torch.float32)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is random over all samples -- it is neither
# stratified by label nor grouped by actor. Confirm this is intended.
train_data, test_data, train_labels, test_labels = train_test_split(
    all_data, encoded_labels, test_size=0.2, random_state=42
)

# Wrap the split tensors as datasets and batch them 32 at a time; only the
# training loader shuffles its samples.
train_dataset, test_dataset = (
    TensorDataset(train_data, train_labels),
    TensorDataset(test_data, test_labels),
)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Network, cross-entropy loss and Adam optimiser used by the training loop
model = DQNModel(num_classes=num_classes, input_shape=(channels, height, width))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Train the model, printing the mean batch loss after every epoch and the
# accuracy on the held-out test loader after every tenth epoch.
epochs = 100
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for data, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate as a plain Python number
        total_loss += loss.item()

    print(f'Epoch {epoch + 1}, Average Loss: {total_loss / len(train_loader)}')

    if (epoch + 1) % 10 == 0:
        model.eval()
        correct = 0
        total = 0

        # No gradients are needed while scoring the test set
        with torch.no_grad():
            for data, labels in test_loader:
                outputs = model(data)
                # Predicted class = index of the highest score in each row
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = correct / total
        print(f'Epoch {epoch + 1}, Test accuracy: {accuracy * 100}%')


Processed: Desktop/Saved/Actor_01\03-01-01-01-01-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-01-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-02-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-01-01-02-02-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-01-01-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-01-02-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-02-01-01\mono_data-channel-frequency.png
File not found: Desktop/Saved/Actor_01\03-01-01-02-02-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-01-01-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-01-02-01\mono_data-channel-frequency.png
Processed: Desktop/Saved/Actor_01\03-01-02-01-02-01-01\mono_data-channel-frequency.png
Processed: Desktop/Save