In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from transformers import Wav2Vec2Processor, Wav2Vec2Model
import pickle
import os


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define the AudioCNN class
class AudioCNN(nn.Module):
    """1-D CNN classifier: three conv/ReLU/max-pool stages and a two-layer head.

    Inputs are expected as ``(batch, 1, length)`` tensors. Every conv uses
    ``kernel_size=3, padding=1`` (length preserved) and every pool halves the
    length, so the flattened feature size depends on ``length``. ``fc1`` must
    therefore be sized for the input length before the model is used.

    Args:
        num_classes: Number of output logits produced by ``fc2``.
        input_size: Optional input length. When given, ``fc1`` is sized
            immediately. When omitted, ``fc1`` is a 64-input placeholder and
            ``initialize_fc1`` must be called before ``forward``.
    """

    def __init__(self, num_classes, input_size=None):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        # Placeholder width; replaced by initialize_fc1 once the input length is known.
        self.fc1 = nn.Linear(64, 128)
        self.fc2 = nn.Linear(128, num_classes)
        if input_size is not None:
            self.initialize_fc1(input_size)

    def _extract_features(self, x):
        """Apply the three conv -> ReLU -> max-pool stages (no flattening)."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        return x

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self._extract_features(x)
        x = x.view(x.size(0), -1)  # Flatten the features for the linear layer
        x = F.relu(self.fc1(x))
        return self.fc2(x)

    def initialize_fc1(self, input_size):
        """Replace ``fc1`` with a layer sized for inputs of length ``input_size``.

        The flattened size is measured by pushing one all-zero probe through
        the conv stack. The probe runs under ``no_grad`` and uses zeros, so it
        builds no autograd graph and does not draw from the global RNG.

        Note: this creates a freshly initialised ``fc1``. Call it before
        building an optimizer or loading a state dict.
        """
        reference = self.conv1.weight
        with torch.no_grad():
            probe = torch.zeros(1, 1, input_size, device=reference.device, dtype=reference.dtype)
            output_size = self._extract_features(probe).numel()
        # Keep the new layer on the same device/dtype as the rest of the model.
        self.fc1 = nn.Linear(output_size, 128).to(device=reference.device, dtype=reference.dtype)

# Function to load data
def load_data(dataset_path, data_type, sampling_rate=16000, num_files=50,
              model_name="facebook/wav2vec2-large-960h"):
    """Load pickled ``(waveform, label)`` pairs and embed them with wav2vec2.

    Looks for ``data_<i>_<data_type>.pkl`` with ``i`` in ``1..num_files`` under
    ``dataset_path``; missing files are skipped. Each waveform is run through
    the pretrained wav2vec2 encoder and its hidden states are averaged over
    the frame axis, giving one fixed-size feature vector per sample.

    Args:
        dataset_path: Directory containing the pickle files.
        data_type: Split name embedded in the file names (e.g. ``"train"``).
        sampling_rate: Sampling rate passed to the wav2vec2 processor.
        num_files: Highest file index to look for (inclusive).
        model_name: Checkpoint name used for both processor and encoder.

    Returns:
        ``(features, labels)``: features stacked along a new first axis, and a
        tensor built from the first element of each stored label.

    Raises:
        RuntimeError: If no matching file exists or the files hold no samples.
    """
    candidate_paths = (
        os.path.join(dataset_path, f"data_{file_number}_{data_type}.pkl")
        for file_number in range(1, num_files + 1)
    )
    file_paths = [path for path in candidate_paths if os.path.exists(path)]
    if not file_paths:
        raise RuntimeError(
            f"no 'data_<n>_{data_type}.pkl' files found in {dataset_path!r}"
        )

    # Load the (large) pretrained model only once we know there is data to embed.
    processor = Wav2Vec2Processor.from_pretrained(model_name)
    model_wav2vec2 = Wav2Vec2Model.from_pretrained(model_name)
    model_wav2vec2.eval()  # make inference mode explicit

    data = []
    labels = []
    for file_path in file_paths:
        # SECURITY: pickle.load can execute arbitrary code. Only point this
        # function at dataset files you created or otherwise trust.
        with open(file_path, 'rb') as file:
            file_data = pickle.load(file)
        for waveform, label in file_data:
            input_values = processor(
                waveform.squeeze().numpy(),
                return_tensors="pt",
                sampling_rate=sampling_rate,
            ).input_values
            with torch.no_grad():
                hidden_states = model_wav2vec2(input_values).last_hidden_state
            # squeeze(0) drops only the batch axis. A bare squeeze() would also
            # drop a length-1 frame axis and make mean(dim=0) average the
            # wrong dimension.
            data.append(hidden_states.squeeze(0).mean(dim=0))
            labels.append(label[0])

    if not data:
        raise RuntimeError(
            f"'{data_type}' files in {dataset_path!r} contain no samples"
        )
    print("accessing files completed.......")
    return torch.stack(data), torch.tensor(labels)


In [3]:

# Build one DataLoader per split from the pickle files under dataset_path
dataset_path = 'emi_dataset/'

# Training split: the only loader that shuffles its samples
train_data, train_labels = load_data(dataset_path=dataset_path, data_type="train")
print("train data load complete")
train_loader = DataLoader(
    dataset=TensorDataset(train_data, train_labels),
    batch_size=32,
    shuffle=True,
)

# Validation split: fixed order
validate_data, validate_labels = load_data(dataset_path=dataset_path, data_type="valid")
print("valid data load complete")
validate_loader = DataLoader(
    dataset=TensorDataset(validate_data, validate_labels),
    batch_size=32,
    shuffle=False,
)

# Test split: fixed order
test_data, test_labels = load_data(dataset_path=dataset_path, data_type="test")
print("test data load complete")
test_loader = DataLoader(
    dataset=TensorDataset(test_data, test_labels),
    batch_size=32,
    shuffle=False,
)


Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


accessing files completed.......
train data load complete


Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


accessing files completed.......
valid data load complete


Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


accessing files completed.......
test data load complete


In [1]:
# Initialize AudioCNN and load the pre-trained model
num_classes = 10  # Update based on your dataset
audio_cnn = AudioCNN(num_classes=num_classes)
# Size fc1 from the actual feature length instead of a hard-coded constant;
# train_data is (num_samples, feature_dim) as stacked by load_data.
audio_cnn.initialize_fc1(train_data.shape[1])

# Load the saved model
model_path = 'model_directory/audiocnn.pth'

if os.path.exists(model_path):
    # weights_only=True restricts unpickling to tensors/plain containers, so a
    # tampered checkpoint cannot execute arbitrary code on load.
    saved_state_dict = torch.load(
        model_path, map_location=torch.device('cpu'), weights_only=True
    )
    audio_cnn.load_state_dict(saved_state_dict)

# Define optimizer and loss function
optimizer = torch.optim.Adam(audio_cnn.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


def evaluate(model, loader, loss_fn):
    """Return ``(mean batch loss, accuracy in percent)`` of ``model`` on ``loader``.

    Puts the model in eval mode and runs without gradients. An empty loader
    yields ``(nan, 0.0)`` instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in loader:
            # Add the channel axis expected by Conv1d: (batch, 1, feature_dim)
            outputs = model(features.unsqueeze(1))
            loss_sum += loss_fn(outputs, labels).item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    mean_loss = loss_sum / len(loader) if len(loader) else float('nan')
    accuracy = 100 * correct / total if total else 0.0
    return mean_loss, accuracy


# Training loop
epochs = 20
# The message is built from `epochs` so the log cannot disagree with the setting.
print(f"reached training loop, setting epochs to {epochs}")
for epoch in range(epochs):
    audio_cnn.train()  # Set the model to training mode
    running_loss = 0.0
    for features, labels in train_loader:
        optimizer.zero_grad()  # Zero the parameter gradients
        outputs = audio_cnn(features.unsqueeze(1))
        loss = criterion(outputs, labels)
        loss.backward()  # Backpropagation
        optimizer.step()  # Optimize
        running_loss += loss.item()

    # Print training loss
    print(f'Epoch {epoch+1}, Training Loss: {running_loss / len(train_loader):.4f}')

    # Validation phase
    validation_loss, validation_accuracy = evaluate(audio_cnn, validate_loader, criterion)
    print(f'Epoch {epoch+1}, Validation Loss: {validation_loss:.4f}, Validation Accuracy: {validation_accuracy:.2f}%')

    # Test phase (reported for monitoring only; do not use it to pick a model)
    _, test_accuracy = evaluate(audio_cnn, test_loader, criterion)
    print(f'Epoch {epoch+1}, Test Accuracy: {test_accuracy:.2f}%')

NameError: name 'AudioCNN' is not defined