In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import pandas as pd

# Daten aus der CSV-Datei lesen
df = pd.read_csv('audio_data.csv')

# Aufteilen der Daten in Features und Labels
X = df[['mfcc', 'mel']].values
print(type(X),X)
y = df['label'].values

# Flachlegen der 'mfcc' und 'mel' Daten
X_mfcc = np.array(df['mfcc'].to_list())
X_mel = np.array(df['mel'].to_list())

# Zusammenf√ºhren der Daten
X = np.concatenate([X_mfcc, X_mel], axis=0)

# Konvertieren der Daten in PyTorch-Tensoren
X_train = torch.from_numpy(X).float()
y_train = torch.from_numpy(y).long()

# Erstellen des CNN-Modells
print("defining model")
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv(1, 32, (3, 3))
        self.pool1 = nn.MaxPooling2d((2, 2))
        self.conv2 = nn.Conv(32, 64, (3, 3))
        self.pool2 = nn.MaxPooling2d((2, 2))
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# Initialisieren des Modells
model = CNN()

# Definieren des Optimierers und der Verlustfunktion
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

print("training model")
# Trainieren des Modells
for epoch in range(10):
    # Trainieren des Modells mit einem Batch
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Drucken des Verlusts
    print(f"Epoch {epoch + 1}: {loss.item()}")


# # Bewertung des Modells
# with torch.no_grad():
#     outputs = model(X_test)
#     _, predicted = torch.max(outputs.data, 1)
#     accuracy = (predicted == y_test).sum().item() / len(y_test)

# print(f"Accuracy: {accuracy}")

# Speichern des Modells
torch.save(model.state_dict(), "model.pt")


[['[[-6.36772095e+02 -6.01351868e+02 -5.98674927e+02 ... -5.96193359e+02\n  -6.02542908e+02 -6.29983215e+02]\n [ 1.17817635e+02  1.19560287e+02  1.20501282e+02 ...  1.28340515e+02\n   1.24758163e+02  1.23324860e+02]\n [-1.31687403e+01 -1.40172291e+01 -1.52692375e+01 ... -6.36825562e+00\n  -3.44912481e+00 -3.76777005e+00]\n ...\n [ 2.93491745e+00  1.23710239e+00  2.10321829e-01 ...  8.02035332e-01\n   2.95523429e+00  1.87842846e-02]\n [-4.29850221e-02  1.52796900e+00 -9.68683898e-01 ... -4.23005486e+00\n  -4.47066212e+00 -4.94823027e+00]\n [ 5.69604969e+00  4.61984825e+00  3.88371468e+00 ...  5.36578465e+00\n   1.83199704e+00  4.91037846e+00]]'
  '[[3.3881725e-04 4.8105192e-04 4.8866577e-04 ... 8.7487127e-04\n  6.2371884e-04 4.0940859e-04]\n [1.0274362e-03 2.5900074e-03 3.0417112e-03 ... 2.6755454e-03\n  2.5298996e-03 9.4829843e-04]\n [2.5718758e-04 3.1528703e-04 3.0855997e-04 ... 2.4197347e-04\n  4.0573842e-04 1.9678329e-04]\n ...\n [6.4503965e-08 1.4934807e-07 1.4974724e-07 ... 2.5106

TypeError: can't convert np.ndarray of type numpy.str_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.