In [58]:
import os
import librosa
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt


In [60]:
import os
import librosa
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

# Parametri (Parameters)
DATASET_PATH = "datasets/UrbanSound8K"
SAMPLE_RATE = 22050
DURATION = 4
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
MEL_BINS = 64
FIXED_LENGTH = 128 # Širina mel spektrograma (Mel spectrogram width)

# Funkcija za izračun mel spektrograma (Function to extract Mel spectrogram)
def extract_mel(file_path):
    y, sr = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=MEL_BINS)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    return mel_db

# Nalaganje datotek in label (Loading files and labels)
X = [] # Spektrogrami (Spectrograms)
y = [] # Labels
file_names = [] # Imena datotek (File names)

# Metadata datoteka (Metadata file)
# UrbanSound8K dataset vsebuje CSV datoteko z meta-podatki, ki vključujejo razrede zvokov.
# Potrebujemo jo, da pridobimo pravilne labele za vsako zvočno datoteko.
# The UrbanSound8K dataset contains a CSV metadata file that includes sound classes.
# We need it to get the correct labels for each audio file.
metadata_path = os.path.join(DATASET_PATH, "UrbanSound8K.csv")
import pandas as pd
metadata = pd.read_csv(metadata_path)

for fold in range(1, 11):  # fold1 do fold10
    folder_path = os.path.join(DATASET_PATH, f"fold{fold}")
    for fname in tqdm(os.listdir(folder_path), desc=f"Processing Fold {fold}"):
        if fname.endswith(".wav"):
            path = os.path.join(folder_path, fname)
            try:
                mel = extract_mel(path)
                # Zagotovimo, da imajo vsi spektrogrami enako dolžino
                # Ensure all spectrograms have the same length
                if mel.shape[1] >= FIXED_LENGTH:
                    mel = mel[:, :FIXED_LENGTH]  # odreži preveč (trim excess)
                else:
                    # Dodajanje ničel (padding with zeros)
                    padding = FIXED_LENGTH - mel.shape[1]
                    mel = np.pad(mel, ((0, 0), (0, padding)), mode='constant')

                X.append(mel)
                # Poišči labelo v meta-podatkih glede na ime datoteke
                # Find the label in the metadata based on the file name
                label = metadata[metadata['slice_file_name'] == fname]['class'].iloc[0]
                y.append(label)

            except Exception as e:
                print(f"Napaka pri obdelavi {fname}: {e}")

X = np.array(X)
# Dodaj kanal dimenzijo za CNN (Add channel dimension for CNN)
X = X[..., np.newaxis]
print(f"Končna oblika X (Final shape of X): {X.shape}")

# Pretvorba string label v numerične (Convert string labels to numerical)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
# Pretvorba v one-hot encoding (Convert to one-hot encoding)
y_categorical = to_categorical(y_encoded)

# Razdelitev podatkov na učno in testno množico (Split data into training and test sets)
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

print(f"Oblika X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"Oblika X_test: {X_test.shape}, y_test: {y_test.shape}")

# --- Gradnja CNN modela (Building the CNN Model) ---
# Model je zasnovan tako, da je sorazmerno lahek za hitro učenje na CPE.
# The model is designed to be relatively lightweight for faster training on CPU.
def create_cnn_model(input_shape, num_classes):
    model = tf.keras.models.Sequential([
        # Prvi konvolucijski blok (First convolutional block)
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape,
                               kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        tf.keras.layers.BatchNormalization(), # Normalizacija vhodov (Normalizing inputs)
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Dropout(0.3), # Preprečevanje prekomernega prilagajanja (Preventing overfitting)

        # Drugi konvolucijski blok (Second convolutional block)
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu',
                               kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Dropout(0.3),

        # Tretji konvolucijski blok (Third convolutional block)
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu',
                               kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Dropout(0.3),

        # Sploščenje in klasifikacijski sloji (Flattening and classification layers)
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation='softmax') # Izhodni sloj z aktivacijo softmax za klasifikacijo
                                                               # Output layer with softmax activation for classification
    ])
    return model

# Dimenzije vhoda in število razredov (Input dimensions and number of classes)
input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3]) # (MEL_BINS, FIXED_LENGTH, 1)
num_classes = y_categorical.shape[1] # Število edinstvenih razredov (Number of unique classes)

model = create_cnn_model(input_shape, num_classes)
model.summary()

# --- Kompilacija modela (Compiling the Model) ---
# Uporabljamo Adam optimizator in categorical_crossentropy kot funkcijo izgube,
# saj gre za večrazredno klasifikacijo.
# We use the Adam optimizer and categorical_crossentropy as the loss function,
# as this is a multi-class classification problem.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks za spremljanje učenja (Callbacks for monitoring training)
# EarlyStopping bo ustavil učenje, če se validacijska izguba ne izboljša,
# da se prepreči prekomerno prilagajanje in skrajša čas učenja.
# EarlyStopping will stop training if validation loss does not improve,
# to prevent overfitting and shorten training time.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)
]

# --- Učenje modela (Training the Model) ---
# Čas učenja je omejen na CPE, zato bomo izbrali razumno število epoh in velikost paketov.
# The training time is limited to CPU, so we will choose a reasonable number of epochs and batch size.
EPOCHS = 100 # Nastavljeno na višjo vrednost, vendar bo EarlyStopping ustavil prej, če je potrebno
             # Set to a higher value, but EarlyStopping will stop earlier if necessary
BATCH_SIZE = 32

print("\n--- Začetek učenja modela (Starting model training) ---")
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    callbacks=callbacks,
                    verbose=1)
print("--- Konec učenja modela (Finished model training) ---")

# --- Evaluacija modela (Evaluating the Model) ---
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTestna izguba (Test Loss): {loss:.4f}")
print(f"Testna natančnost (Test Accuracy): {accuracy:.4f}")

# --- Prikaz rezultatov učenja (Displaying Training Results) ---
plt.figure(figsize=(12, 5))

# Graf natančnosti (Accuracy plot)
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Učna natančnost (Training Accuracy)')
plt.plot(history.history['val_accuracy'], label='Validacijska natančnost (Validation Accuracy)')
plt.title('Natančnost modela (Model Accuracy)')
plt.xlabel('Epoha (Epoch)')
plt.ylabel('Natančnost (Accuracy)')
plt.legend()
plt.grid(True)

# Graf izgube (Loss plot)
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Učna izguba (Training Loss)')
plt.plot(history.history['val_loss'], label='Validacijska izguba (Validation Loss)')
plt.title('Izguba modela (Model Loss)')
plt.xlabel('Epoha (Epoch)')
plt.ylabel('Izguba (Loss)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

# --- Primer napovedi (Example Prediction) ---
# Vzemi naključen vzorec iz testne množice
# Take a random sample from the test set
sample_index = np.random.randint(0, len(X_test))
sample_mel = X_test[sample_index]
true_label_encoded = np.argmax(y_test[sample_index])
true_label = label_encoder.inverse_transform([true_label_encoded])[0]

# Napoved (Prediction)
# Dodamo dimenzijo paketa, saj model pričakuje vhod oblike (batch_size, height, width, channels)
# Add batch dimension, as the model expects input of shape (batch_size, height, width, channels)
predicted_probabilities = model.predict(np.expand_dims(sample_mel, axis=0))[0]
predicted_label_encoded = np.argmax(predicted_probabilities)
predicted_label = label_encoder.inverse_transform([predicted_label_encoded])[0]

print(f"\nPrimer napovedi za vzorec št. {sample_index}:")
print(f"Pravi razred (True Class): {true_label}")
print(f"Napovedan razred (Predicted Class): {predicted_label}")
print(f"Verjetnosti napovedi (Prediction Probabilities): {predicted_probabilities}")

FileNotFoundError: [Errno 2] No such file or directory: 'datasets/UrbanSound8K/UrbanSound8K.csv'

Končna oblika X (Final shape of X): (1000, 100, 40, 1, 1, 1)
Oblika X_train: (800, 100, 40, 1, 1, 1), y_train: (800, 10)
Oblika X_test: (200, 100, 40, 1, 1, 1), y_test: (200, 10)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



--- Začetek učenja modela (Starting model training) ---
Epoch 1/100


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(32, 100, 40, 1, 1, 1), dtype=float32). Expected shape (None, 100, 40, 1), but input has incompatible shape (32, 100, 40, 1, 1, 1)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 100, 40, 1, 1, 1), dtype=float32)
  • training=True
  • mask=None
  • kwargs=<class 'inspect._empty'>