<a href="https://colab.research.google.com/github/Bitang-Melyen-Tanulok/Csip_Csip/blob/main/Cross_valid_Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from PIL import Image
from google.colab import drive

# Load every PNG spectrogram from Google Drive into memory.
# The label of each image is the name of the sub-folder it was found in.
X = []
Y = []
drive.mount('/content/drive')
path = '/content/drive/MyDrive/DeepLearning'

# Root folder of the dataset (one sub-folder per label).
spectrograms_path = os.path.join(path, 'sample_train_spectrograms_20_first_newSg')

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and everything derived from it) stable between runs.
folders = sorted(os.listdir(spectrograms_path))

for folder in folders:
    folder_path = os.path.join(spectrograms_path, folder)

    # Skip stray files in the dataset root: calling os.listdir on them
    # would raise NotADirectoryError and abort the whole load.
    if not os.path.isdir(folder_path):
        continue

    print(f"Current folder: {folder}")

    # Walk the files of the current label folder.
    files = sorted(os.listdir(folder_path))

    for file in files:
        if file.endswith('.png'):  # Only PNG files are read
            file_path = os.path.join(folder_path, file)

            # Load the image as a single-channel array.
            img = image.load_img(file_path, color_mode='grayscale')
            img_array = image.img_to_array(img)
            X.append(img_array)

            # The folder name is the label.
            Y.append(folder)


# NOTE(review): stacking assumes every image has the same height and width - TODO confirm.
X = np.array(X)
Y = np.array(Y)
print(X.shape)
print(Y.shape)
print()
test_split = 0.1
valid_split = 0.1

# Shuffle X and Y with the same permutation. The generator is seeded so the
# shuffled order is reproducible; 42 matches the random_state used for the
# cross-validation split later in the notebook.
permutation = np.random.default_rng(42).permutation(len(X))
X = X[permutation]
Y = Y[permutation]

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current folder: blrwar1


KeyboardInterrupt: 

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Multi-class classification: the string labels in Y are first mapped to
# integer ids and then expanded into one-hot vectors.

# Integer id for every label.
le = LabelEncoder()
Y_encoded = le.fit_transform(Y)

# The encoder keeps one entry per distinct label it has seen.
num_classes = le.classes_.shape[0]
print(f"Class number= {num_classes}")

# One-hot representation of the integer ids.
Y_onehot = to_categorical(Y_encoded, num_classes=num_classes)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping: there is no point in continuing a training run once the
# validation loss has stopped decreasing.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,                 # give up after 5 epochs without improvement
    restore_best_weights=True,  # keep the weights of the best epoch
    verbose=1,
)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpointing: write the model to 'best_model.keras' whenever the
# validation loss reaches a new minimum.
# NOTE(review): in the visible notebook this callback is never passed to
# model.fit, so nothing is saved unless it is added to a callbacks list.
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [None]:
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from sklearn.model_selection import StratifiedKFold

from tensorflow.keras import backend as K


def _build_cnn(input_shape, n_classes):
  """Build and compile the CNN that is trained on each fold.

  Three Conv2D + MaxPooling2D stages (32, 64, 128 filters) followed by a
  128-unit dense layer with dropout and a softmax output.

  Args:
    input_shape: (height, width, channels) of a single input image.
    n_classes: number of units in the softmax output layer.

  Returns:
    A compiled Sequential model (adam optimizer, categorical cross-entropy
    loss, accuracy metric).
  """
  model = models.Sequential()
  model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
  model.add(layers.MaxPooling2D((2, 2)))
  model.add(layers.Conv2D(64, (3, 3), activation='relu'))
  model.add(layers.MaxPooling2D((2, 2)))
  model.add(layers.Conv2D(128, (3, 3), activation='relu'))
  model.add(layers.MaxPooling2D((2, 2)))
  model.add(layers.Flatten())
  model.add(layers.Dense(128, activation='relu'))
  model.add(layers.Dropout(0.5))
  model.add(layers.Dense(n_classes, activation='softmax'))

  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return model


# 5-fold stratified cross-validation: a fresh model is trained on each split
# and evaluated on the held-out fold.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

fold_accuracies = []
fold_losses = []

history_per_fold = []

# Start Stratified K-Fold cross-validation.
# The split is stratified on the integer labels; training uses the one-hot labels.
for fold_number, (train_index, test_index) in enumerate(skf.split(X, Y_encoded), start=1):
  print(f"Startung fold: {fold_number}")

  # Drop the Keras global state left behind by the previous fold's model so
  # it does not pile up over the folds.
  K.clear_session()

  X_train, X_test = X[train_index], X[test_index]
  Y_train, Y_test = Y_onehot[train_index], Y_onehot[test_index]

  # Scale both splits with the SAME factor, taken from the training fold only.
  # Scaling the test fold by its own maximum would preprocess it differently
  # from the data the model was trained on.
  scale = np.max(X_train)
  if scale == 0:
    scale = 1.0  # all-zero training images: avoid dividing by zero
  X_train = X_train / scale
  X_test = X_test / scale

  model = _build_cnn((X_train.shape[1], X_train.shape[2], 1), num_classes)

  # NOTE(review): the held-out fold is used both as validation_data (which
  # drives early stopping and best-weight restoring) and for the final
  # evaluate() below, so the reported fold metrics are optimistic. A separate
  # validation split taken from the training fold would remove that bias.
  history = model.fit(
      X_train, Y_train,
      epochs=50,
      batch_size=32,
      validation_data=(X_test, Y_test),
      callbacks=[early_stopping],
      verbose=1
  )

  history_per_fold.append(history)

  test_loss, test_accuracy = model.evaluate(X_test, Y_test)
  print(f"Fold {fold_number} - Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

  fold_accuracies.append(test_accuracy)
  fold_losses.append(test_loss)

# To display the model (the architecture is the same for every fold)
model.summary()

In [None]:
# Summary of the cross-validation run: averages over the folds first ...
print("\nCross-Validation Results:")
mean_loss = np.mean(fold_losses)
print(f"Mean Validation Loss: {mean_loss}")
mean_accuracy = np.mean(fold_accuracies)
print(f"Mean Validation Accuracy: {mean_accuracy}")

# ... then the loss and accuracy of every fold on its own line.
per_fold = zip(fold_losses, fold_accuracies)
for fold_idx, (fold_loss, fold_accuracy) in enumerate(per_fold, start=1):
    print(f"Fold {fold_idx} - Validation Loss: {fold_loss}, Validation Accuracy: {fold_accuracy}")

In [None]:
# One figure per fold: accuracy curves in the left panel, loss curves in the right one.
for fold_idx, fold_history in enumerate(history_per_fold):
  curves = fold_history.history
  fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

  # Left panel: training vs. validation accuracy per epoch
  ax_acc.plot(curves['accuracy'], label='Training Accuracy')
  ax_acc.plot(curves['val_accuracy'], label='Validation Accuracy')
  ax_acc.set_title(f'Fold {fold_idx + 1} Accuracy')
  ax_acc.set_xlabel('Epochs')
  ax_acc.set_ylabel('Accuracy')
  ax_acc.legend()

  # Right panel: training vs. validation loss per epoch
  ax_loss.plot(curves['loss'], label='Training Loss')
  ax_loss.plot(curves['val_loss'], label='Validation Loss')
  ax_loss.set_title(f'Fold {fold_idx + 1} Loss')
  ax_loss.set_xlabel('Epochs')
  ax_loss.set_ylabel('Loss')
  ax_loss.legend()

  fig.tight_layout()
  plt.show()