In [4]:
my_data_path = '/home/dog4/PhD/computer_vision/VOiCES_Box_unzip/Training_Data/Automatic_Speech_Recognition/ASR_train/modified-train-clean-80'

# Data processing

### Take data from my_data_path path and store it in plain directory designated by **user**

In [11]:
import os
import librosa
import soundfile as sf
import noisereduce as nr
import matplotlib.pyplot as plt
import numpy as np
import shutil
from sklearn.model_selection import train_test_split
import random

In [1]:
PROJECT_PATH = '/home/dog4/PhD/computer_vision/cvproj'

In [2]:
audio_path = os.path.join(PROJECT_PATH, 'data')

In [8]:
cls = 'cls0'

In [None]:
# run for cls0 and cls1
source_folder = source_folder = os.path.join(audio_path, f'{cls}/preprocessed')
output_folder = os.path.join(audio_path, f'{cls}/spectograms')

In [10]:
# === Disable plot GUI backend ===
plt.switch_backend('Agg')

# === Process each .wav file ===
success_count = 0
error_count = 0

for filename in os.listdir(source_folder):
    if not filename.lower().endswith(".wav"):
        continue

    try:
        file_path = os.path.join(source_folder, filename)
        y, sr = librosa.load(file_path, sr=None)

        # Convert to Mel Spectrogram
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        S_dB = librosa.power_to_db(S, ref=np.max)

        # Save as monochrome image
        fig = plt.figure(figsize=(2.24, 2.24), dpi=100)  # Gives you 224x224
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(S_dB, sr=sr, cmap='gray')
        out_path = os.path.join(output_folder, filename.replace(".wav", ".png"))
        plt.savefig(out_path, bbox_inches='tight', pad_inches=0)
        plt.close(fig)

        success_count += 1

    except Exception as e:
        error_count += 1
        continue

print(f"✅ Conversion complete. {success_count} spectrograms saved, {error_count} files skipped due to errors.")



✅ Conversion complete. 154829 spectrograms saved, 0 files skipped due to errors.


## Splitin data into folders according to their class afiliation

In [12]:
# Explicit source paths for each class
class_dirs = {
    "green": "/home/dog4/PhD/computer_vision/cvproj/data/cls1/spectograms",
    "red": "/home/dog4/PhD/computer_vision/cvproj/data/cls0/spectograms"
}

output_dir = "./data/split_dataset"

# Parameters
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Seed for reproducibility
random.seed(42)

# Create output directories
def make_dirs():
    for split in ["train", "val", "test"]:
        for cls in class_dirs.keys():
            os.makedirs(os.path.join(output_dir, split, cls), exist_ok=True)

# Split and copy images
def split_data():
    make_dirs()

    for cls, class_path in class_dirs.items():
        all_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if f.lower().endswith(".png")]

        train_files, temp = train_test_split(all_files, test_size=1-train_ratio, random_state=42)
        val_files, test_files = train_test_split(temp, test_size=test_ratio/(test_ratio + val_ratio), random_state=42)

        for file in train_files:
            shutil.copy(file, os.path.join(output_dir, "train", cls))
        for file in val_files:
            shutil.copy(file, os.path.join(output_dir, "val", cls))
        for file in test_files:
            shutil.copy(file, os.path.join(output_dir, "test", cls))

split_data()


## Convolutional stuff

In [None]:
from torchvision import models
import torch.nn as nn

# Load ResNet18
model = models.resnet18(pretrained=True)

# Adjust for grayscale spectrograms (1 channel)
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Replace the last layer with one that matches your number of classes
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_dataset.classes))

# Move to device
model = model.to(device)


In [1]:
import torch
print(torch.__version__)


AttributeError: module 'torch' has no attribute '__version__'