# CWRU Spectrograms

In [None]:
import numpy as np
import scipy.io
import os

Change input and output folders as necessary:

In [None]:
# Folder that is scanned for CWRU recordings; every entry whose name ends in
# '.mat' is processed.
input_folder_path = r'/home/sigurds/data/cwru/48k'

# Folder that receives the results: one file per input, named
# 'spectrogram_<original file name>'.
output_folder_path = r'/home/sigurds/data/cwru/spectrogram_48k'

Do not change:

In [None]:
# Position of the time-series array among the values returned by
# scipy.io.loadmat. read_mat_file uses it as a list index, not as a
# dictionary key.
# NOTE(review): presumably 3 skips loadmat's leading metadata entries and
# selects the first stored variable - confirm against the CWRU files.
key = 3

# Number of consecutive segments each signal is divided into; one FFT is taken
# per segment. Files with fewer samples than this are skipped.
sequence_length = 1000

In [None]:
def read_mat_file(file_path, position):
    """Load a .mat-file and return the entry found at `position`.

    The file is read with scipy.io.loadmat and the values of the resulting
    dictionary are indexed in their stored order, so `position` is a list
    index rather than a variable name.
    """
    loaded = scipy.io.loadmat(file_path)
    return [*loaded.values()][position]

def generate_spectrogram(data, seq):
    """Build a magnitude spectrogram from the first column of `data`.

    The rows of `data` are cut into `seq` consecutive, equally long segments
    (samples left over after the last full segment are ignored). Each segment
    is transformed with an FFT and the absolute values are kept.

    Returns a list with `seq` arrays, one magnitude spectrum per segment.
    """
    hop = data.shape[0] // seq
    bounds = ((k * hop, (k + 1) * hop) for k in range(seq))
    return [np.abs(np.fft.fft(data[lo:hi, 0])) for lo, hi in bounds]

def save_to_mat_file(output_path, file_name, spectrogram):
    """Save `spectrogram` as a .mat-file inside `output_path`.

    The file is named 'spectrogram_' followed by `file_name` and stores the
    data under the variable name 'spectrogram'. The output folder is created
    (including parents) when it does not exist yet.
    """
    # savemat does not create missing folders, so make sure the target exists.
    # An empty output_path means "current directory" and needs no creation.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    full_output_path = os.path.join(output_path, 'spectrogram_' + file_name)
    scipy.io.savemat(full_output_path, {'spectrogram': spectrogram})

def transpose_spectrogram_DB(spectrogram):
    """Transpose a spectrogram, convert it to decibels and keep its first half.

    Steps, in order:
      1. transpose the input;
      2. divide by the overall maximum, unless that maximum is zero;
      3. apply 20 * log10(x + 1e-6), the offset keeping log10 away from zero;
      4. hand the result to split_spectrogram and return what it yields.
    """
    frames = np.transpose(spectrogram)

    peak = frames.max()
    normalised = frames / peak if peak != 0 else frames
    decibels = 20 * np.log10(normalised + 1e-6)
    return split_spectrogram(decibels)

def split_spectrogram(spectrogram):
    """Return the first half of the rows of a 2-D spectrogram.

    The row count is halved with integer division, so for an odd number of
    rows the middle row is dropped together with the second half.
    """
    row_count = spectrogram.shape[0]
    return spectrogram[:row_count // 2, :]

Generate spectrograms and save to output folder:

In [None]:
# Convert every .mat-file in the input folder into a dB spectrogram and store
# it in the output folder under the original file name.
for file_name in os.listdir(input_folder_path):
    # Anything that is not a .mat-file is ignored silently.
    if not file_name.endswith('.mat'):
        continue

    full_file_path = os.path.join(input_folder_path, file_name)
    data = read_mat_file(full_file_path, key)

    # At least one sample per segment is needed to build the spectrogram.
    if data is None or len(data) < sequence_length:
        print(f"Skipping file {file_name}, insufficient data.")
        continue

    spectrogram = generate_spectrogram(data, sequence_length)
    spectrogram_DB = transpose_spectrogram_DB(spectrogram)
    save_to_mat_file(output_folder_path, file_name, spectrogram_DB)