In [1]:
from dataTransform import *
from Maglib import *
import numpy as np
import os
import shutil
import numpy as np

# Path Setting

In [2]:
# Folder containing the unzipped MagNet material datasets (one subfolder per material).
# NOTE(review): this is a machine-specific absolute path - change it to match your setup.
raw_data_path = r"C:\Users\ossia\Downloads\pre-training\pre-training"

# Name of folder in cwd that will store downsampled, split (train, test, validate) material data
processed_data = "processed_training_dataset"

current_directory = os.getcwd()

# Full path to the output folder inside the current working directory
new_folder_path = os.path.join(current_directory, processed_data)

# Discover the materials BEFORE touching the output folder: if raw_data_path is
# wrong, os.listdir raises here and the previously processed data is left intact.
# Every sub-directory of raw_data_path is treated as one material; the names are
# sorted because os.listdir returns entries in arbitrary order.
training_materials = sorted(
    entry
    for entry in os.listdir(raw_data_path)
    if os.path.isdir(os.path.join(raw_data_path, entry))
)

# Start from a clean output folder: drop any previous run, then recreate it
if os.path.exists(new_folder_path):
    shutil.rmtree(new_folder_path)
    print(f"Existing folder '{processed_data}' deleted")

os.makedirs(new_folder_path)
print(f"Folder '{processed_data}' created in {current_directory}")

print("Training Materials:", training_materials)

# Save the list of training materials (one name per line) to a text file in the cwd
with open("training_materials.txt", 'w') as file:
    file.writelines(mat_name + '\n' for mat_name in training_materials)
print("List of training materials have been saved to cwd")

Existing folder 'processed_training_dataset' deleted
Folder 'processed_training_dataset' created in c:\Users\ossia\Documents\GitHub\MagLearn-Bristol-2\Single Pipeline
Training Materials: ['3C90', '3C94', '3E6', '3F4', '77', '78', 'N27', 'N30', 'N49', 'N87']
List of training materials have been saved to cwd


# Training Data Pre-Processing

In [3]:
newStep = 128  # Resampled length (same for every material, so set once outside the loop)


def ensure_2d(arr):
    """Return `arr` as an array with at least two dimensions.

    A 1-D array of length N gets a trailing axis appended (shape (N, 1));
    any other array is returned unchanged.
    """
    if arr.ndim == 1:
        return arr[:, np.newaxis]
    return arr


# Iterate through all materials found in the raw dataset folder, writing the
# processed output for each one into its own subfolder of processed_data.
for mat in training_materials:
    processed_mat_path = os.path.join(processed_data, mat)  # Folder for this material's processed data

    # Create the new subfolder; a material whose subfolder already exists is
    # skipped entirely (it is not re-processed).
    try:
        os.makedirs(processed_mat_path)
        print(f"Subfolder '{mat}' created in {processed_data}")
    except FileExistsError:
        print(f"Subfolder '{mat}' already exists in {processed_data}")
        continue

    # Load the raw CSV data for this material
    unprocessed_mat_path = os.path.join(raw_data_path, mat)
    raw_data = Maglib.MagLoader(
        unprocessed_mat_path,
        data_source='csv'
    )

    # Ensure the temp, loss and freq arrays are all 2 dimensional before transforming
    raw_data.temp = ensure_2d(raw_data.temp)
    raw_data.loss = ensure_2d(raw_data.loss)
    raw_data.freq = ensure_2d(raw_data.freq)

    # Transform the loaded data (resampled length newStep), then split it;
    # both helpers are given the material's output folder.
    raw_data = dataTransform(raw_data, newStep, processed_mat_path, plot=False)
    dataSplit(raw_data, processed_mat_path)
    print("Rescaled, downsampled and split material:", mat)

Subfolder '3C90' created in processed_training_dataset
Data transform done
DataSplit done
Rescaled, downsampled and split material: 3C90
Subfolder '3C94' created in processed_training_dataset
Data transform done
DataSplit done
Rescaled, downsampled and split material: 3C94
Subfolder '3E6' created in processed_training_dataset
Data transform done
DataSplit done
Rescaled, downsampled and split material: 3E6
Subfolder '3F4' created in processed_training_dataset
Data transform done
DataSplit done
Rescaled, downsampled and split material: 3F4
Subfolder '77' created in processed_training_dataset
Data transform done
DataSplit done
Rescaled, downsampled and split material: 77
Subfolder '78' created in processed_training_dataset
Data transform done
DataSplit done
Rescaled, downsampled and split material: 78
Subfolder 'N27' created in processed_training_dataset
Data transform done
DataSplit done
Rescaled, downsampled and split material: N27
Subfolder 'N30' created in processed_training_dataset
D