In [2]:
#################### Includes modules
import numpy as np
import glob as gl
import csv
import pandas
import h5py
import os
from decimal import Decimal, getcontext
import tensorflow as tf

getcontext().prec = 1
FILES_WITH_DATA = gl.glob("../Data/*.mat")

#################### Date Set Class
# Classification types: 1 -> for loading files with data from the scope of valve opening range = 5%
#                       2 -> for files with samples for every 1% of valve opening range
#                       3 -> for loading all files
#                       4 -> for loading data from files with valve opening range every 10%
#                       5 -> for date tables for conv nn with generall classification of 1 type
# Each sensor have id (N), if sensorN will get value 1, then it takes part in dataset collection.

class Dataset:
    def __init__(self, classification_type = 1, data_number_in_sample = 2000, train_to_valid_size = 0.8, conv_dataset = False, conv_axis_X = 43,  sensor1 = 1, sensor2 = 1,  sensor3 = 1, sensor4 = 1,  sensor5 = 1, sensor6 = 1,  sensor7 = 1):
        self.classification_type = classification_type
        self.data_number_in_sample = data_number_in_sample
        self.train_size = int(data_number_in_sample * train_to_valid_size) // 1
        self.valid_size = int(data_number_in_sample * (Decimal(1) - Decimal(train_to_valid_size))) // 1
        self.conv_axis_X = conv_axis_X
        self.conv_dataset = conv_dataset
        self.sensor1 = sensor1
        self.sensor2 = sensor2
        self.sensor3 = sensor3
        self.sensor4 = sensor4
        self.sensor5 = sensor5
        self.sensor6 = sensor6
        self.sensor7 = sensor7
        self.number_of_sensor_in_use = sensor1 + sensor2 + sensor3 + sensor4 + sensor5 + sensor6 + sensor7
        if conv_dataset:
            self.training_data = np.zeros([(self.train_size * self.samples_number(classification_type)), self.conv_axis_X, self.number_of_sensor_in_use])
            self.validation_data = np.zeros([(self.valid_size * self.samples_number(classification_type)), self.conv_axis_X, self.number_of_sensor_in_use])
        else:
            self.training_data = np.zeros([(self.train_size * self.samples_number(classification_type)), self.number_of_sensor_in_use])
            self.validation_data = np.zeros([(self.valid_size * self.samples_number(classification_type)), self.number_of_sensor_in_use])
        self.training_valve_opening_scopes = np.zeros((self.train_size * self.samples_number(classification_type)))
        self.validation_valve_opening_scopes = np.zeros((self.valid_size * self.samples_number(classification_type)))
        self.sort_files_with_data()
        self.data_reader()


    def samples_number(parent, classification_type):
        if parent.classification_type == 1:
            return int(20)

    def data_reader(parent):
        if parent.classification_type == 1:
            parent.load_data_for_classification_1()
            parent.one_hot_encoding(1)
        elif parent.classification_type == 5:
            parent.load_conv_data_for_classification_1()
            parent.one_hot_encoding(1)
        else:
            print("Error")

    def create_sample_training_set(parent, data, start_index, train_size, valve_opening, conv_axis_x):
        dataset_iter = 0
        for i in range(start_index, start_index + train_size):
            for x in range(conv_axis_x):
                for j in range(parent.number_of_sensor_in_use):
                    if parent.conv_dataset == True:
                        parent.training_data[i][x][j] = data[dataset_iter][j]
                    else:
                        parent.training_data[i][j] = data[dataset_iter][j]
                parent.training_valve_opening_scopes[i] = int(valve_opening // 1)
                dataset_iter += 1

    def create_sample_valid_set(parent, data, start_index, train_size, valid_size, valve_opening, conv_axis_x):
        dataset_iter = train_size
        for i in range(start_index, start_index + valid_size):
            for x in range(conv_axis_x):
                for j in range(parent.number_of_sensor_in_use):
                    if parent.conv_dataset == True:
                        parent.validation_data[i][x][j] = data[dataset_iter][j]
                    else:
                        parent.validation_data[i][j] = data[dataset_iter][j]
                parent.validation_valve_opening_scopes[i] = int(valve_opening // 1)
                dataset_iter += 1

    def load_data_for_classification_1(parent):
        last_valve_value = 0
        train_start_indx = 0
        valid_start_indx = 0
        for f in FILES_WITH_DATA:
            valve_value = float(os.path.basename(f)[:-4])
            if (valve_value // 1 % 5 == 0):
                if ((last_valve_value // 1) != (valve_value // 1)):
                    tmp_array_of_full_data = np.array(h5py.File(f).get('p').value)
                    parent.create_sample_training_set(tmp_array_of_full_data, train_start_indx, parent.train_size, valve_value, 1)
                    parent.create_sample_valid_set(tmp_array_of_full_data, valid_start_indx, parent.train_size, parent.valid_size, valve_value, 1)
                    train_start_indx += parent.train_size
                    #print(train_start_indx)
                    valid_start_indx += parent.valid_size
                    #print(valid_start_indx)
                    last_valve_value = valve_value
                    
    def load_conv_data_for_classification_1(parent):
        last_valve_value = 0
        train_start_indx = 0
        valid_start_indx = 0
        for f in FILES_WITH_DATA:
            valve_value = float(os.path.basename(f)[:-4])
            if (valve_value // 1 % 5 == 0):
                if ((last_valve_value // 1) != (valve_value // 1)):
                    tmp_array_of_full_data = np.array(h5py.File(f).get('p').value)
                    parent.create_sample_training_set(tmp_array_of_full_data, train_start_indx, parent.train_size, valve_value, self.conv_axis_X)
                    parent.create_sample_valid_set(tmp_array_of_full_data, valid_start_indx, parent.train_size, parent.valid_size, valve_value, self.conv_axis_X)
                    train_start_indx += parent.train_size
                    valid_start_indx += parent.valid_size
                    last_valve_value = valve_value

    def sort_files_with_data(parent):
        for j in range(len(FILES_WITH_DATA)):
            for i in range(len(FILES_WITH_DATA)-1):
                if (float(os.path.basename(FILES_WITH_DATA[i])[:-4]) > float(os.path.basename(FILES_WITH_DATA[i + 1])[:-4])):
                    tmp = FILES_WITH_DATA[i + 1]
                    FILES_WITH_DATA[i + 1] = FILES_WITH_DATA[i]
                    FILES_WITH_DATA[i] = tmp

    def one_hot_encoding(parent, classification_type):
        if (classification_type == 1):
            for i in range(len(parent.training_valve_opening_scopes)):
                parent.training_valve_opening_scopes[i] = parent.training_valve_opening_scopes[i] / 5
            for i in range(len(parent.validation_valve_opening_scopes)):
                parent.validation_valve_opening_scopes[i] = parent.validation_valve_opening_scopes[i] / 5
        else:
            print("Error")
        tf.keras.utils.to_categorical(parent.training_valve_opening_scopes) 
        tf.keras.utils.to_categorical(parent.validation_valve_opening_scopes)

In [None]:
#dt = Dataset() 
#dt = Dataset(data_number_in_sample = 20, conv_dataset = True)
# print(FILES_WITH_DATA.type())
# print(FILES_WITH_DATA)
# print("===========")
#print(dt.training_data)
# print("===========")
# print(dt.validation_valve_opening_scopes)
# print("===========")
# np.set_printoptions(threshold=np.inf)
#print(dt.validation_data)
#print(dt.validation_data.shape)

In [17]:
print(dt.validation_data[0])

[[9.00104455 0.53885013 5.09477751 1.7997669  1.33391057 0.22032908
  0.44148675]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.    

In [None]:
last_valve_value = 0
    train_start_indx = 0
    valid_start_indx = 0
    for f in FILES_WITH_DATA:
        valve_value = float(os.path.basename(f)[:-4])
        if (valve_value // 1 % 5 == 0):
            if ((last_valve_value // 1) != (valve_value // 1)):
                tmp_array_of_full_data = np.array(h5py.File(f).get('p').value)


In [16]:
# import numpy as np
# import glob as gl
# import csv
# import pandas
# import h5py
# from decimal import Decimal, getcontext
# files = gl.glob("../Data/*.txt")
# last = "c"
# for f in files:
#     os.remove(f)

In [11]:
# import numpy as np
# import glob as gl
# import csv
# import pandas
# import h5py
# from decimal import Decimal, getcontext
# files = gl.glob("../Data/*.mat")
# oldname = ""
# for f in files:
#     l = []
#     newname = f[f.find("TOA") + 3 : f.find("_a_")]
#     if oldname == newname:
#         oldname = newname
#     else:
#         #l.append(newname)
#         #l.append(r"C:\Users\Kowalio\Desktop\Badania SI maszyn przepływowych\Data")
#         #s = ''.join(1)
#         os.rename(f, newname + '.mat')
#         oldname = newname