In [1]:
import numpy as np
from scipy.signal import chirp
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split
import os
import h5py

In [5]:
# Reads plt files, converts into numpy array

def velfield(ts, boa,
             base_dir='/Users/kunwoosmac/Library/CloudStorage/OneDrive-IllinoisInstituteofTechnology/ibpm test',
             grid_shape=(299, 599), header_chars=191):
    """Read one ibpm ``.plt`` snapshot and return its two velocity arrays.

    The file ``<base_dir>/klee_Chirp_BoA<boa>_f0p05f0p5_L09000/ibpm<ts>.plt``
    (``ts`` zero-padded to five digits) is read as text. The first
    ``header_chars`` characters are skipped, the remainder is split on single
    spaces and the trailing token is discarded. Every token that starts with a
    newline marks the beginning of a data row; the tokens two and three
    positions after it are parsed as ``UX`` and ``UY``.

    Parameters
    ----------
    ts : int
        Timestep number used in the file name.
    boa : int or str
        Value inserted after ``BoA`` in the case directory name.
    base_dir : str, optional
        Directory containing the case directories. Defaults to the location
        that was previously hard-coded.
    grid_shape : tuple of int, optional
        Shape the flat sample list is reshaped to. Defaults to ``(299, 599)``.
    header_chars : int, optional
        Number of leading characters to skip. Defaults to ``191``.

    Returns
    -------
    UX, UY : numpy.ndarray
        Two float arrays of shape ``grid_shape``.

    Raises
    ------
    ValueError
        If the number of rows found does not match ``grid_shape``.
    """
    path = os.path.join(
        base_dir,
        'klee_Chirp_BoA' + str(boa) + '_f0p05f0p5_L09000',
        'ibpm' + format(ts, '05') + '.plt',
    )
    with open(path) as file:
        f_text = file.read()

    tokens = f_text[header_chars:].split(' ')
    tokens.pop()  # drop whatever follows the last space of the file

    UX, UY = [], []
    for idx, val in enumerate(tokens):
        # startswith() is safe for empty tokens, unlike val[0]
        if val.startswith('\n'):
            UX.append(float(tokens[idx + 2]))
            UY.append(float(tokens[idx + 3]))

    expected = grid_shape[0] * grid_shape[1]
    if len(UX) != expected:
        raise ValueError(
            'expected %d samples for grid shape %r but found %d in %s'
            % (expected, tuple(grid_shape), len(UX), path)
        )

    UX = np.array(UX).reshape(grid_shape)
    UY = np.array(UY).reshape(grid_shape)

    return UX, UY

In [6]:
# Definition of the snapshot timesteps, the frequency label and the pitch angle

# Snapshot step numbers: 5510, 5520, ..., 14490
ibpm_time = np.arange(5510, 14500, 10)

# Steps elapsed since step 5501, divided by 100; this is the time axis that
# lines up with ori_time[9::10] below.
elapsed = (ibpm_time - 5501)/100

# Linear ramp from 0.05 (elapsed = 0) to 0.5 (elapsed = 90)
freq = 0.05 + elapsed/90*(0.5-0.05)

# Time axis of the chirp signal, sampled every 0.01
ori_time = np.arange(0, 90, 0.01)

# Linear chirp starting at 0.05 and passing through 0.5 at t = 45, amplitude 5.
# Keep every 10th sample starting at index 9 and drop the final one so that the
# samples correspond one-to-one to ibpm_time.
sweep = chirp(ori_time, 0.05, 45, 0.5, method = 'linear', phi = 0)
angle = 5*sweep[9::10][:-1]

# NOTE(review): with t1 = 45 the chirp phase is 2*pi*(0.05*t + 0.005*t**2), i.e.
# angle == 5*cos(2*pi*freq*t). `freq` is therefore phase/(2*pi*t); the
# instantaneous frequency of this chirp (0.05 + 0.01*t) would reach 0.95 at
# t = 90. Confirm which definition the labels are meant to carry.

# --- manual sanity checks (kept for reference) ---
# ori_freq = 0.05 + ori_time/90*(0.5-0.05)
# ori_time = ori_time[9::10]

# plt.plot((ibpm_time - 5502)/100, freq)
# plt.plot(ori_time, ori_freq[9::10])
# plt.show()

# plt.plot(angle)
# plt.show()

# print(len(ibpm_time),len(angle), ori_time[-1], (ibpm_time[-1]-5501)/100)

# print(angle[0], np.cos(2*np.pi*freq*(ibpm_time - 5501)/100)[0])

# plt.plot(ibpm_time, angle)
# plt.plot(ibpm_time, 5*np.cos(2*np.pi*freq*(ibpm_time - 5501)/100))

In [7]:
# Categorize the continuous frequency

# Category centres, padded with one extra value on each side (0 and 0.6) so
# that the first and last real categories also get a lower / upper threshold.
freq_cat = np.array([0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.6])

# Thresholds are the midpoints between neighbouring categories,
# i.e. if 0.075 <= x < 0.15 then the category is 0.1.
thres = []
for i in range(8):
    thres.append(tuple([(freq_cat[i+1] + freq_cat[i])/2, (freq_cat[i+1] + freq_cat[i+2])/2]))

freq_cat = np.array([0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5])

# Bin edges: the lower bound of every category plus the upper bound of the last.
bin_edges = np.array([bound[0] for bound in thres] + [thres[-1][1]])

# np.digitize returns k with bin_edges[k-1] <= f < bin_edges[k], so k-1 is the
# category index. A value lying exactly on a threshold now goes to the upper
# category instead of matching no category at all, which used to make
# categorized_freq shorter than freq.
cat_idx = np.digitize(freq, bin_edges) - 1

out_of_range = (cat_idx < 0) | (cat_idx >= len(freq_cat))
if np.any(out_of_range):
    raise ValueError(
        "frequencies outside the categorised range [%g, %g): %r"
        % (bin_edges[0], bin_edges[-1], np.asarray(freq)[out_of_range])
    )

# One category per entry of freq, in the same order.
categorized_freq = freq_cat[cat_idx]

# Chirp BoA 25

In [8]:
# Load Chirp Data for boa 25

n_snapshots = len(ibpm_time)
chirp_X25 = np.zeros((n_snapshots, 200, 200))

for i, timestep in enumerate(ibpm_time):
    UX, UY = velfield(timestep, 25)

    # Crop both components to rows 99..198 and columns 399..598 (100 x 200 each)
    UX, UY = UX[99:199, 399:599], UY[99:199, 399:599]

    # Stack UX on top of UY to form one 200 x 200 sample
    chirp_X25[i] = np.vstack((UX, UY))

# Add a trailing axis of length 1
chirp_X25 = np.expand_dims(chirp_X25, axis = -1)

# Label columns: boa, categorized frequency, type, instantaneous angle,
# instantaneous frequency. Because of the 'chirp' string column,
# np.column_stack turns every column into strings.
boa = np.full((n_snapshots, 1), 25.0)
typ = np.full((n_snapshots, 1), 'chirp')
inst_angle = (25 - angle)[:, np.newaxis]
inst_freq = freq[:, np.newaxis]

Y = np.column_stack([boa, categorized_freq, typ, inst_angle, inst_freq])


# Split Chirp dataset into train : test : validation = 70 : 15 : 15
# (first 70 / 30, then the 30 % part is halved); both splits are stratified on
# the first two label columns.
split_kwargs = dict(random_state = 0, shuffle = True)

train_X25, val_X25, train_Y25, val_Y25 = train_test_split(
    chirp_X25, Y, test_size = 0.3, stratify = Y[:, 0:2], **split_kwargs)

test_X25, val_X25, test_Y25, val_Y25 = train_test_split(
    val_X25, val_Y25, test_size = 0.5, stratify = val_Y25[:, 0:2], **split_kwargs)

# Check stratification
for tag, labels in (("training :", train_Y25), ('validation :', val_Y25), ('test :', test_Y25)):
    print(tag, Counter(labels[:, 0]), Counter(labels[:, 1]))

training : Counter({'25.0': 629}) Counter({'0.4': 105, '0.1': 105, '0.2': 105, '0.35': 70, '0.3': 70, '0.25': 70, '0.5': 69, '0.05': 35})
validation : Counter({'25.0': 135}) Counter({'0.1': 23, '0.4': 22, '0.2': 22, '0.25': 15, '0.5': 15, '0.3': 15, '0.35': 15, '0.05': 8})
test : Counter({'25.0': 135}) Counter({'0.4': 23, '0.2': 23, '0.1': 22, '0.25': 15, '0.3': 15, '0.35': 15, '0.5': 15, '0.05': 7})


# Chirp BoA 30

In [9]:
# Load Chirp Data for boa 30

chirp_X30 = np.zeros((len(ibpm_time), 200, 200))

for i, timestep in enumerate(ibpm_time):

    UX, UY = velfield(timestep, 30)

    # Crop both components to rows 99..198 and columns 399..598 (100 x 200 each)
    UX = UX[99:199, 399:599]
    UY = UY[99:199, 399:599]

    # UX on top of UY -> one 200 x 200 sample
    chirp_X30[i] = np.concatenate((UX, UY))

# Add a trailing axis of length 1
chirp_X30 = chirp_X30.reshape(len(ibpm_time), 200, 200,1)

# Label columns: boa, categorized frequency, type, instantaneous angle,
# instantaneous frequency. Because of the 'chirp' string column,
# np.column_stack turns every column into strings.
boa = (np.full((len(ibpm_time), 1), 30.0)).reshape(-1,1)
typ = (np.full((len(ibpm_time), 1), 'chirp')).reshape(-1,1)
inst_angle = (30 - angle).reshape(-1,1)
# Defined here as well so this cell does not depend on a variable left behind
# by the BoA 25 cell.
inst_freq = freq.reshape(-1,1)

Y = np.column_stack((boa, categorized_freq, typ, inst_angle, inst_freq))


# Split Chirp dataset into train : test : validation = 70 : 15 : 15
# (first 70 / 30, then the 30 % part is halved); both splits are stratified on
# the first two label columns.
train_X30, val_X30, train_Y30, val_Y30 = \
    train_test_split(chirp_X30, Y, test_size = 0.3, random_state = 0, shuffle = True, stratify = Y[:, 0:2])

test_X30, val_X30, test_Y30, val_Y30 = \
    train_test_split(val_X30, val_Y30, test_size = 0.5, random_state = 0, shuffle = True, stratify = val_Y30[:, 0:2])


# Check stratification
print("training :", Counter(train_Y30[:,0]), Counter(train_Y30[:, 1]))
print('validation :', Counter(val_Y30[:,0]), Counter(val_Y30[:, 1]))
print('test :', Counter(test_Y30[:,0]), Counter(test_Y30[:, 1]))

training : Counter({'30.0': 629}) Counter({'0.4': 105, '0.1': 105, '0.2': 105, '0.35': 70, '0.3': 70, '0.25': 70, '0.5': 69, '0.05': 35})
validation : Counter({'30.0': 135}) Counter({'0.1': 23, '0.4': 22, '0.2': 22, '0.25': 15, '0.5': 15, '0.3': 15, '0.35': 15, '0.05': 8})
test : Counter({'30.0': 135}) Counter({'0.4': 23, '0.2': 23, '0.1': 22, '0.25': 15, '0.3': 15, '0.35': 15, '0.5': 15, '0.05': 7})


# Sinusoidal

In [7]:
# Convert every sinusoidal-pitching HDF5 file into a pair of .npy files
# (<filename>_X.npy with the cropped velocity fields, <filename>_Y.npy with the
# per-snapshot labels).
original_data_dir = "/Volumes/KleeFD/original data/"

filenames = os.listdir(original_data_dir)

print(filenames)

save_dir = '/Volumes/KleeFD/dataset/window_normalized/Window Size/one thirds/Unsplit Data/'


for filename in filenames:
    print(filename)
    # Get frequency info from filename: the digits between 'p' and the next
    # '.', e.g. '...f0p05.h5' -> 0.05 and '...f0p1.h5' -> 0.1.
    try:
        frequency = (filename.split('p')[1]).split('.')[0]
        if len(frequency) == 1:
            frequency += '0'

        frequency = int(frequency)/100
    except (IndexError, ValueError):
        # No 'p<digits>' part in the name (e.g. '.DS_Store'): treated as static
        print("static!")
        # Skip if it is static
        continue

    print(frequency)

    # Get alpha info from filename: the two characters after the first letter
    # of the second '_'-separated field, e.g. 'a25f0p05.h5' -> 25.0
    alpha = float((filename.split('_')[1])[1:3])

    # Get the velocity data. Only the crop window (rows 99..198, columns
    # 399..598) is read, and the file is closed even if reading fails.
    file_path = original_data_dir + filename
    with h5py.File(file_path, 'r') as file:
        ux = file['ux'][:, 99:199, 399:599]
        uy = file['uy'][:, 99:199, 399:599]
        inst_alpha = np.array(file['alpha']).reshape(-1)

    # Create label
    label = np.array([float(alpha), (frequency)])

    print(label)

    n_snapshots = ux.shape[0]

    # ux stacked on top of uy for every snapshot in one call, instead of growing
    # the array one snapshot at a time. Promoted to at least float64, which is
    # what appending to a float64 array produced before.
    X = np.concatenate((ux, uy), axis = 1)
    X = X.astype(np.result_type(np.float64, X.dtype), copy = False)
    if X.shape[1:] != (200, 200):
        raise ValueError("unexpected cropped field shape %r in %s" % (X.shape, filename))

    # One label row per snapshot:
    # alpha, frequency, type, instantaneous alpha, frequency (all as strings).
    rows = [[str(alpha), str(frequency), 'sin', str(a), str(frequency)]
            for a in inst_alpha[:n_snapshots]]
    if len(rows) != n_snapshots:
        raise ValueError("%s: %d alpha values for %d snapshots"
                         % (filename, len(rows), n_snapshots))
    # Concatenating onto an empty float array keeps the same string dtype
    # promotion as the former np.append onto np.empty((0, 5)).
    Y = np.concatenate((np.empty(shape = (0,5)), np.array(rows).reshape(-1, 5)), axis = 0)

    np.save(save_dir+filename+'_X.npy', X)
    np.save(save_dir+filename+'_Y.npy', Y)
    

['.DS_Store', 'airfoilDNS_a25f0p05.h5', 'airfoilDNS_a25f0p1.h5', 'airfoilDNS_a25f0p2.h5', 'airfoilDNS_a25f0p25.h5', 'airfoilDNS_a25f0p3.h5', 'airfoilDNS_a25f0p35.h5', 'airfoilDNS_a25f0p4.h5', 'airfoilDNS_a25f0p5.h5', 'airfoilDNS_a30f0p05.h5', 'airfoilDNS_a30f0p1.h5', 'airfoilDNS_a30f0p2.h5', 'airfoilDNS_a30f0p25.h5', 'airfoilDNS_a30f0p3.h5', 'airfoilDNS_a30f0p35.h5', 'airfoilDNS_a30f0p4.h5', 'airfoilDNS_a30f0p5.h5']
.DS_Store
static!
airfoilDNS_a25f0p05.h5
0.05
[25.    0.05]

 next
Iter 0Iter 1Iter 2Iter 3Iter 4Iter 5Iter 6Iter 7Iter 8Iter 9airfoilDNS_a25f0p1.h5
0.1
[25.   0.1]

 next
Iter 0Iter 1Iter 2Iter 3Iter 4Iter 5Iter 6Iter 7Iter 8Iter 9airfoilDNS_a25f0p2.h5
0.2
[25.   0.2]

 next
Iter 0Iter 1Iter 2Iter 3Iter 4Iter 5Iter 6Iter 7Iter 8Iter 9airfoilDNS_a25f0p25.h5
0.25
[25.    0.25]

 next
Iter 0Iter 1Iter 2Iter 3Iter 4Iter 5Iter 6Iter 7Iter 8Iter 9airfoilDNS_a25f0p3.h5
0.3
[25.   0.3]

 next
Iter 0Iter 1Iter 2Iter 3Iter 4Iter 5Iter 6Iter 7Iter 8Iter 9airfoilDNS_a25f0p35.h5
0.35
[

In [10]:
# append all the data into one array

filenames = ['airfoilDNS_a25f0p05.h5', 'airfoilDNS_a25f0p1.h5', 'airfoilDNS_a25f0p2.h5', 'airfoilDNS_a25f0p25.h5', 'airfoilDNS_a25f0p3.h5', 'airfoilDNS_a25f0p35.h5', 'airfoilDNS_a25f0p4.h5', 'airfoilDNS_a25f0p5.h5', 'airfoilDNS_a30f0p05.h5', 'airfoilDNS_a30f0p1.h5', 'airfoilDNS_a30f0p2.h5', 'airfoilDNS_a30f0p25.h5', 'airfoilDNS_a30f0p3.h5', 'airfoilDNS_a30f0p35.h5', 'airfoilDNS_a30f0p4.h5', 'airfoilDNS_a30f0p5.h5']
save_dir = '/Volumes/KleeFD/dataset/window_normalized/Window Size/one thirds/Unsplit Data/'

print(len(filenames))

# Collect the per-file arrays and concatenate once. Growing X with np.append
# inside the loop re-copied everything accumulated so far for every file.
# The empty float64 seeds keep the result dtype (and the behaviour for an empty
# file list) the same as before.
X_parts = [np.empty(shape = (0,200,200))]
Y_parts = [np.empty(shape = (0,5))]

for filename in filenames:
    X_parts.append(np.load(save_dir+filename+'_X.npy'))
    Y_parts.append(np.load(save_dir+filename+'_Y.npy'))

X = np.concatenate(X_parts, axis = 0)
Y = np.concatenate(Y_parts, axis = 0)

# Release the per-file copies
del X_parts, Y_parts

16


In [11]:
# Split Data
# 70 % training first; the remaining 30 % is then halved into test and
# validation. Both splits are stratified on the first two label columns.
split_kwargs = dict(random_state = 0, shuffle = True)

train_X, val_X, train_Y, val_Y = train_test_split(
    X, Y, test_size = 0.3, stratify = Y[:, :2], **split_kwargs)

test_X, val_X, test_Y, val_Y = train_test_split(
    val_X, val_Y, test_size = 0.5, stratify = val_Y[:, :2], **split_kwargs)

In [12]:
# Give every split a trailing axis of length 1: (N, 200, 200, 1)
train_X, val_X, test_X = (
    part.reshape(-1, 200, 200, 1) for part in (train_X, val_X, test_X)
)

# Check stratification
for tag, labels in (("training :", train_Y), ('validation :', val_Y), ('test :', test_Y)):
    print(tag, Counter(labels[:, 0]), Counter(labels[:, 1]))

training : Counter({'30.0': 5603, '25.0': 2248}) Counter({'0.1': 982, '0.25': 982, '0.4': 982, '0.2': 981, '0.5': 981, '0.35': 981, '0.3': 981, '0.05': 981})
validation : Counter({'30.0': 1203, '25.0': 480}) Counter({'0.5': 211, '0.3': 211, '0.35': 211, '0.1': 210, '0.4': 210, '0.05': 210, '0.25': 210, '0.2': 210})
test : Counter({'30.0': 1202, '25.0': 480}) Counter({'0.2': 211, '0.05': 211, '0.35': 210, '0.5': 210, '0.3': 210, '0.25': 210, '0.1': 210, '0.4': 210})


# Preprocess

In [13]:
# Merge the chirp (BoA 25, BoA 30) samples with the sinusoidal ones, split by split.
# The right-hand sides read the current train_X / val_X / test_X, so running
# this cell a second time would add the chirp data again.
train_X = np.vstack((train_X25, train_X30, train_X))
val_X = np.vstack((val_X25, val_X30, val_X))
test_X = np.vstack((test_X25, test_X30, test_X))

In [14]:
# Merge the labels in the same order as the samples (BoA 25, BoA 30, sinusoidal).
# As with the samples, re-running this cell would add the chirp labels again.
train_Y = np.vstack((train_Y25, train_Y30, train_Y))
val_Y = np.vstack((val_Y25, val_Y30, val_Y))
test_Y = np.vstack((test_Y25, test_Y30, test_Y))

In [15]:
# zero-center
# The mean over the sample axis is computed from the training split only and
# subtracted in place from all three splits.
mean = train_X.mean(axis = 0)

for split in (train_X, val_X, test_X):
    split -= mean

In [16]:
# Output directory of the final dataset (note the trailing slash)
d = '/Volumes/KleeFD/dataset/dataset_chirp/04_sinusoidal + chirp _repreprocessed/'

# Store the training mean next to the dataset
np.save(os.path.join(d, "mean.npy"), mean)

In [17]:
# Min-Max scaling
#
# Rows 0..99 of every sample are scaled by the ux range and rows 100..199 by
# the uy range; both ranges are taken from the training split only.

# Basic slices are views, so the in-place divisions below write straight into
# train_X / val_X / test_X.
train_ux = train_X[:,0:100,:,:]; train_uy = train_X[:,100:200,:,:]
val_ux = val_X[:,0:100,:,:]; val_uy = val_X[:,100:200,:,:]
test_ux = test_X[:,0:100,:,:]; test_uy = test_X[:,100:200,:,:]

# Global extrema over the whole training split.
# The former per-sample loop checked the ux minimum in an `elif`, so it was
# skipped for every sample that raised the running maximum, and it started
# from the sentinels 0 / 10000 rather than from the data.
# NOTE(review): ux_range.npy may therefore differ from files written earlier.
ux_max = np.max(train_ux); ux_min = np.min(train_ux)
uy_max = np.max(train_uy); uy_min = np.min(train_uy)

print(ux_max, uy_max, ux_min, uy_min)
ux_range = ux_max - ux_min
uy_range = uy_max - uy_min

train_ux /= ux_range; train_uy /= uy_range
val_ux /= ux_range; val_uy /= uy_range
test_ux /= ux_range; test_uy /= uy_range

# Store the scaling factors next to the dataset
np.save(d+"ux_range.npy", ux_range)
np.save(d+"uy_range.npy", uy_range)

0.21903814447249836 0.422191862912087 -0.2759959115160803 -0.44240630080571963


In [15]:
# Write the scaled components back into the full arrays: rows 0..99 receive
# ux and rows 100..199 receive uy.
# Every split is assigned as a whole, so each one is covered by its own length;
# the former test loop was bounded by len(val_uy) and could stop early.
# When train_ux etc. are slice views of the arrays they are written to, these
# assignments leave the data unchanged.
train_X[:, 0:100, :, :] = train_ux
train_X[:, 100:200, :, :] = train_uy

val_X[:, 0:100, :, :] = val_ux
val_X[:, 100:200, :, :] = val_uy

test_X[:, 0:100, :, :] = test_ux
test_X[:, 100:200, :, :] = test_uy

# Shuffle & Save

In [16]:
# Shuffle the validation and training splits, applying the same permutation to
# samples and labels. The test split is left in its current order.
# A seeded generator makes the order reproducible across kernel restarts.
rng = np.random.default_rng(0)

s = rng.permutation(val_X.shape[0])

val_X = val_X[s]
val_Y = val_Y[s]

s = rng.permutation(train_X.shape[0])

train_X = train_X[s]
train_Y = train_Y[s]

In [17]:
# Output directory of the final dataset (note the trailing slash)
d = '/Volumes/KleeFD/dataset/dataset_chirp/04_sinusoidal + chirp _repreprocessed/'

# Write samples and labels of every split, in the order train, val, test
outputs = (
    ('train_X.npy', train_X), ('train_Y.npy', train_Y),
    ('val_X.npy', val_X), ('val_Y.npy', val_Y),
    ('test_X.npy', test_X), ('test_Y.npy', test_Y),
)
for fname, arr in outputs:
    np.save(d+fname, arr)

In [3]:
# Reload the saved labels and report the size of each split
d = '/Volumes/KleeFD/dataset/dataset_chirp/04_sinusoidal + chirp _repreprocessed/'

train_Y, val_Y, test_Y = (
    np.load(d+fname) for fname in ('train_Y.npy', 'val_Y.npy', 'test_Y.npy')
)

print(len(train_Y), len(val_Y), len(test_Y))

9109 1953 1952
