In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
import scipy.interpolate
import os
import csv
from scipy.interpolate import griddata
from numpy import savetxt
import sklearn.utils.extmath
import progressbar
from fcmeans import FCM
from time import sleep

# FUNCTIONS

In [2]:
def get_field(path,simulations,seconds,mesh,field):
    """Load one field for every (simulation, time) pair into a snapshot matrix.

    Each file is read from ``<path>/sim_<s>/<t>/<field>`` through ``get_data``.

    Parameters
    ----------
    path : str
        Root directory that contains the ``sim_<s>`` folders.
    simulations : sequence
        Simulation identifiers, in the order their rows should appear.
    seconds : sequence
        Time-folder names to read for every simulation.
    mesh : int
        Number of values expected in each file (forwarded to ``get_data``).
    field : str
        Name of the field file inside each time folder.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(simulations)*len(seconds), mesh)``. Rows are
        ordered simulation-major, time-minor.
    """
    S = np.zeros((len(simulations)*len(seconds),mesh))
    bar = progressbar.ProgressBar(
        maxval=len(simulations),
        widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
    bar.start()
    row = 0
    for i,s in enumerate(simulations):
        for t in seconds:
            file_path = '/'.join([path, 'sim_'+str(s), str(t), field])
            S[row,:] = get_data(file_path,mesh)
            row += 1
        # Report progress once the simulation has actually been loaded; no
        # artificial sleep is needed for the bar to render.
        bar.update(i+1)
    bar.finish()
    return S

def get_data(path,mesh):
    """Read a bracketed list of scalar values from a text file.

    Reading starts on the line after the first line that begins with ``(``
    and stops at the first line that begins with ``)``. Every line in between
    is converted with ``float``.

    Parameters
    ----------
    path : str
        File to read.
    mesh : int
        Number of values the file is expected to contain.

    Returns
    -------
    numpy.ndarray
        1-D float array with ``mesh`` entries.

    Raises
    ------
    ValueError
        If a data line is not a valid float, or if the number of values found
        differs from ``mesh`` (previously a short file silently produced an
        array padded with ``None`` and a long file raised ``IndexError``).
    """
    values = []
    reading = False
    with open(path, 'r') as file:
        for line in file:
            if line[0] == ')':
                break
            if reading:
                values.append(float(line))
            elif line[0] == "(":
                reading = True
    if len(values) != mesh:
        raise ValueError(
            '{}: expected {} values, found {}'.format(path, mesh, len(values)))
    return np.array(values)

def read_files_centers(file_path):
    """Return the lines enclosed by the first ``(`` line and the first ``)`` line.

    Paths whose last character is ``U`` are delegated to ``read_U``. For any
    other path the file is split on newlines and the lines strictly between
    the first line equal to ``(`` and the first line equal to ``)`` are
    returned as a NumPy array of strings. When no ``(`` line exists an empty
    list is returned.
    """
    with open(file_path, 'r') as handle:
        if file_path[-1] == 'U':
            return read_U(file_path)

        rows = np.array(handle.read().split("\n"))
        opening = np.where(rows == "(")[0]
        closing = np.where(rows == ")")[0]
        if len(opening) == 0:
            return []
        first = opening[0] + 1
        last = closing[0]
        return np.array(rows[first:last])

def M_matrix(simulations,time,parameters,path):
    """Build the parameter matrix: one row per (simulation, time) pair.

    The first ``simulations`` rows of ``<path>/X_LHS_Uniform.csv`` are read
    (no header row is skipped). Each CSV row is repeated for every entry of
    ``time`` with that time value prepended as the first column.

    Parameters
    ----------
    simulations : int
        Number of CSV rows (simulations) to use.
    time : sequence
        Time values; one output row is produced per value and simulation.
    parameters : int
        Width of the output rows; expected to be 1 + the number of CSV
        columns, since the time value is prepended.
    path : str
        Directory containing ``X_LHS_Uniform.csv``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(simulations*len(time), parameters)``. If the
        CSV holds fewer than ``simulations`` rows, the trailing rows stay zero.
    """
    rows = []
    # Context manager guarantees the handle is closed even if parsing fails;
    # newline='' is the mode the csv module documents for its readers.
    with open(path+'/X_LHS_Uniform.csv', newline='') as file:
        for count,row in enumerate(csv.reader(file)):
            if count >= simulations:
                break
            rows.append(row)

    M = np.zeros((simulations*len(time),parameters))
    count = 0
    for row in rows:
        # csv yields strings; convert once per simulation instead of relying
        # on an implicit string-to-float cast at assignment time.
        values = np.asarray(row, dtype=float)
        for t in time:
            M[count,:] = np.concatenate([[t], values])
            count += 1
    return M

def normalize_M(M):
    """Min-max scale every column of ``M`` to the range [0, 1].

    Parameters
    ----------
    M : array_like, 2-D
        Matrix whose columns are scaled independently.

    Returns
    -------
    numpy.ndarray
        New float64 array of the same shape as ``M``. A column whose values
        are all equal is mapped to 0 (instead of NaN from a 0/0 division).
    """
    M = np.asarray(M)
    M_min = np.amin(M, axis=0)
    M_max = np.amax(M, axis=0)
    span = M_max - M_min
    # Constant columns have zero range; dividing by 1 leaves their (zero)
    # numerator untouched and avoids NaN/inf.
    span = np.where(span == 0, 1, span)
    return np.asarray((M - M_min)/span, dtype=np.float64)

def normalize_S(S):
    """Min-max scale ``S`` to [0, 1] using its global minimum and maximum.

    The global maximum and minimum are printed (in that order) before the
    scaled array is returned.

    Parameters
    ----------
    S : array_like
        Values to scale.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``S``. If every entry of ``S`` is equal,
        an all-zero array is returned (instead of NaN from a 0/0 division).
    """
    S = np.asarray(S)
    S_max = np.amax(S)
    S_min = np.amin(S)
    print(S_max)
    print(S_min)
    span = S_max - S_min
    if span == 0:
        # Constant field: there is no range to scale by.
        return np.zeros(S.shape)
    return (S - S_min)/span
        

def variance_simulations(S,t,seconds,sim,t0=2):
    """Normalized per-cell standard deviation across simulations at one time.

    Row ``(t - t0) + i*seconds`` of ``S`` is taken for ``i = 0 .. sim-1`` and
    the standard deviation over those rows is computed column by column, then
    min-max scaled to [0, 1].

    Parameters
    ----------
    S : numpy.ndarray, 2-D
        Snapshot matrix with ``seconds`` consecutive rows per simulation.
    t : int
        Time value to select.
    seconds : int
        Number of rows (time steps) stored per simulation.
    sim : int
        Number of simulations to include.
    t0 : int, optional
        Time value stored in the first row of each simulation. Defaults to 2,
        the offset that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        1-D array with one value per column of ``S``. If every column has the
        same standard deviation, zeros are returned (instead of NaN).

    Raises
    ------
    IndexError
        If a selected row lies outside ``S``.
    """
    S = np.asarray(S)
    rows = (t - t0) + seconds*np.arange(sim)
    selected = np.asarray(S[rows,:], dtype=np.float64)
    var_arr = np.std(selected,axis=0)
    mi = np.amin(var_arr)
    span = np.amax(var_arr) - mi
    if span == 0:
        # Uniform spread: nothing to scale by.
        return np.zeros_like(var_arr)
    return (var_arr-mi)/span
    

def formatNumber(num):
    """Return the items of ``num`` as a list, with whole-valued items cast to ``int``.

    Items that have a fractional part are kept unchanged.
    """
    return [int(value) if value % 1 == 0 else value for value in num]

def interpolate(x,y,z,step,method = 'cubic'):
    """Resample scattered values onto a regular grid and blank fixed regions.

    The grid spans ``[0, 0.0006)`` in x and ``[0, 0.0035)`` in y with spacing
    ``step``. Values are interpolated with ``scipy.interpolate.griddata`` and
    two hard-coded rectangular regions are then set to NaN.

    Returns the tuple ``(xi, yi, zi)`` of equally shaped 2-D arrays.
    """
    grid_x, grid_y = np.meshgrid(np.arange(0,0.0006,step), np.arange(0,0.0035,step))

    # Regions blanked out after interpolation
    # (presumably areas outside the geometry -- TODO confirm).
    lower_block = (grid_x > 0.0001) & (grid_y < 0.00118)
    notch = (grid_x > 7.9e-5) & (grid_y > 0.001604) & (grid_y < 0.001784)

    grid_z = griddata((x,y),z,(grid_x,grid_y),method=method)
    grid_z[lower_block | notch] = np.nan
    return grid_x,grid_y,grid_z

# DATA ACQUISITION AND PREPARATION OF INPUT MATRICES

In [12]:
# Settings used for data acquisition and for preparing the input matrices
specie = 'vWFs'  # name of the field file read for every snapshot
N = 256  # size of the POD basis
n_clusters = 3
n_param = 6  # number of columns of the parameter matrix M
# seconds of each simulation to load (whole numbers are stored as int)
rang = formatNumber(np.arange(2,42,1))
print('Time interval =', rang)
print('Length simulation =', len(rang))


Time interval = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41]
Length simulation = 40


In [14]:
# Load the species snapshots, the parameter matrix (time + parameters) and the
# mesh cell centres. Adjust the file names and path_data to wherever the
# simulation files are stored.

dir = '3_cluster_256_N'  # NOTE: this name shadows the builtin dir()
path_data = '../POD-clot/'
S = get_field(path_data+'DoE2',np.arange(1,101,1),rang,68650,specie)
M = M_matrix(100,rang,n_param,path_data)
cell_centers = read_files_centers(path_data+'cellCenters')
# Drop the first and last character of every entry (presumably the enclosing
# brackets) and parse the whitespace-separated numbers in between.
cell_true = np.array([np.array(entry[1:-1].split()).astype(np.float64)
                      for entry in cell_centers])
print('Shape S =', np.shape(S))
print('Shape M =', np.shape(M))



Shape S = (4000, 68650)
Shape M = (4000, 6)


# CLUSTERING SECTION

In [16]:
# Randomized SVD of the snapshot matrix, truncated to N components
_,s,v = sklearn.utils.extmath.randomized_svd(S,n_components=N,random_state=8)

# v is used as the basis matrix (one basis vector per row)
V_transp = v
# container for the N coefficients of every snapshot
Input = np.zeros((S.shape[0],N))
print('Shape V transposed =', np.shape(V_transp))
print('Shape Input coefficients =', np.shape(Input))



Shape V transposed = (256, 68650)
Shape Input coefficients = (4000, 256)


In [17]:
# Project every snapshot of S onto the basis in a single matrix product:
# row k of Input holds the coefficients V_transp @ S[k]. This replaces one
# Python-level matmul per snapshot and no longer needs Input to be
# preallocated by an earlier cell.
Input = np.matmul(S,V_transp.T)
print('Shape Input coefficients = '+ str(np.shape(Input)))

Shape Input coefficients = (4000, 256)


In [18]:
# Min-max scale the coefficient vector of each snapshot (row-wise) using that
# row's own minimum and maximum.
row_min = np.amin(Input,axis=1,keepdims=True)
row_max = np.amax(Input,axis=1,keepdims=True)
Input_norm = (Input-row_min)/(row_max-row_min)
Input = Input_norm

In [19]:
# Perform the fuzzy c-means clustering on the normalized coefficients.
# NOTE(review): no random_state is passed to FCM here -- confirm whether the
# library default makes this step reproducible across runs.
X = Input
fcm = FCM(n_clusters=n_clusters)
fcm.fit(X)
fcm_centers = fcm.centers # cluster centers, expressed in the N-dimensional coefficient space
fcm_labels = fcm.predict(X) # one label per row of X, i.e. per snapshot

In [None]:
# Divide parameters and cluster labels into training and testing sets for the
# classification algorithm. The first 3800 rows are used for training and the
# LAST 200 rows for testing, so the two sets do not overlap (the test slice
# previously took the first 200 rows, which are part of the training range).
param_train = M[0:3800].copy()
labels_train = fcm_labels[0:3800].copy()
params_test = M[-200:].copy()
labels_test = fcm_labels[-200:].copy()
# Each line reports the shape of the array it names.
print('Shape param_train = ' + str(np.shape(param_train)))
print('Shape param_test = ' + str(np.shape(params_test)))
print('Shape label_train = ' + str(np.shape(labels_train)))
print('Shape label_test = ' + str(np.shape(labels_test)))

# CREATE THE N_CLUSTERS POD BASIS BASED ON FCM_LABELS

In [20]:
# Split the snapshots (not the coefficients!) and the parameter vectors into
# a training part (first 3800 rows) and a testing part (last 200 rows).

S_train, S_test = S[:3800], S[-200:]
M_train, M_test = M[:3800], M[-200:]
for label, block in (('S_train', S_train), ('S_test', S_test),
                     ('M_train', M_train), ('M_test', M_test)):
    print('Shape ' + label + ' =', np.shape(block))

Shape S_train = (3800, 68650)
Shape S_test = (200, 68650)
Shape M_train = (3800, 6)
Shape M_test = (200, 6)


In [27]:
# Sanity check: modifying a .copy() of a slice leaves the source array untouched.
a = np.arange(5)
b = a[:2].copy()
b[0] = 4
print(a, b, sep='\n')

[0 1 2 3 4]
[4 1]
