In [11]:
#Import the necessary libraries
import numpy as np
import pandas as pd
import random
#random.seed(42)

In [12]:
#This is the file write function to write the randomly generated dataset into desired txt format for use in Comsol.
def filewrite(filename, gap, lpcm, hpcm, wpcm, N):
    """Write five parameter lists to a text file, one parameter per line.

    Each line has the form ``<name> "<v1> <v2> ..." [<unit>]``. The lines are
    written in the order gap, Lpcm, Hpcm, Wpcm, N; the first four carry the
    unit tag ``[mm]`` and N carries an empty tag ``[]``.

    Parameters
    ----------
    filename : path of the file to create (an existing file is overwritten).
    gap, lpcm, hpcm, wpcm, N : iterables of values; every value is converted
        with ``str()`` and the values of one parameter are joined by spaces.
    """
    #(name written to the file, values, unit tag) in output order
    parameters = (
        ('gap', gap, 'mm'),
        ('Lpcm', lpcm, 'mm'),
        ('Hpcm', hpcm, 'mm'),
        ('Wpcm', wpcm, 'mm'),
        ('N', N, ''),
    )
    #The with-block closes the file on exit, so no explicit close() is needed
    with open(filename,'w') as file:
        for name, values, unit in parameters:
            joined = " ".join(str(value) for value in values)
            file.write(f'{name} "{joined}" [{unit}]\n')

In [13]:
#Dataset size and sampling limits
N_SAMPLES = 1000        #Total number of design points to generate
SET_SIZE = 200          #Design points written to each set<k>.txt file
MAX_ATTEMPTS = 50000    #Rejection-sampling attempts allowed per design point
MAX_TOTAL_WIDTH = 18    #Upper bound for N*wpcm + (N-1)*gaps

#NOTE: unless the random module is seeded before this cell runs, every run draws a
#different sample and overwrites the set<k>.txt files with it.

#Generate Lpcm values in range 1<=lpcm<=18 and Hpcm values in range 1<=hpcm<=8,
#each rounded to 2 decimal places
l_pcm = [round(random.uniform(1,18),2) for _ in range(N_SAMPLES)]
h_pcm = [round(random.uniform(1,8),2) for _ in range(N_SAMPLES)]

#Generate wpcm, gaps and N together by rejection sampling so that every stored
#point fulfils N*wpcm + (N-1)*gaps <= 18
w_pcm = []
gaps = []
N = []
for sample_idx in range(N_SAMPLES):
    for _ in range(MAX_ATTEMPTS):
        #Round before testing so the constraint holds for the values that are actually stored
        w = round(random.uniform(1,18),2)
        g = round(random.uniform(1,16),2)
        n = random.randint(2,8)
        #w>=1, g>=1 and n>=2, so the sum is always positive; only the upper bound needs checking
        if n*w + (n-1)*g <= MAX_TOTAL_WIDTH:
            w_pcm.append(w)
            gaps.append(g)
            N.append(n)
            break
    else:
        #Without this the lists would silently end up with different lengths
        raise RuntimeError(
            f"No valid (wpcm, gap, N) combination found for sample {sample_idx} "
            f"after {MAX_ATTEMPTS} attempts"
        )

#Convert each array to numpy for easier calculation and reproduction
l_pcm = np.array(l_pcm)
h_pcm = np.array(h_pcm)
w_pcm = np.array(w_pcm)
gaps = np.array(gaps)
N = np.array(N,dtype="i")

#Write consecutive sets of SET_SIZE points to set1.txt, set2.txt, ...
for set_idx, start in enumerate(range(0, N_SAMPLES, SET_SIZE), start=1):
    chunk = slice(start, start + SET_SIZE)
    filewrite(f'set{set_idx}.txt', gaps[chunk], l_pcm[chunk], h_pcm[chunk], w_pcm[chunk], N[chunk])

#Reshape and stack the feature vectors into a 2d array (one row per design point)
l_pcm = l_pcm.reshape(-1,1)
h_pcm = h_pcm.reshape(-1,1)
w_pcm = w_pcm.reshape(-1,1)
gaps = gaps.reshape(-1,1)
N = N.reshape(-1,1)
dataset = np.hstack([l_pcm,h_pcm,w_pcm,gaps,N])
print(dataset)
                 

[[10.17  6.22  2.32  5.86  2.  ]
 [14.33  3.22  4.42  5.81  2.  ]
 [ 5.83  5.14  1.9   4.59  3.  ]
 ...
 [16.54  4.73  1.09 12.52  2.  ]
 [16.9   2.43  1.52  3.03  4.  ]
 [14.45  3.66  6.78  4.03  2.  ]]


In [14]:
#Wrap the stacked 2d array in a labelled DataFrame; the N column arrives as float
#after stacking, so cast it back to an integer column
model_data = pd.DataFrame(
    dataset,
    columns=["L_pcm/mm","H_pcm/mm","W_pcm/mm","Gaps/mm","Number of cuts/N"],
).astype({"Number of cuts/N":'int'})

In [15]:
#Print the first rows of the DataFrame as a quick sanity check of values and column names
print(model_data.head())

   L_pcm/mm  H_pcm/mm  W_pcm/mm  Gaps/mm  Number of cuts/N
0     10.17      6.22      2.32     5.86                 2
1     14.33      3.22      4.42     5.81                 2
2      5.83      5.14      1.90     4.59                 3
3      4.51      1.71      2.11     5.83                 2
4      5.19      7.58      3.15     2.48                 2


In [16]:
#Print the data type for each column
#info() writes its report itself and returns None, so wrapping it in print() adds a stray "None" line
model_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   L_pcm/mm          1000 non-null   float64
 1   H_pcm/mm          1000 non-null   float64
 2   W_pcm/mm          1000 non-null   float64
 3   Gaps/mm           1000 non-null   float64
 4   Number of cuts/N  1000 non-null   int32  
dtypes: float64(4), int32(1)
memory usage: 35.3 KB
None


In [17]:
#Save to excel file; index=False leaves the row index out of the sheet
model_data.to_excel('model_data.xlsx',index=False)

In [18]:
#Save to csv without the row index (index takes a bool; False matches the Excel export)
model_data.to_csv('model_data.csv',index=False)

In [19]:
#Testing if file can be read properly: load the saved CSV back into a new DataFrame
test1 = pd.read_csv('model_data.csv')

In [20]:
#See the count of each value in number of cuts in the dataset (as read back from the CSV)
print(test1['Number of cuts/N'].value_counts())

Number of cuts/N
2    712
3    171
4     72
5     28
6     12
7      4
8      1
Name: count, dtype: int64
